Skip to content

Commit cb7509a

Browse files
committed
mask_amd64.s: Remove AVX2 fully
1 parent 685a56e commit cb7509a

File tree

3 files changed

+2
-30
lines changed

3 files changed

+2
-30
lines changed

mask_amd64.s

Lines changed: 2 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,6 @@ TEXT ·maskAsm(SB), NOSPLIT, $0-28
 	TESTQ $31, AX
 	JNZ   unaligned
 
-aligned:
-	CMPB ·useAVX2(SB), $1
-	JE   avx2
-	JMP  sse
-
 unaligned_loop_1byte:
 	XORB SI, (AX)
 	INCQ AX
@@ -47,7 +42,7 @@ unaligned_loop_1byte:
 	ORQ  DX, DI
 
 	TESTQ $31, AX
-	JZ    aligned
+	JZ    sse
 
 unaligned:
 	TESTQ $7, AX // AND $7 & len, if not zero jump to loop_1b.
@@ -60,27 +55,7 @@ unaligned_loop:
 	SUBQ  $8, CX
 	TESTQ $31, AX
 	JNZ   unaligned_loop
-	JMP   aligned
-
-avx2:
-	CMPQ CX, $0x80
-	JL   sse
-	VMOVQ        DI, X0
-	VPBROADCASTQ X0, Y0
-
-avx2_loop:
-	VPXOR   (AX), Y0, Y1
-	VPXOR   32(AX), Y0, Y2
-	VPXOR   64(AX), Y0, Y3
-	VPXOR   96(AX), Y0, Y4
-	VMOVDQU Y1, (AX)
-	VMOVDQU Y2, 32(AX)
-	VMOVDQU Y3, 64(AX)
-	VMOVDQU Y4, 96(AX)
-	ADDQ $0x80, AX
-	SUBQ $0x80, CX
-	CMPQ CX, $0x80
-	JAE  avx2_loop // loop if CX >= 0x80
+	JMP   sse
 
 sse:
 	CMPQ CX, $0x40

mask_arm64.s

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ TEXT ·maskAsm(SB), NOSPLIT, $0-28
 	CMP $64, R1
 	BLT less_than_64
 
-	// todo: optimize unaligned case
 loop_64:
 	VLD1 (R0), [V1.B16, V2.B16, V3.B16, V4.B16]
 	VEOR V1.B16, V0.B16, V1.B16

mask_asm.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,5 @@ func mask(key uint32, b []byte) uint32 {
 	return key
 }
 
-var useAVX2 = false
-
 //go:noescape
 func maskAsm(b *byte, len int, key uint32) uint32

0 commit comments

Comments (0)