diff --git a/internal/subtle/xor_s390x.s b/internal/subtle/xor_s390x.s index b284f04..4d97b23 100644 --- a/internal/subtle/xor_s390x.s +++ b/internal/subtle/xor_s390x.s @@ -13,7 +13,7 @@ TEXT ·xorBytes(SB),NOSPLIT,$0-32 MOVD n+24(FP), R4 MOVD $0, R5 - CMPBLT R4, $16, less_than16 + CMPBLT R4, $16, tail loop16b: VL 0(R2)(R5*1), V0 @@ -24,8 +24,8 @@ loop16b: SUB $16, R4 CMPBGE R4, $16, loop16b -less_than16: - CMPBLT R4, $8, tail +tail: + CMPBLT R4, $8, less_than8 MOVD 0(R2)(R5*1), R7 MOVD 0(R3)(R5*1), R8 XOR R7, R8 @@ -33,15 +33,30 @@ less_than16: LAY 8(R5), R5 SUB $8, R4 -tail: +less_than8: + CMPBLT R4, $4, less_than4 + MOVWZ 0(R2)(R5*1), R7 + MOVWZ 0(R3)(R5*1), R8 + XOR R7, R8 + MOVW R8, 0(R1)(R5*1) + LAY 4(R5), R5 + SUB $4, R4 + +less_than4: + CMPBLT R4, $2, less_than2 + MOVHZ 0(R2)(R5*1), R7 + MOVHZ 0(R3)(R5*1), R8 + XOR R7, R8 + MOVH R8, 0(R1)(R5*1) + LAY 2(R5), R5 + SUB $2, R4 + +less_than2: CMPBEQ R4, $0, done MOVB 0(R2)(R5*1), R7 MOVB 0(R3)(R5*1), R8 XOR R7, R8 MOVB R8, 0(R1)(R5*1) - LAY 1(R5), R5 - SUB $1, R4 - BR tail done: RET