mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-28 05:06:18 +08:00
sm3: fix arm ni issue
This commit is contained in:
parent
d2382247b1
commit
d38216752d
@ -88,9 +88,9 @@ func roundB(buf *bytes.Buffer, i, t, st1, st2, w, wt byte) {
|
||||
// st1, st2, sm3 state
|
||||
func qroundA(buf *bytes.Buffer, t, st1, st2, s0, s1, s2, s3, s4 byte) {
|
||||
fmt.Fprintf(buf, "\t// Extension\n")
|
||||
fmt.Fprintf(buf, "\tVEXT $3, V%d.B16, V%d.B16, V%d.B16\n", s2, s1, s4)
|
||||
fmt.Fprintf(buf, "\tVEXT $3, V%d.B16, V%d.B16, V%d.B16\n", s1, s0, 6)
|
||||
fmt.Fprintf(buf, "\tVEXT $2, V%d.B16, V%d.B16, V%d.B16\n", s3, s2, 7)
|
||||
fmt.Fprintf(buf, "\tVEXT $12, V%d.B16, V%d.B16, V%d.B16\n", s2, s1, s4)
|
||||
fmt.Fprintf(buf, "\tVEXT $12, V%d.B16, V%d.B16, V%d.B16\n", s1, s0, 6)
|
||||
fmt.Fprintf(buf, "\tVEXT $8, V%d.B16, V%d.B16, V%d.B16\n", s3, s2, 7)
|
||||
fmt.Fprintf(buf, "\tWORD $0x%08x //SM3PARTW1 V%d.4S, V%d.4S, V%d.4S\n", sm3partw1(s4, s0, s3), s4, s0, s3)
|
||||
fmt.Fprintf(buf, "\tWORD $0x%08x //SM3PARTW2 V%d.4S, V%d.4S, V%d.4S\n", sm3partw2(s4, 7, 6), s4, 7, 6)
|
||||
fmt.Fprintf(buf, "\tVEOR V%d.B16, V%d.B16, V10.B16\n", s1, s0)
|
||||
@ -106,9 +106,9 @@ func qroundA(buf *bytes.Buffer, t, st1, st2, s0, s1, s2, s3, s4 byte) {
|
||||
func qroundB(buf *bytes.Buffer, t, st1, st2, s0, s1, s2, s3, s4 byte) {
|
||||
if s4 != 0xff {
|
||||
fmt.Fprintf(buf, "\t// Extension\n")
|
||||
fmt.Fprintf(buf, "\tVEXT $3, V%d.B16, V%d.B16, V%d.B16\n", s2, s1, s4)
|
||||
fmt.Fprintf(buf, "\tVEXT $3, V%d.B16, V%d.B16, V%d.B16\n", s1, s0, 6)
|
||||
fmt.Fprintf(buf, "\tVEXT $2, V%d.B16, V%d.B16, V%d.B16\n", s3, s2, 7)
|
||||
fmt.Fprintf(buf, "\tVEXT $12, V%d.B16, V%d.B16, V%d.B16\n", s2, s1, s4)
|
||||
fmt.Fprintf(buf, "\tVEXT $12, V%d.B16, V%d.B16, V%d.B16\n", s1, s0, 6)
|
||||
fmt.Fprintf(buf, "\tVEXT $8, V%d.B16, V%d.B16, V%d.B16\n", s3, s2, 7)
|
||||
fmt.Fprintf(buf, "\tWORD $0x%08x //SM3PARTW1 V%d.4S, V%d.4S, V%d.4S\n", sm3partw1(s4, s0, s3), s4, s0, s3)
|
||||
fmt.Fprintf(buf, "\tWORD $0x%08x //SM3PARTW2 V%d.4S, V%d.4S, V%d.4S\n", sm3partw2(s4, 7, 6), s4, 7, 6)
|
||||
}
|
||||
@ -183,7 +183,7 @@ sm3ret:
|
||||
|
||||
`[1:])
|
||||
src := buf.Bytes()
|
||||
fmt.Println(string(src))
|
||||
// fmt.Println(string(src))
|
||||
|
||||
err := os.WriteFile("sm3blockni_arm64.s", src, 0644)
|
||||
if err != nil {
|
||||
|
@ -29,9 +29,9 @@ blockloop:
|
||||
// first 16 rounds
|
||||
VMOV R5, V11.S[3]
|
||||
// Extension
|
||||
VEXT $3, V2.B16, V1.B16, V4.B16
|
||||
VEXT $3, V1.B16, V0.B16, V6.B16
|
||||
VEXT $2, V3.B16, V2.B16, V7.B16
|
||||
VEXT $12, V2.B16, V1.B16, V4.B16
|
||||
VEXT $12, V1.B16, V0.B16, V6.B16
|
||||
VEXT $8, V3.B16, V2.B16, V7.B16
|
||||
WORD $0xce63c004 //SM3PARTW1 V4.4S, V0.4S, V3.4S
|
||||
WORD $0xce66c4e4 //SM3PARTW2 V4.4S, V7.4S, V6.4S
|
||||
VEOR V1.B16, V0.B16, V10.B16
|
||||
@ -79,9 +79,9 @@ blockloop:
|
||||
WORD $0xce41b8a9 //SM3TT2A V9d.4S, V5.4S, V1.S, 3
|
||||
|
||||
// Extension
|
||||
VEXT $3, V4.B16, V3.B16, V1.B16
|
||||
VEXT $3, V3.B16, V2.B16, V6.B16
|
||||
VEXT $2, V0.B16, V4.B16, V7.B16
|
||||
VEXT $12, V4.B16, V3.B16, V1.B16
|
||||
VEXT $12, V3.B16, V2.B16, V6.B16
|
||||
VEXT $8, V0.B16, V4.B16, V7.B16
|
||||
WORD $0xce60c041 //SM3PARTW1 V1.4S, V2.4S, V0.4S
|
||||
WORD $0xce66c4e1 //SM3PARTW2 V1.4S, V7.4S, V6.4S
|
||||
VEOR V3.B16, V2.B16, V10.B16
|
||||
@ -104,9 +104,9 @@ blockloop:
|
||||
WORD $0xce42b8a9 //SM3TT2A V9d.4S, V5.4S, V2.S, 3
|
||||
|
||||
// Extension
|
||||
VEXT $3, V0.B16, V4.B16, V2.B16
|
||||
VEXT $3, V4.B16, V3.B16, V6.B16
|
||||
VEXT $2, V1.B16, V0.B16, V7.B16
|
||||
VEXT $12, V0.B16, V4.B16, V2.B16
|
||||
VEXT $12, V4.B16, V3.B16, V6.B16
|
||||
VEXT $8, V1.B16, V0.B16, V7.B16
|
||||
WORD $0xce61c062 //SM3PARTW1 V2.4S, V3.4S, V1.4S
|
||||
WORD $0xce66c4e2 //SM3PARTW2 V2.4S, V7.4S, V6.4S
|
||||
VEOR V4.B16, V3.B16, V10.B16
|
||||
@ -131,9 +131,9 @@ blockloop:
|
||||
// second 48 rounds
|
||||
VMOV R6, V11.S[3]
|
||||
// Extension
|
||||
VEXT $3, V1.B16, V0.B16, V3.B16
|
||||
VEXT $3, V0.B16, V4.B16, V6.B16
|
||||
VEXT $2, V2.B16, V1.B16, V7.B16
|
||||
VEXT $12, V1.B16, V0.B16, V3.B16
|
||||
VEXT $12, V0.B16, V4.B16, V6.B16
|
||||
VEXT $8, V2.B16, V1.B16, V7.B16
|
||||
WORD $0xce62c083 //SM3PARTW1 V3.4S, V4.4S, V2.4S
|
||||
WORD $0xce66c4e3 //SM3PARTW2 V3.4S, V7.4S, V6.4S
|
||||
VEOR V0.B16, V4.B16, V10.B16
|
||||
@ -156,9 +156,9 @@ blockloop:
|
||||
WORD $0xce44bca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 3
|
||||
|
||||
// Extension
|
||||
VEXT $3, V2.B16, V1.B16, V4.B16
|
||||
VEXT $3, V1.B16, V0.B16, V6.B16
|
||||
VEXT $2, V3.B16, V2.B16, V7.B16
|
||||
VEXT $12, V2.B16, V1.B16, V4.B16
|
||||
VEXT $12, V1.B16, V0.B16, V6.B16
|
||||
VEXT $8, V3.B16, V2.B16, V7.B16
|
||||
WORD $0xce63c004 //SM3PARTW1 V4.4S, V0.4S, V3.4S
|
||||
WORD $0xce66c4e4 //SM3PARTW2 V4.4S, V7.4S, V6.4S
|
||||
VEOR V1.B16, V0.B16, V10.B16
|
||||
@ -181,9 +181,9 @@ blockloop:
|
||||
WORD $0xce40bca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 3
|
||||
|
||||
// Extension
|
||||
VEXT $3, V3.B16, V2.B16, V0.B16
|
||||
VEXT $3, V2.B16, V1.B16, V6.B16
|
||||
VEXT $2, V4.B16, V3.B16, V7.B16
|
||||
VEXT $12, V3.B16, V2.B16, V0.B16
|
||||
VEXT $12, V2.B16, V1.B16, V6.B16
|
||||
VEXT $8, V4.B16, V3.B16, V7.B16
|
||||
WORD $0xce64c020 //SM3PARTW1 V0.4S, V1.4S, V4.4S
|
||||
WORD $0xce66c4e0 //SM3PARTW2 V0.4S, V7.4S, V6.4S
|
||||
VEOR V2.B16, V1.B16, V10.B16
|
||||
@ -206,9 +206,9 @@ blockloop:
|
||||
WORD $0xce41bca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 3
|
||||
|
||||
// Extension
|
||||
VEXT $3, V4.B16, V3.B16, V1.B16
|
||||
VEXT $3, V3.B16, V2.B16, V6.B16
|
||||
VEXT $2, V0.B16, V4.B16, V7.B16
|
||||
VEXT $12, V4.B16, V3.B16, V1.B16
|
||||
VEXT $12, V3.B16, V2.B16, V6.B16
|
||||
VEXT $8, V0.B16, V4.B16, V7.B16
|
||||
WORD $0xce60c041 //SM3PARTW1 V1.4S, V2.4S, V0.4S
|
||||
WORD $0xce66c4e1 //SM3PARTW2 V1.4S, V7.4S, V6.4S
|
||||
VEOR V3.B16, V2.B16, V10.B16
|
||||
@ -231,9 +231,9 @@ blockloop:
|
||||
WORD $0xce42bca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 3
|
||||
|
||||
// Extension
|
||||
VEXT $3, V0.B16, V4.B16, V2.B16
|
||||
VEXT $3, V4.B16, V3.B16, V6.B16
|
||||
VEXT $2, V1.B16, V0.B16, V7.B16
|
||||
VEXT $12, V0.B16, V4.B16, V2.B16
|
||||
VEXT $12, V4.B16, V3.B16, V6.B16
|
||||
VEXT $8, V1.B16, V0.B16, V7.B16
|
||||
WORD $0xce61c062 //SM3PARTW1 V2.4S, V3.4S, V1.4S
|
||||
WORD $0xce66c4e2 //SM3PARTW2 V2.4S, V7.4S, V6.4S
|
||||
VEOR V4.B16, V3.B16, V10.B16
|
||||
@ -256,9 +256,9 @@ blockloop:
|
||||
WORD $0xce43bca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 3
|
||||
|
||||
// Extension
|
||||
VEXT $3, V1.B16, V0.B16, V3.B16
|
||||
VEXT $3, V0.B16, V4.B16, V6.B16
|
||||
VEXT $2, V2.B16, V1.B16, V7.B16
|
||||
VEXT $12, V1.B16, V0.B16, V3.B16
|
||||
VEXT $12, V0.B16, V4.B16, V6.B16
|
||||
VEXT $8, V2.B16, V1.B16, V7.B16
|
||||
WORD $0xce62c083 //SM3PARTW1 V3.4S, V4.4S, V2.4S
|
||||
WORD $0xce66c4e3 //SM3PARTW2 V3.4S, V7.4S, V6.4S
|
||||
VEOR V0.B16, V4.B16, V10.B16
|
||||
@ -281,9 +281,9 @@ blockloop:
|
||||
WORD $0xce44bca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 3
|
||||
|
||||
// Extension
|
||||
VEXT $3, V2.B16, V1.B16, V4.B16
|
||||
VEXT $3, V1.B16, V0.B16, V6.B16
|
||||
VEXT $2, V3.B16, V2.B16, V7.B16
|
||||
VEXT $12, V2.B16, V1.B16, V4.B16
|
||||
VEXT $12, V1.B16, V0.B16, V6.B16
|
||||
VEXT $8, V3.B16, V2.B16, V7.B16
|
||||
WORD $0xce63c004 //SM3PARTW1 V4.4S, V0.4S, V3.4S
|
||||
WORD $0xce66c4e4 //SM3PARTW2 V4.4S, V7.4S, V6.4S
|
||||
VEOR V1.B16, V0.B16, V10.B16
|
||||
@ -306,9 +306,9 @@ blockloop:
|
||||
WORD $0xce40bca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 3
|
||||
|
||||
// Extension
|
||||
VEXT $3, V3.B16, V2.B16, V0.B16
|
||||
VEXT $3, V2.B16, V1.B16, V6.B16
|
||||
VEXT $2, V4.B16, V3.B16, V7.B16
|
||||
VEXT $12, V3.B16, V2.B16, V0.B16
|
||||
VEXT $12, V2.B16, V1.B16, V6.B16
|
||||
VEXT $8, V4.B16, V3.B16, V7.B16
|
||||
WORD $0xce64c020 //SM3PARTW1 V0.4S, V1.4S, V4.4S
|
||||
WORD $0xce66c4e0 //SM3PARTW2 V0.4S, V7.4S, V6.4S
|
||||
VEOR V2.B16, V1.B16, V10.B16
|
||||
@ -331,9 +331,9 @@ blockloop:
|
||||
WORD $0xce41bca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 3
|
||||
|
||||
// Extension
|
||||
VEXT $3, V4.B16, V3.B16, V1.B16
|
||||
VEXT $3, V3.B16, V2.B16, V6.B16
|
||||
VEXT $2, V0.B16, V4.B16, V7.B16
|
||||
VEXT $12, V4.B16, V3.B16, V1.B16
|
||||
VEXT $12, V3.B16, V2.B16, V6.B16
|
||||
VEXT $8, V0.B16, V4.B16, V7.B16
|
||||
WORD $0xce60c041 //SM3PARTW1 V1.4S, V2.4S, V0.4S
|
||||
WORD $0xce66c4e1 //SM3PARTW2 V1.4S, V7.4S, V6.4S
|
||||
VEOR V3.B16, V2.B16, V10.B16
|
||||
|
Loading…
x
Reference in New Issue
Block a user