diff --git a/CHANGES b/CHANGES
index 69d6408..eecd219 100644
--- a/CHANGES
+++ b/CHANGES
@@ -18,6 +18,8 @@
     unless you #define _NETBSD_SOURCE, and then their CMSG_* macros use
     it.  This is horrible even by OpenBSD standards)
   remove support for ip6.int (it's no longer delegated)
+  add asm versions of imult64 and umult64 for x86_64
+    (22 cycles -> 12 cycles on my Athlon 64)
 
 0.24:
   fix scan_to_sa (Tim Lorenz)
diff --git a/mult/imult64.c b/mult/imult64.c
index be59d66..961d226 100644
--- a/mult/imult64.c
+++ b/mult/imult64.c
@@ -1,3 +1,24 @@
+#ifdef __x86_64__
+
+void imult64() {
+  asm volatile(
+    "xchgq %rdx,%rsi\n"
+    "movq %rdi,%rax\n"
+    "imulq %rdx\n"
+    "jc 1f\n"        /* overflow */
+    "movq %rax,(%rsi)\n"
+    "xorq %rax,%rax\n"
+    "inc %rax\n"
+    "ret\n"
+    "1:\n"
+    "xorq %rax,%rax\n"
+    /* the closing ret is generated by gcc */
+  );
+}
+
+
+#else
+
 #include "safemult.h"
 
 int imult64(int64 a,int64 b,int64* c) {
@@ -11,3 +32,4 @@ int imult64(int64 a,int64 b,int64* c) {
   return 1;
 }
 
+#endif
diff --git a/mult/umult64.c b/mult/umult64.c
index 6d5e37a..5b644dd 100644
--- a/mult/umult64.c
+++ b/mult/umult64.c
@@ -1,3 +1,23 @@
+#ifdef __x86_64__
+
+void umult64() {
+  asm volatile(
+    "xchgq %rdx,%rsi\n"
+    "movq %rdi,%rax\n"
+    "mulq %rdx\n"
+    "jc 1f\n"        /* overflow */
+    "movq %rax,(%rsi)\n"
+    "xorq %rax,%rax\n"
+    "inc %rax\n"
+    "ret\n"
+    "1:\n"
+    "xorq %rax,%rax\n"
+    /* the closing ret is generated by gcc */
+  );
+}
+
+#else
+
 #include "safemult.h"
 
 /* return 1 for overflow, 0 for ok */
@@ -20,3 +40,4 @@ int umult64(uint64 a,uint64 b,uint64* c) {
   return 1;
 }
 
+#endif
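
A rough usage sketch of the interface these files implement (not part of the patch): both functions store the product through the third argument and, judging by the new asm paths, return 0 on overflow and nonzero on success. The uint64 typedef and the umult64 prototype are assumed to be provided by safemult.h as shown in the diff above.

#include <stdio.h>
#include "safemult.h"

int main(void) {
  uint64 c;
  /* 2^32 * 2^32 = 2^64 does not fit in 64 bits, so this should report overflow */
  if (umult64(0x100000000ull, 0x100000000ull, &c))
    printf("product: %llu\n", (unsigned long long)c);
  else
    printf("overflow\n");
  return 0;
}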