add asm versions of imult64 and umult64 for x86_64

(22 cycles -> 12 cycles on my Athlon 64)
master
leitner 19 years ago
parent 28ac95863a
commit 66b342099c

@@ -18,6 +18,8 @@
unless you #define _NETBSD_SOURCE, and then their CMSG_* macros use
it. This is horrible even by OpenBSD standards)
remove support for ip6.int (it's no longer delegated)
add asm versions of imult64 and umult64 for x86_64
(22 cycles -> 12 cycles on my Athlon 64)
0.24:
fix scan_to_sa (Tim Lorenz)

@@ -1,3 +1,24 @@
#ifdef __x86_64__
void imult64() {
asm volatile(
"xchgq %rdx,%rsi\n"
"movq %rdi,%rax\n"
"imulq %rdx\n"
"jc 1f\n" /* overflow */
"movq %rax,(%rsi)\n"
"xorq %rax,%rax\n"
"inc %rax\n"
"ret\n"
"1:\n"
"xorq %rax,%rax\n"
/* the closing ret is generated by gcc */
);
}
#else
#include "safemult.h" #include "safemult.h"
int imult64(int64 a,int64 b,int64* c) { int imult64(int64 a,int64 b,int64* c) {
@@ -11,3 +32,4 @@ int imult64(int64 a,int64 b,int64* c) {
return 1;
}
#endif
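
For reference: the asm version above relies on the System V AMD64 calling convention, so a arrives in %rdi, b in %rsi and the result pointer c in %rdx; the initial xchgq moves the pointer out of imulq's way, since imulq writes the high half of the product to %rdx. The caller sketch below, which assumes safemult.h also pulls in libowfat's int64 typedef and uses made-up test values, illustrates the 1-on-success / 0-on-overflow contract:

#include <stdio.h>
#include "safemult.h"   /* assumed to declare imult64() and provide the int64 typedef */

int main(void) {
  int64 r;
  /* 10^9 * 10^9 = 10^18 fits in a signed 64-bit integer, so imult64 returns 1 */
  if (imult64(1000000000LL,1000000000LL,&r))
    printf("ok: %lld\n",(long long)r);
  /* 2^62 * 4 = 2^64 does not fit, so imult64 returns 0 */
  if (!imult64(0x4000000000000000LL,4,&r))
    printf("overflow detected\n");
  return 0;
}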

@@ -1,3 +1,23 @@
#ifdef __x86_64__
void umult64() {
asm volatile(
"xchgq %rdx,%rsi\n"
"movq %rdi,%rax\n"
"mulq %rdx\n"
"jc 1f\n" /* overflow */
"movq %rax,(%rsi)\n"
"xorq %rax,%rax\n"
"inc %rax\n"
"ret\n"
"1:\n"
"xorq %rax,%rax\n"
/* the closing ret is generated by gcc */
);
}
#else
#include "safemult.h" #include "safemult.h"
/* return 1 for overflow, 0 for ok */ /* return 1 for overflow, 0 for ok */
@@ -20,3 +40,4 @@ int umult64(uint64 a,uint64 b,uint64* c) {
return 1;
}
#endif
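
For comparison with the single mulq above (whose carry flag is set exactly when the upper 64 bits of the 128-bit product are nonzero), here is a hedged portable sketch of the same overflow check done with 32-bit halves. The function name umult64_portable is made up, it uses <stdint.h> types instead of libowfat's uint64/uint32 typedefs to stay self-contained, and it is not claimed to be the exact fallback code elided from this diff:

#include <stdint.h>

/* Sketch: store a*b in *c and return 1 if the product fits in 64 bits, 0 on overflow. */
int umult64_portable(uint64_t a,uint64_t b,uint64_t* c) {
  uint32_t ahi=(uint32_t)(a>>32), alo=(uint32_t)a;
  uint32_t bhi=(uint32_t)(b>>32), blo=(uint32_t)b;
  uint64_t mid,lo;
  /* a*b = ahi*bhi*2^64 + (ahi*blo + alo*bhi)*2^32 + alo*blo */
  if (ahi && bhi) return 0;                        /* the 2^64 term alone overflows */
  mid=(uint64_t)ahi*blo + (uint64_t)alo*bhi;       /* at most one term is nonzero here */
  if (mid>0xffffffffu) return 0;                   /* mid*2^32 would not fit */
  lo=(uint64_t)alo*blo;
  if (lo>UINT64_MAX-(mid<<32)) return 0;           /* the final addition would wrap */
  *c=(mid<<32)+lo;
  return 1;
}

On gcc 5+ and clang, __builtin_mul_overflow(a,b,c) performs the same check (with the opposite return convention, nonzero meaning overflow) and typically compiles down to a mulq plus a flag test, much like the asm above spells out by hand.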
