diff --git a/CHANGES b/CHANGES index ab25abe..089679b 100644 --- a/CHANGES +++ b/CHANGES @@ -2,6 +2,7 @@ remove OpenBSD #warning (obsd maintainer says no longer needed) move headers to upon install fix fmt_ip6 (Erwin Hoffmann) + add MSG_ZEROCOPY support (only used for buffers >8k) 0.31: special case buffer_get_token with token length 1 through memccpy (almost 4x speedup) diff --git a/io/iob_send.c b/io/iob_send.c index 48a6d00..447ca53 100644 --- a/io/iob_send.c +++ b/io/iob_send.c @@ -119,10 +119,12 @@ int64 iob_send(int64 s,io_batch* b) { #include #include #include "havealloca.h" +#include "io_internal.h" #include "iob_internal.h" int64 iob_send(int64 s,io_batch* b) { iob_entry* e,* last; + io_entry* E; struct iovec* v; uint64 total; int64 sent; @@ -137,8 +139,13 @@ int64 iob_send(int64 s,io_batch* b) { #ifdef TCP_CORK int corked=0; #endif +#ifdef MSG_ZEROCOPY + size_t sum=0; +#endif if (b->bytesleft==0) return 0; + E=iarray_get(&io_fds,s); + if (!E) { errno=EBADF; return -3; } last=(iob_entry*)(((char*)array_start(&b->b))+array_bytes(&b->b)); v=alloca(b->bufs*sizeof(struct iovec)); total=0; @@ -154,6 +161,9 @@ int64 iob_send(int64 s,io_batch* b) { if (e[i].type==FROMFILE) break; v[i].iov_base=(char*)(e[i].buf+e[i].offset); v[i].iov_len=e[i].n; +#ifdef MSG_ZEROCOPY + if (sum + v[i].iov_len > sum) sum += v[i].iov_len; else sum=-1; +#endif } headers=i; #ifdef HAVE_BSDSENDFILE @@ -210,20 +220,39 @@ eagain: corked=1; } if (headers) { - if (docork<0) { /* write+writev */ + int ZEROCOPY=0; +#ifdef MSG_ZEROCOPY + static int nozerocopy; + int dozerocopy=1; +#endif + if (nozerocopy && dozerocopy==0 && docork<0) { /* write+writev */ if (headers==1) /* cosmetics for strace */ sent=write(s,v[0].iov_base,v[0].iov_len); else sent=writev(s,v,headers); } else { +#ifdef MSG_ZEROCOPY + if (!nozerocopy && sum>=8*1024) { + /* MSG_ZEROCOPY has page table management overhead, + * it only pays off after 8k or so */ + if (E->zerocopy==0) { + if (setsockopt(s, SOL_SOCKET, SO_ZEROCOPY, (int[]){ 1 },sizeof(int)) == 0) { + E->zerocopy=1; + ZEROCOPY=MSG_ZEROCOPY; + } else + nozerocopy=1; + } else + ZEROCOPY=MSG_ZEROCOPY; + } +#endif if (headers==1) /* cosmetics for strace */ - sent=sendto(s,v[0].iov_base,v[0].iov_len,MSG_MORE, NULL, 0); + sent=sendto(s, v[0].iov_base, v[0].iov_len, MSG_MORE|ZEROCOPY, NULL, 0); else { struct msghdr msg; memset(&msg,0,sizeof(msg)); msg.msg_iov=v; msg.msg_iovlen=headers; - sent=sendmsg(s,&msg,MSG_MORE); + sent=sendmsg(s,&msg,MSG_MORE|ZEROCOPY); } } if (sent==-1) { diff --git a/io_internal.h b/io_internal.h index 90def97..2d7c1bf 100644 --- a/io_internal.h +++ b/io_internal.h @@ -47,6 +47,7 @@ typedef struct { unsigned int kernelwantwrite:1; unsigned int epolladded:1; unsigned int closed:1; /* io_close called, but close deferred because of outstanding events */ + unsigned int zerocopy:1; /* linux: setsockopt SO_ZEROCOPY done */ #ifdef __MINGW32__ unsigned int readqueued:2; unsigned int writequeued:2;