diff --git a/io/io_waituntil2.c b/io/io_waituntil2.c index 13a5157..5bef5af 100644 --- a/io/io_waituntil2.c +++ b/io/io_waituntil2.c @@ -261,27 +261,89 @@ int64 io_waituntil2(int64 milliseconds) { for (i=n-1; i>=0; --i) { io_entry* e=iarray_get(&io_fds,y[--n].ident); if (e) { + /* copies from epoll above */ + int curevents=0,newevents; + if (e->kernelwantread) curevents |= EVFILT_READ; + if (e->kernelwantwrite) curevents |= EVFILT_WRITE; + + newevents=0; + if (!e->canread || e->wantread) { + newevents |= EVFILT_READ; + e->kernelwantread=1; + } else + e->kernelwantread=0; + if (!e->canwrite || e->wantwrite) { + newevents |= EVFILT_WRITE; + e->kernelwantwrite=1; + } else + e->kernelwantwrite=0; + if (y[n].flags&EV_ERROR) { /* error; signal whatever app is looking for */ - if (e->wantread) y[n].filter=EVFILT_READ; else + if (e->wantread) y[n].filter=EVFILT_READ; if (e->wantwrite) y[n].filter=EVFILT_WRITE; } - if (!e->canread && (y[n].filter==EVFILT_READ)) { - e->canread=1; -#ifdef DEBUG - assert(e->next_read==-1); -#endif - e->next_read=first_readable; - first_readable=y[n].ident; + + /* if we think we can not read, but the kernel tells us that we + * can, put this fd in the relevant data structures */ + if (!e->canread && (y[n].filter&EVFILT_WRITE)) { + if (e->canread) { + newevents &= ~EVFILT_READ; + } else { + e->canread=1; + if (e->wantread) { + e->next_read=first_readable; + first_readable=y[n].ident; + } + } } - if (y[n].filter==EVFILT_WRITE) { - e->canwrite=1; -#ifdef DEBUG - assert(e->next_write==-1); -#endif - e->next_write=first_writeable; - first_writeable=y[i].ident; + + /* if the kernel says the fd is writable, ... */ + if (y[i].filter&EVFILT_WRITE) { + /* Usually, if the kernel says a descriptor is writable, we + * note it and do not tell the kernel not to tell us again. + * The idea is that once we notify the caller that the fd is + * writable, and the caller handles the event, the caller will + * just ask to be notified of future write events again. We + * are trying to save the superfluous epoll_ctl syscalls. + * If e->canwrite is set, then this gamble did not work out. + * We told the caller, yet after the caller is done we still + * got another write event. Clearly the user is implementing + * some kind of throttling and we can tell the kernel to leave + * us alone for now. */ + if (e->canwrite) { + newevents &= ~EVFILT_WRITE; + e->kernelwantwrite=0; + } else { + /* If !e->wantwrite: The laziness optimization in + * io_dontwantwrite hit. We did not tell the kernel that we + * are no longer interested in writing to save the syscall. + * Now we know we could write if we wanted; remember that + * and then go on. */ + e->canwrite=1; + if (e->wantwrite) { + e->next_write=first_writeable; + first_writeable=y[n].ident; + } + } } + + if (newevents != curevents) { + struct kevent kev; + struct timespec ts; + ts.tv_sec=0; ts.tv_nsec=0; + if (curevents &~ newevents) { + EV_SET(&kev, n, curevents &~ newevents, EV_DELETE, 0, 0, 0); + kevent(io_master,&kev,1,0,0,&ts); + } + if (newevents &~ curevents) { + EV_SET(&kev, n, newevents, EV_ADD|EV_ENABLE, 0, 0, 0); + kevent(io_master,&kev,1,0,0,&ts); + } + if (!newevents) + --io_wanted_fds; + } + #ifdef DEBUG } else { fprintf(stderr,"got kevent on fd#%d, which is not in array!\n",y[n].ident);