diff --git a/CMakeLists.txt b/CMakeLists.txt index 86e29b79..6275bc98 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -142,6 +142,7 @@ if(BUILD_TESTING) tests/ipaddr.c tests/ipc.c tests/overload.c + tests/pollset.c tests/prefix.c tests/rbtree.c tests/signals.c diff --git a/Makefile.am b/Makefile.am index 3ab8c27f..0eb46a60 100644 --- a/Makefile.am +++ b/Makefile.am @@ -137,6 +137,7 @@ check_PROGRAMS += \ tests/iol \ tests/tcp \ tests/ipc \ + tests/pollset \ tests/prefix \ tests/socks5 \ tests/suffix \ diff --git a/epoll.c.inc b/epoll.c.inc index c925125e..389f9c61 100644 --- a/epoll.c.inc +++ b/epoll.c.inc @@ -177,8 +177,13 @@ int dill_pollset_clean(int fd) { struct dill_ctx_pollset *ctx = &dill_getctx->pollset; struct dill_fdinfo *fdi = &ctx->fdinfos[fd]; if(!fdi->cached) return 0; - /* We cannot clean an fd that someone is waiting for. */ - if(dill_slow(fdi->in || fdi->out)) {errno = EBUSY; return -1;} + /* Cancel any coroutines still waiting on this fd. dill_trigger calls + dill_docancel which cancels ALL clauses on the coroutine — not just + the fd clause but also timers, channel waits, etc. The fd cancel + callbacks (dill_fdcancelin/out) run synchronously during this call + and NULL the in/out pointers before we proceed to cleanup. */ + if(fdi->in) dill_trigger(&fdi->in->cl, ECANCELED); + if(fdi->out) dill_trigger(&fdi->out->cl, ECANCELED); /* Remove the file descriptor from the pollset if it is still there. */ if(fdi->currevs) { struct epoll_event ev; diff --git a/fd.c b/fd.c index 92e93887..7704fff3 100644 --- a/fd.c +++ b/fd.c @@ -94,17 +94,17 @@ int dill_fd_unblock(int s) { if (opt == -1) opt = 0; int rc = fcntl(s, F_SETFL, opt | O_NONBLOCK); - dill_assert(rc == 0); + if(dill_slow(rc != 0)) return -1; /* Allow re-using the same local address rapidly. */ opt = 1; rc = setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof (opt)); - dill_assert(rc == 0); + if(dill_slow(rc != 0)) return -1; /* If possible, prevent SIGPIPE signal when writing to the connection already closed by the peer. */ #ifdef SO_NOSIGPIPE opt = 1; rc = setsockopt (s, SOL_SOCKET, SO_NOSIGPIPE, &opt, sizeof (opt)); - dill_assert (rc == 0 || errno == EINVAL); + if(dill_slow(rc != 0 && errno != EINVAL)) return -1; #endif return 0; } @@ -385,7 +385,7 @@ int dill_fd_recv(int s, struct dill_fd_rxbuf *rxbuf, struct dill_iolist *first, void dill_fd_close(int s) { int rc = dill_fdclean(s); - dill_assert(rc == 0); + if(dill_slow(rc != 0)) { close(s); return; } /* Discard any pending outbound data. If SO_LINGER option cannot be set, never mind and continue anyway. */ struct linger lng; diff --git a/kqueue.c.inc b/kqueue.c.inc index 92a0674f..f9621ef1 100644 --- a/kqueue.c.inc +++ b/kqueue.c.inc @@ -174,8 +174,13 @@ int dill_pollset_clean(int fd) { struct dill_ctx_pollset *ctx = &dill_getctx->pollset; struct dill_fdinfo *fdi = &ctx->fdinfos[fd]; if(!fdi->cached) return 0; - /* We cannot clean an fd that someone is waiting for. */ - if(dill_slow(fdi->in || fdi->out)) {errno = EBUSY; return -1;} + /* Cancel any coroutines still waiting on this fd. dill_trigger calls + dill_docancel which cancels ALL clauses on the coroutine — not just + the fd clause but also timers, channel waits, etc. The fd cancel + callbacks (dill_fdcancelin/out) run synchronously during this call + and NULL the in/out pointers before we proceed to cleanup. */ + if(fdi->in) dill_trigger(&fdi->in->cl, ECANCELED); + if(fdi->out) dill_trigger(&fdi->out->cl, ECANCELED); /* Remove the file descriptor from the pollset if it is still there. */ int nevs = 0; struct kevent evs[2]; diff --git a/libdill.c b/libdill.c index 37605350..dec22eae 100644 --- a/libdill.c +++ b/libdill.c @@ -56,6 +56,7 @@ int dill_fdin(int fd, int64_t deadline) { int id = dill_wait(); if(dill_slow(id < 0)) return -1; if(dill_slow(id == 2)) {errno = ETIMEDOUT; return -1;} + if(dill_slow(errno != 0)) return -1; return 0; } @@ -74,6 +75,7 @@ int dill_fdout(int fd, int64_t deadline) { int id = dill_wait(); if(dill_slow(id < 0)) return -1; if(dill_slow(id == 2)) {errno = ETIMEDOUT; return -1;} + if(dill_slow(errno != 0)) return -1; return 0; } diff --git a/poll.c.inc b/poll.c.inc index a5757994..a51e413b 100644 --- a/poll.c.inc +++ b/poll.c.inc @@ -168,7 +168,13 @@ int dill_pollset_clean(int fd) { struct dill_ctx_pollset *ctx = &dill_getctx->pollset; struct dill_fdinfo *fdi = &ctx->fdinfos[fd]; if(!fdi->cached) return 0; - if(dill_slow(fdi->in || fdi->out)) {errno = EBUSY; return -1;} + /* Cancel any coroutines still waiting on this fd. dill_trigger calls + dill_docancel which cancels ALL clauses on the coroutine — not just + the fd clause but also timers, channel waits, etc. The fd cancel + callbacks (dill_fdcancelin/out) run synchronously during this call + and NULL the in/out pointers before we proceed to cleanup. */ + if(fdi->in) dill_trigger(&fdi->in->cl, ECANCELED); + if(fdi->out) dill_trigger(&fdi->out->cl, ECANCELED); /* If the fd happens to still be in the pollset remove it. */ if(fdi->idx >= 0) { --ctx->pollset_size; diff --git a/tests/pollset.c b/tests/pollset.c new file mode 100644 index 00000000..58e06c50 --- /dev/null +++ b/tests/pollset.c @@ -0,0 +1,132 @@ +/* + + Copyright (c) 2017 Martin Sustrik + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom + the Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + IN THE SOFTWARE. + +*/ + +#include + +#include +#include +#include +#include +#include + +#include "assert.h" +#include "../libdill.h" + +/* Coroutine that blocks in tcp_accept and reports the errno back + through a channel when the accept completes (or fails). */ +coroutine void blocked_accepter(int ls, int ch) { + int as = tcp_accept(ls, NULL, -1); + int err = errno; + if(as >= 0) hclose(as); + int rc = chsend(ch, &err, sizeof(err), -1); + errno_assert(rc == 0); +} + +/* Coroutine that connects and sends data, then waits to be cancelled. */ +coroutine void connector(int port) { + struct ipaddr addr; + int rc = ipaddr_remote(&addr, "127.0.0.1", port, 0, -1); + errno_assert(rc == 0); + int cs = tcp_connect(&addr, now() + 1000); + errno_assert(cs >= 0); + rc = bsend(cs, "XYZ", 3, -1); + errno_assert(rc == 0); + rc = msleep(-1); + errno_assert(rc == -1 && errno == ECANCELED); + rc = hclose(cs); + errno_assert(rc == 0); +} + +int main(void) { + int rc; + + /* Phase 1: Close a listener while a coroutine is blocked in tcp_accept. + Before the fix, dill_pollset_clean returned EBUSY, leaving the fd's + pollset cache entry with cached=1. The accept coroutine would hang + or hit an assert. */ + struct ipaddr addr; + rc = ipaddr_local(&addr, NULL, 0, 0); + errno_assert(rc == 0); + int ls = tcp_listen(&addr, 10); + errno_assert(ls >= 0); + + int ch[2]; + rc = chmake(ch); + errno_assert(rc == 0); + int cr = go(blocked_accepter(ls, ch[0])); + errno_assert(cr >= 0); + + /* Let the coroutine block in tcp_accept. */ + rc = msleep(now() + 50); + errno_assert(rc == 0); + + /* Close the listener — triggers dill_pollset_clean which should + cancel the blocked accept coroutine instead of returning EBUSY. */ + rc = hclose(ls); + errno_assert(rc == 0); + + /* The accepter should have been woken with ECANCELED. */ + int err; + rc = chrecv(ch[1], &err, sizeof(err), now() + 1000); + errno_assert(rc == 0); + assert(err == ECANCELED); + + rc = hclose(cr); + errno_assert(rc == 0); + rc = hclose(ch[0]); + errno_assert(rc == 0); + rc = hclose(ch[1]); + errno_assert(rc == 0); + + /* Phase 2: Exercise fd-number reuse. + The OS will likely hand back the same fd number that the old listener + used. If the pollset cache was not properly cleared (the old bug), + the new fd would be treated as already registered and either hang + forever or hit an assert in kqueue/epoll. */ + rc = ipaddr_local(&addr, NULL, 0, 0); + errno_assert(rc == 0); + int ls2 = tcp_listen(&addr, 10); + errno_assert(ls2 >= 0); + int port = ipaddr_port(&addr); + + int cr2 = go(connector(port)); + errno_assert(cr2 >= 0); + int as = tcp_accept(ls2, NULL, now() + 1000); + errno_assert(as >= 0); + + /* Verify the reused fd is fully functional. */ + char buf[3]; + rc = brecv(as, buf, 3, now() + 1000); + errno_assert(rc == 0); + assert(buf[0] == 'X' && buf[1] == 'Y' && buf[2] == 'Z'); + + rc = hclose(as); + errno_assert(rc == 0); + rc = hclose(cr2); + errno_assert(rc == 0); + rc = hclose(ls2); + errno_assert(rc == 0); + + return 0; +} \ No newline at end of file