diff --git a/man/rules/meson.build b/man/rules/meson.build index 6295330c5e..95eac54a65 100644 --- a/man/rules/meson.build +++ b/man/rules/meson.build @@ -272,7 +272,9 @@ manpages = [ ''], ['sd_event_add_inotify', '3', - ['sd_event_inotify_handler_t', 'sd_event_source_get_inotify_mask'], + ['sd_event_add_inotify_fd', + 'sd_event_inotify_handler_t', + 'sd_event_source_get_inotify_mask'], ''], ['sd_event_add_io', '3', diff --git a/man/sd_event_add_inotify.xml b/man/sd_event_add_inotify.xml index 605863c356..17415bbc17 100644 --- a/man/sd_event_add_inotify.xml +++ b/man/sd_event_add_inotify.xml @@ -18,6 +18,7 @@ sd_event_add_inotify + sd_event_add_inotify_fd sd_event_source_get_inotify_mask sd_event_inotify_handler_t @@ -47,6 +48,16 @@ void *userdata + + int sd_event_add_inotify_fd + sd_event *event + sd_event_source **source + int fd + uint32_t mask + sd_event_inotify_handler_t handler + void *userdata + + int sd_event_source_get_inotify_mask sd_event_source *source @@ -72,6 +83,11 @@ inotify7 for further information. + sd_event_add_inotify_fd() is identical to + sd_event_add_inotify(), except that it takes a file descriptor to an inode (possibly + an O_PATH one, but any other will do too) instead of a path in the file + system. + If multiple event sources are installed for the same inode the backing inotify watch descriptor is automatically shared. The mask parameter may contain any flag defined by the inotify API, with the exception of IN_MASK_ADD. diff --git a/src/libsystemd/libsystemd.sym b/src/libsystemd/libsystemd.sym index 449918093c..dc37472b18 100644 --- a/src/libsystemd/libsystemd.sym +++ b/src/libsystemd/libsystemd.sym @@ -593,5 +593,6 @@ global: LIBSYSTEMD_250 { global: + sd_event_add_inotify_fd; sd_event_source_set_ratelimit_expire_callback; } LIBSYSTEMD_248; diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c index 549103bc6f..9247087c41 100644 --- a/src/libsystemd/sd-event/sd-event.c +++ b/src/libsystemd/sd-event/sd-event.c @@ -263,6 +263,11 @@ struct inotify_data { * the events locally if they can't be coalesced). */ unsigned n_pending; + /* If this counter is non-zero, don't GC the inotify data object even if not used to watch any inode + * anymore. This is useful to pin the object for a bit longer, after the last event source needing it + * is gone. */ + unsigned n_busy; + /* A linked list of all inotify objects with data already read, that still need processing. We keep this list * to make it efficient to figure out what inotify objects to process data on next. */ LIST_FIELDS(struct inotify_data, buffered); @@ -1129,6 +1134,7 @@ static void source_free(sd_event_source *s) { free(s->description); free(s); } +DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free); static int source_set_pending(sd_event_source *s, bool b) { int r; @@ -1844,6 +1850,29 @@ static void event_free_inode_data( free(d); } +static void event_gc_inotify_data( + sd_event *e, + struct inotify_data *d) { + + assert(e); + + /* GCs the inotify data object if we don't need it anymore. That's the case if we don't want to watch + * any inode with it anymore, which in turn happens if no event source of this priority is interested + * in any inode any longer. That said, we maintain an extra busy counter: if non-zero we'll delay GC + * (under the expectation that the GC is called again once the counter is decremented). */ + + if (!d) + return; + + if (!hashmap_isempty(d->inodes)) + return; + + if (d->n_busy > 0) + return; + + event_free_inotify_data(e, d); +} + static void event_gc_inode_data( sd_event *e, struct inode_data *d) { @@ -1861,8 +1890,7 @@ static void event_gc_inode_data( inotify_data = d->inotify_data; event_free_inode_data(e, d); - if (inotify_data && hashmap_isempty(inotify_data->inodes)) - event_free_inotify_data(e, inotify_data); + event_gc_inotify_data(e, inotify_data); } static int event_make_inode_data( @@ -1985,26 +2013,25 @@ static int inode_data_realize_watch(sd_event *e, struct inode_data *d) { return 1; } -_public_ int sd_event_add_inotify( +static int event_add_inotify_fd_internal( sd_event *e, sd_event_source **ret, - const char *path, + int fd, + bool donate, uint32_t mask, sd_event_inotify_handler_t callback, void *userdata) { - bool rm_inotify = false, rm_inode = false; + _cleanup_close_ int donated_fd = donate ? fd : -1; + _cleanup_(source_freep) sd_event_source *s = NULL; struct inotify_data *inotify_data = NULL; struct inode_data *inode_data = NULL; - _cleanup_close_ int fd = -1; - sd_event_source *s; struct stat st; int r; assert_return(e, -EINVAL); assert_return(e = event_resolve(e), -ENOPKG); - assert_return(path, -EINVAL); - assert_return(callback, -EINVAL); + assert_return(fd >= 0, -EBADF); assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); assert_return(!event_pid_changed(e), -ECHILD); @@ -2014,12 +2041,6 @@ _public_ int sd_event_add_inotify( if (mask & IN_MASK_ADD) return -EINVAL; - fd = open(path, O_PATH|O_CLOEXEC| - (mask & IN_ONLYDIR ? O_DIRECTORY : 0)| - (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0)); - if (fd < 0) - return -errno; - if (fstat(fd, &st) < 0) return -errno; @@ -2035,18 +2056,28 @@ _public_ int sd_event_add_inotify( /* Allocate an inotify object for this priority, and an inode object within it */ r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data); if (r < 0) - goto fail; - rm_inotify = r > 0; + return r; r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data); - if (r < 0) - goto fail; - rm_inode = r > 0; + if (r < 0) { + event_gc_inotify_data(e, inotify_data); + return r; + } /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of * the event source, until then, for which we need the original inode. */ if (inode_data->fd < 0) { - inode_data->fd = TAKE_FD(fd); + if (donated_fd >= 0) + inode_data->fd = TAKE_FD(donated_fd); + else { + inode_data->fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); + if (inode_data->fd < 0) { + r = -errno; + event_gc_inode_data(e, inode_data); + return r; + } + } + LIST_PREPEND(to_close, e->inode_data_to_close, inode_data); } @@ -2054,28 +2085,56 @@ _public_ int sd_event_add_inotify( LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s); s->inotify.inode_data = inode_data; - rm_inode = rm_inotify = false; - /* Actually realize the watch now */ r = inode_data_realize_watch(e, inode_data); if (r < 0) - goto fail; - - (void) sd_event_source_set_description(s, path); + return r; if (ret) *ret = s; + TAKE_PTR(s); return 0; +} -fail: - source_free(s); +_public_ int sd_event_add_inotify_fd( + sd_event *e, + sd_event_source **ret, + int fd, + uint32_t mask, + sd_event_inotify_handler_t callback, + void *userdata) { - if (rm_inode) - event_free_inode_data(e, inode_data); + return event_add_inotify_fd_internal(e, ret, fd, /* donate= */ false, mask, callback, userdata); +} - if (rm_inotify) - event_free_inotify_data(e, inotify_data); +_public_ int sd_event_add_inotify( + sd_event *e, + sd_event_source **ret, + const char *path, + uint32_t mask, + sd_event_inotify_handler_t callback, + void *userdata) { + + sd_event_source *s; + int fd, r; + + assert_return(path, -EINVAL); + + fd = open(path, O_PATH|O_CLOEXEC| + (mask & IN_ONLYDIR ? O_DIRECTORY : 0)| + (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0)); + if (fd < 0) + return -errno; + + r = event_add_inotify_fd_internal(e, &s, fd, /* donate= */ true, mask, callback, userdata); + if (r < 0) + return r; + + (void) sd_event_source_set_description(s, path); + + if (ret) + *ret = s; return r; } @@ -3459,13 +3518,23 @@ static int source_dispatch(sd_event_source *s) { sz = offsetof(struct inotify_event, name) + d->buffer.ev.len; assert(d->buffer_filled >= sz); + /* If the inotify callback destroys the event source then this likely means we don't need to + * watch the inode anymore, and thus also won't need the inotify object anymore. But if we'd + * free it immediately, then we couldn't drop the event from the inotify event queue without + * memory corruption anymore, as below. Hence, let's not free it immediately, but mark it + * "busy" with a counter (which will ensure it's not GC'ed away prematurely). Let's then + * explicitly GC it after we are done dropping the inotify event from the buffer. */ + d->n_busy++; r = s->inotify.callback(s, &d->buffer.ev, s->userdata); + d->n_busy--; - /* When no event is pending anymore on this inotify object, then let's drop the event from the - * buffer. */ + /* When no event is pending anymore on this inotify object, then let's drop the event from + * the inotify event queue buffer. */ if (d->n_pending == 0) event_inotify_data_drop(e, d, sz); + /* Now we don't want to access 'd' anymore, it's OK to GC now. */ + event_gc_inotify_data(e, d); break; } diff --git a/src/libsystemd/sd-event/test-event.c b/src/libsystemd/sd-event/test-event.c index 9135b22839..df3de1bfb0 100644 --- a/src/libsystemd/sd-event/test-event.c +++ b/src/libsystemd/sd-event/test-event.c @@ -588,6 +588,40 @@ static void test_ratelimit(void) { assert_se(expired == 0); } +static int inotify_self_destroy_handler(sd_event_source *s, const struct inotify_event *ev, void *userdata) { + sd_event_source **p = userdata; + + assert_se(ev); + assert_se(p); + assert_se(*p == s); + + assert_se(FLAGS_SET(ev->mask, IN_ATTRIB)); + + assert_se(sd_event_exit(sd_event_source_get_event(s), 0) >= 0); + + *p = sd_event_source_unref(*p); /* here's what we actually intend to test: we destroy the event + * source from inside the event source handler */ + return 1; +} + +static void test_inotify_self_destroy(void) { + _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL; + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + char path[] = "/tmp/inotifyXXXXXX"; + _cleanup_close_ int fd = -1; + + /* Tests that destroying an inotify event source from its own handler is safe */ + + assert_se(sd_event_default(&e) >= 0); + + fd = mkostemp_safe(path); + assert_se(fd >= 0); + assert_se(sd_event_add_inotify_fd(e, &s, fd, IN_ATTRIB, inotify_self_destroy_handler, &s) >= 0); + fd = safe_close(fd); + assert_se(unlink(path) >= 0); /* This will trigger IN_ATTRIB because link count goes to zero */ + assert_se(sd_event_loop(e) >= 0); +} + int main(int argc, char *argv[]) { log_set_max_level(LOG_DEBUG); @@ -602,5 +636,7 @@ int main(int argc, char *argv[]) { test_ratelimit(); + test_inotify_self_destroy(); + return 0; } diff --git a/src/systemd/sd-event.h b/src/systemd/sd-event.h index 960bea1ac4..48b053c127 100644 --- a/src/systemd/sd-event.h +++ b/src/systemd/sd-event.h @@ -91,6 +91,7 @@ int sd_event_add_time_relative(sd_event *e, sd_event_source **s, clockid_t clock int sd_event_add_signal(sd_event *e, sd_event_source **s, int sig, sd_event_signal_handler_t callback, void *userdata); int sd_event_add_child(sd_event *e, sd_event_source **s, pid_t pid, int options, sd_event_child_handler_t callback, void *userdata); int sd_event_add_inotify(sd_event *e, sd_event_source **s, const char *path, uint32_t mask, sd_event_inotify_handler_t callback, void *userdata); +int sd_event_add_inotify_fd(sd_event *e, sd_event_source **s, int fd, uint32_t mask, sd_event_inotify_handler_t callback, void *userdata); int sd_event_add_defer(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata); int sd_event_add_post(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata); int sd_event_add_exit(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata);