From ad8be0c4fdfa5308432fef8c0e3f8084bc909dc5 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Thu, 23 Jan 2014 10:33:11 -0500 Subject: [PATCH] Revert fsnotify changes as they cause slab corruption for multiple people --- kernel.spec | 7 + revert-fsnotify-changes.patch | 2027 +++++++++++++++++++++++++++++++++ 2 files changed, 2034 insertions(+) create mode 100644 revert-fsnotify-changes.patch diff --git a/kernel.spec b/kernel.spec index 66b75b380..b43525a2b 100644 --- a/kernel.spec +++ b/kernel.spec @@ -644,6 +644,8 @@ Patch25185: perf-plugin-dir.patch Patch25186: peterz-printk-timestamp-fix.patch +Patch25187: revert-fsnotify-changes.patch + # END OF PATCH DEFINITIONS %endif @@ -1310,6 +1312,10 @@ ApplyPatch perf-plugin-dir.patch ApplyPatch peterz-printk-timestamp-fix.patch +# Davej and others are reporting slab corruption with the fsnotify changes. +# Revert them until they're worked out upstream +ApplyPatch revert-fsnotify-changes.patch + # END OF PATCH APPLICATIONS %endif @@ -2089,6 +2095,7 @@ fi # || || %changelog * Thu Jan 23 2014 Josh Boyer - 3.14.0-0.rc0.git6.1.1 +- Revert fsnotify changes as they cause slab corruption for multiple people - Linux v3.13-3995-g0dc3fd0 * Thu Jan 23 2014 Josh Boyer - 3.14.0-0.rc0.git5.1 diff --git a/revert-fsnotify-changes.patch b/revert-fsnotify-changes.patch new file mode 100644 index 000000000..6a4c7ab29 --- /dev/null +++ b/revert-fsnotify-changes.patch @@ -0,0 +1,2027 @@ +From 8fc16f2010c5d2f4200f172da86590da73f6c89e Mon Sep 17 00:00:00 2001 +From: Josh Boyer +Date: Thu, 23 Jan 2014 10:20:08 -0500 +Subject: [PATCH 1/3] Revert "fsnotify: remove pointless NULL initializers" + +This reverts commit 56b27cf6030dd36c56a5542ab8bfa406d337f083. +--- + fs/notify/dnotify/dnotify.c | 3 +++ + fs/notify/fanotify/fanotify.c | 1 + + kernel/audit_tree.c | 2 ++ + kernel/audit_watch.c | 3 +++ + 4 files changed, 9 insertions(+) + +diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c +index 0b9ff43..928688e 100644 +--- a/fs/notify/dnotify/dnotify.c ++++ b/fs/notify/dnotify/dnotify.c +@@ -138,6 +138,9 @@ static void dnotify_free_mark(struct fsnotify_mark *fsn_mark) + + static struct fsnotify_ops dnotify_fsnotify_ops = { + .handle_event = dnotify_handle_event, ++ .free_group_priv = NULL, ++ .freeing_mark = NULL, ++ .free_event = NULL, + }; + + /* +diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c +index 5877262..1f8f052 100644 +--- a/fs/notify/fanotify/fanotify.c ++++ b/fs/notify/fanotify/fanotify.c +@@ -230,4 +230,5 @@ const struct fsnotify_ops fanotify_fsnotify_ops = { + .handle_event = fanotify_handle_event, + .free_group_priv = fanotify_free_group_priv, + .free_event = fanotify_free_event, ++ .freeing_mark = NULL, + }; +diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c +index 67ccf0e..ae8103b 100644 +--- a/kernel/audit_tree.c ++++ b/kernel/audit_tree.c +@@ -936,6 +936,8 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify + + static const struct fsnotify_ops audit_tree_ops = { + .handle_event = audit_tree_handle_event, ++ .free_group_priv = NULL, ++ .free_event = NULL, + .freeing_mark = audit_tree_freeing_mark, + }; + +diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c +index 2596fac..367ac9a 100644 +--- a/kernel/audit_watch.c ++++ b/kernel/audit_watch.c +@@ -505,6 +505,9 @@ static int audit_watch_handle_event(struct fsnotify_group *group, + + static const struct fsnotify_ops audit_watch_fsnotify_ops = { + .handle_event = audit_watch_handle_event, ++ .free_group_priv = NULL, ++ .freeing_mark = NULL, ++ .free_event = NULL, + }; + + static int __init audit_watch_init(void) +-- +1.8.4.2 + + +From 24bd25cea32de37512189a9aeb1c2bd3b2a83cfe Mon Sep 17 00:00:00 2001 +From: Josh Boyer +Date: Thu, 23 Jan 2014 10:20:17 -0500 +Subject: [PATCH 2/3] Revert "fsnotify: remove .should_send_event callback" + +This reverts commit 83c4c4b0a3aadc1ce7b5b2870ce1fc1f65498da0. +--- + fs/notify/dnotify/dnotify.c | 22 ++++++++++++++++++---- + fs/notify/fanotify/fanotify.c | 18 ++++++++---------- + fs/notify/fsnotify.c | 5 +++++ + fs/notify/inotify/inotify_fsnotify.c | 24 +++++++++++++++++------- + include/linux/fsnotify_backend.h | 4 ++++ + kernel/audit_tree.c | 12 +++++++++++- + kernel/audit_watch.c | 9 +++++++++ + 7 files changed, 72 insertions(+), 22 deletions(-) + +diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c +index 928688e..bfca53d 100644 +--- a/fs/notify/dnotify/dnotify.c ++++ b/fs/notify/dnotify/dnotify.c +@@ -94,10 +94,6 @@ static int dnotify_handle_event(struct fsnotify_group *group, + struct fown_struct *fown; + __u32 test_mask = mask & ~FS_EVENT_ON_CHILD; + +- /* not a dir, dnotify doesn't care */ +- if (!S_ISDIR(inode->i_mode)) +- return 0; +- + BUG_ON(vfsmount_mark); + + dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark); +@@ -125,6 +121,23 @@ static int dnotify_handle_event(struct fsnotify_group *group, + return 0; + } + ++/* ++ * Given an inode and mask determine if dnotify would be interested in sending ++ * userspace notification for that pair. ++ */ ++static bool dnotify_should_send_event(struct fsnotify_group *group, ++ struct inode *inode, ++ struct fsnotify_mark *inode_mark, ++ struct fsnotify_mark *vfsmount_mark, ++ __u32 mask, void *data, int data_type) ++{ ++ /* not a dir, dnotify doesn't care */ ++ if (!S_ISDIR(inode->i_mode)) ++ return false; ++ ++ return true; ++} ++ + static void dnotify_free_mark(struct fsnotify_mark *fsn_mark) + { + struct dnotify_mark *dn_mark = container_of(fsn_mark, +@@ -138,6 +151,7 @@ static void dnotify_free_mark(struct fsnotify_mark *fsn_mark) + + static struct fsnotify_ops dnotify_fsnotify_ops = { + .handle_event = dnotify_handle_event, ++ .should_send_event = dnotify_should_send_event, + .free_group_priv = NULL, + .freeing_mark = NULL, + .free_event = NULL, +diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c +index 1f8f052..c26268d 100644 +--- a/fs/notify/fanotify/fanotify.c ++++ b/fs/notify/fanotify/fanotify.c +@@ -88,17 +88,18 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group, + } + #endif + +-static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, ++static bool fanotify_should_send_event(struct fsnotify_group *group, ++ struct inode *inode, ++ struct fsnotify_mark *inode_mark, + struct fsnotify_mark *vfsmnt_mark, +- u32 event_mask, +- void *data, int data_type) ++ __u32 event_mask, void *data, int data_type) + { + __u32 marks_mask, marks_ignored_mask; + struct path *path = data; + +- pr_debug("%s: inode_mark=%p vfsmnt_mark=%p mask=%x data=%p" +- " data_type=%d\n", __func__, inode_mark, vfsmnt_mark, +- event_mask, data, data_type); ++ pr_debug("%s: group=%p inode=%p inode_mark=%p vfsmnt_mark=%p " ++ "mask=%x data=%p data_type=%d\n", __func__, group, inode, ++ inode_mark, vfsmnt_mark, event_mask, data, data_type); + + /* if we don't have enough info to send an event to userspace say no */ + if (data_type != FSNOTIFY_EVENT_PATH) +@@ -162,10 +163,6 @@ static int fanotify_handle_event(struct fsnotify_group *group, + BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM); + BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); + +- if (!fanotify_should_send_event(inode_mark, fanotify_mark, mask, data, +- data_type)) +- return 0; +- + pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode, + mask); + +@@ -228,6 +225,7 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event) + + const struct fsnotify_ops fanotify_fsnotify_ops = { + .handle_event = fanotify_handle_event, ++ .should_send_event = fanotify_should_send_event, + .free_group_priv = fanotify_free_group_priv, + .free_event = fanotify_free_event, + .freeing_mark = NULL, +diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c +index 1d4e1ea..7c754c9 100644 +--- a/fs/notify/fsnotify.c ++++ b/fs/notify/fsnotify.c +@@ -177,6 +177,11 @@ static int send_to_group(struct inode *to_tell, + if (!inode_test_mask && !vfsmount_test_mask) + return 0; + ++ if (group->ops->should_send_event(group, to_tell, inode_mark, ++ vfsmount_mark, mask, data, ++ data_is) == false) ++ return 0; ++ + return group->ops->handle_event(group, to_tell, inode_mark, + vfsmount_mark, mask, data, data_is, + file_name); +diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c +index aad1a35..6fabbd1 100644 +--- a/fs/notify/inotify/inotify_fsnotify.c ++++ b/fs/notify/inotify/inotify_fsnotify.c +@@ -81,13 +81,6 @@ int inotify_handle_event(struct fsnotify_group *group, + + BUG_ON(vfsmount_mark); + +- if ((inode_mark->mask & FS_EXCL_UNLINK) && +- (data_type == FSNOTIFY_EVENT_PATH)) { +- struct path *path = data; +- +- if (d_unlinked(path->dentry)) +- return 0; +- } + if (file_name) { + len = strlen(file_name); + alloc_len += len + 1; +@@ -129,6 +122,22 @@ static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify + inotify_ignored_and_remove_idr(fsn_mark, group); + } + ++static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, ++ struct fsnotify_mark *inode_mark, ++ struct fsnotify_mark *vfsmount_mark, ++ __u32 mask, void *data, int data_type) ++{ ++ if ((inode_mark->mask & FS_EXCL_UNLINK) && ++ (data_type == FSNOTIFY_EVENT_PATH)) { ++ struct path *path = data; ++ ++ if (d_unlinked(path->dentry)) ++ return false; ++ } ++ ++ return true; ++} ++ + /* + * This is NEVER supposed to be called. Inotify marks should either have been + * removed from the idr when the watch was removed or in the +@@ -180,6 +189,7 @@ static void inotify_free_event(struct fsnotify_event *fsn_event) + + const struct fsnotify_ops inotify_fsnotify_ops = { + .handle_event = inotify_handle_event, ++ .should_send_event = inotify_should_send_event, + .free_group_priv = inotify_free_group_priv, + .free_event = inotify_free_event, + .freeing_mark = inotify_freeing_mark, +diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h +index 7d8d5e6..7f3d7dcf 100644 +--- a/include/linux/fsnotify_backend.h ++++ b/include/linux/fsnotify_backend.h +@@ -94,6 +94,10 @@ struct fsnotify_fname; + * userspace messages that marks have been removed. + */ + struct fsnotify_ops { ++ bool (*should_send_event)(struct fsnotify_group *group, struct inode *inode, ++ struct fsnotify_mark *inode_mark, ++ struct fsnotify_mark *vfsmount_mark, ++ __u32 mask, void *data, int data_type); + int (*handle_event)(struct fsnotify_group *group, + struct inode *inode, + struct fsnotify_mark *inode_mark, +diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c +index ae8103b..bcc0b18 100644 +--- a/kernel/audit_tree.c ++++ b/kernel/audit_tree.c +@@ -918,7 +918,8 @@ static int audit_tree_handle_event(struct fsnotify_group *group, + u32 mask, void *data, int data_type, + const unsigned char *file_name) + { +- return 0; ++ BUG(); ++ return -EOPNOTSUPP; + } + + static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group) +@@ -934,8 +935,17 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify + BUG_ON(atomic_read(&entry->refcnt) < 1); + } + ++static bool audit_tree_send_event(struct fsnotify_group *group, struct inode *inode, ++ struct fsnotify_mark *inode_mark, ++ struct fsnotify_mark *vfsmount_mark, ++ __u32 mask, void *data, int data_type) ++{ ++ return false; ++} ++ + static const struct fsnotify_ops audit_tree_ops = { + .handle_event = audit_tree_handle_event, ++ .should_send_event = audit_tree_send_event, + .free_group_priv = NULL, + .free_event = NULL, + .freeing_mark = audit_tree_freeing_mark, +diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c +index 367ac9a..a760c32 100644 +--- a/kernel/audit_watch.c ++++ b/kernel/audit_watch.c +@@ -465,6 +465,14 @@ void audit_remove_watch_rule(struct audit_krule *krule) + } + } + ++static bool audit_watch_should_send_event(struct fsnotify_group *group, struct inode *inode, ++ struct fsnotify_mark *inode_mark, ++ struct fsnotify_mark *vfsmount_mark, ++ __u32 mask, void *data, int data_type) ++{ ++ return true; ++} ++ + /* Update watch data in audit rules based on fsnotify events. */ + static int audit_watch_handle_event(struct fsnotify_group *group, + struct inode *to_tell, +@@ -504,6 +512,7 @@ static int audit_watch_handle_event(struct fsnotify_group *group, + } + + static const struct fsnotify_ops audit_watch_fsnotify_ops = { ++ .should_send_event = audit_watch_should_send_event, + .handle_event = audit_watch_handle_event, + .free_group_priv = NULL, + .freeing_mark = NULL, +-- +1.8.4.2 + + +From 0be830523466a37554f73c26487d71ed313a44d1 Mon Sep 17 00:00:00 2001 +From: Josh Boyer +Date: Thu, 23 Jan 2014 10:20:25 -0500 +Subject: [PATCH 3/3] Revert "fsnotify: do not share events between + notification groups" + +This reverts commit 7053aee26a3548ebaba046ae2e52396ccf56ac6c. +--- + fs/notify/dnotify/dnotify.c | 11 +- + fs/notify/fanotify/fanotify.c | 211 +++++++++++----------- + fs/notify/fanotify/fanotify.h | 23 --- + fs/notify/fanotify/fanotify_user.c | 41 ++--- + fs/notify/fsnotify.c | 37 ++-- + fs/notify/group.c | 1 - + fs/notify/inotify/inotify.h | 21 +-- + fs/notify/inotify/inotify_fsnotify.c | 125 ++++++++----- + fs/notify/inotify/inotify_user.c | 86 ++++++--- + fs/notify/notification.c | 334 ++++++++++++++++++++++++++++++++--- + include/linux/fsnotify_backend.h | 114 +++++++++--- + kernel/audit_tree.c | 8 +- + kernel/audit_watch.c | 14 +- + 13 files changed, 708 insertions(+), 318 deletions(-) + delete mode 100644 fs/notify/fanotify/fanotify.h + +diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c +index bfca53d..1fedd5f 100644 +--- a/fs/notify/dnotify/dnotify.c ++++ b/fs/notify/dnotify/dnotify.c +@@ -82,20 +82,21 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark) + * events. + */ + static int dnotify_handle_event(struct fsnotify_group *group, +- struct inode *inode, + struct fsnotify_mark *inode_mark, + struct fsnotify_mark *vfsmount_mark, +- u32 mask, void *data, int data_type, +- const unsigned char *file_name) ++ struct fsnotify_event *event) + { + struct dnotify_mark *dn_mark; ++ struct inode *to_tell; + struct dnotify_struct *dn; + struct dnotify_struct **prev; + struct fown_struct *fown; +- __u32 test_mask = mask & ~FS_EVENT_ON_CHILD; ++ __u32 test_mask = event->mask & ~FS_EVENT_ON_CHILD; + + BUG_ON(vfsmount_mark); + ++ to_tell = event->to_tell; ++ + dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark); + + spin_lock(&inode_mark->lock); +@@ -154,7 +155,7 @@ static struct fsnotify_ops dnotify_fsnotify_ops = { + .should_send_event = dnotify_should_send_event, + .free_group_priv = NULL, + .freeing_mark = NULL, +- .free_event = NULL, ++ .free_event_priv = NULL, + }; + + /* +diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c +index c26268d..0c2f912 100644 +--- a/fs/notify/fanotify/fanotify.c ++++ b/fs/notify/fanotify/fanotify.c +@@ -9,27 +9,31 @@ + #include + #include + +-#include "fanotify.h" +- +-static bool should_merge(struct fsnotify_event *old_fsn, +- struct fsnotify_event *new_fsn) ++static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new) + { +- struct fanotify_event_info *old, *new; ++ pr_debug("%s: old=%p new=%p\n", __func__, old, new); + ++ if (old->to_tell == new->to_tell && ++ old->data_type == new->data_type && ++ old->tgid == new->tgid) { ++ switch (old->data_type) { ++ case (FSNOTIFY_EVENT_PATH): + #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS +- /* dont merge two permission events */ +- if ((old_fsn->mask & FAN_ALL_PERM_EVENTS) && +- (new_fsn->mask & FAN_ALL_PERM_EVENTS)) +- return false; ++ /* dont merge two permission events */ ++ if ((old->mask & FAN_ALL_PERM_EVENTS) && ++ (new->mask & FAN_ALL_PERM_EVENTS)) ++ return false; + #endif +- pr_debug("%s: old=%p new=%p\n", __func__, old_fsn, new_fsn); +- old = FANOTIFY_E(old_fsn); +- new = FANOTIFY_E(new_fsn); +- +- if (old_fsn->inode == new_fsn->inode && old->tgid == new->tgid && +- old->path.mnt == new->path.mnt && +- old->path.dentry == new->path.dentry) +- return true; ++ if ((old->path.mnt == new->path.mnt) && ++ (old->path.dentry == new->path.dentry)) ++ return true; ++ break; ++ case (FSNOTIFY_EVENT_NONE): ++ return true; ++ default: ++ BUG(); ++ }; ++ } + return false; + } + +@@ -37,28 +41,59 @@ static bool should_merge(struct fsnotify_event *old_fsn, + static struct fsnotify_event *fanotify_merge(struct list_head *list, + struct fsnotify_event *event) + { +- struct fsnotify_event *test_event; +- bool do_merge = false; ++ struct fsnotify_event_holder *test_holder; ++ struct fsnotify_event *test_event = NULL; ++ struct fsnotify_event *new_event; + + pr_debug("%s: list=%p event=%p\n", __func__, list, event); + +- list_for_each_entry_reverse(test_event, list, list) { +- if (should_merge(test_event, event)) { +- do_merge = true; ++ ++ list_for_each_entry_reverse(test_holder, list, event_list) { ++ if (should_merge(test_holder->event, event)) { ++ test_event = test_holder->event; + break; + } + } + +- if (!do_merge) ++ if (!test_event) + return NULL; + +- test_event->mask |= event->mask; +- return test_event; ++ fsnotify_get_event(test_event); ++ ++ /* if they are exactly the same we are done */ ++ if (test_event->mask == event->mask) ++ return test_event; ++ ++ /* ++ * if the refcnt == 2 this is the only queue ++ * for this event and so we can update the mask ++ * in place. ++ */ ++ if (atomic_read(&test_event->refcnt) == 2) { ++ test_event->mask |= event->mask; ++ return test_event; ++ } ++ ++ new_event = fsnotify_clone_event(test_event); ++ ++ /* done with test_event */ ++ fsnotify_put_event(test_event); ++ ++ /* couldn't allocate memory, merge was not possible */ ++ if (unlikely(!new_event)) ++ return ERR_PTR(-ENOMEM); ++ ++ /* build new event and replace it on the list */ ++ new_event->mask = (test_event->mask | event->mask); ++ fsnotify_replace_event(test_holder, new_event); ++ ++ /* we hold a reference on new_event from clone_event */ ++ return new_event; + } + + #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + static int fanotify_get_response_from_access(struct fsnotify_group *group, +- struct fanotify_event_info *event) ++ struct fsnotify_event *event) + { + int ret; + +@@ -71,6 +106,7 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group, + return 0; + + /* userspace responded, convert to something usable */ ++ spin_lock(&event->lock); + switch (event->response) { + case FAN_ALLOW: + ret = 0; +@@ -80,6 +116,7 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group, + ret = -EPERM; + } + event->response = 0; ++ spin_unlock(&event->lock); + + pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__, + group, event, ret); +@@ -88,8 +125,48 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group, + } + #endif + ++static int fanotify_handle_event(struct fsnotify_group *group, ++ struct fsnotify_mark *inode_mark, ++ struct fsnotify_mark *fanotify_mark, ++ struct fsnotify_event *event) ++{ ++ int ret = 0; ++ struct fsnotify_event *notify_event = NULL; ++ ++ BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS); ++ BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY); ++ BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE); ++ BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE); ++ BUILD_BUG_ON(FAN_OPEN != FS_OPEN); ++ BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD); ++ BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW); ++ BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM); ++ BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM); ++ BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); ++ ++ pr_debug("%s: group=%p event=%p\n", __func__, group, event); ++ ++ notify_event = fsnotify_add_notify_event(group, event, NULL, fanotify_merge); ++ if (IS_ERR(notify_event)) ++ return PTR_ERR(notify_event); ++ ++#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS ++ if (event->mask & FAN_ALL_PERM_EVENTS) { ++ /* if we merged we need to wait on the new event */ ++ if (notify_event) ++ event = notify_event; ++ ret = fanotify_get_response_from_access(group, event); ++ } ++#endif ++ ++ if (notify_event) ++ fsnotify_put_event(notify_event); ++ ++ return ret; ++} ++ + static bool fanotify_should_send_event(struct fsnotify_group *group, +- struct inode *inode, ++ struct inode *to_tell, + struct fsnotify_mark *inode_mark, + struct fsnotify_mark *vfsmnt_mark, + __u32 event_mask, void *data, int data_type) +@@ -97,8 +174,8 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, + __u32 marks_mask, marks_ignored_mask; + struct path *path = data; + +- pr_debug("%s: group=%p inode=%p inode_mark=%p vfsmnt_mark=%p " +- "mask=%x data=%p data_type=%d\n", __func__, group, inode, ++ pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p " ++ "mask=%x data=%p data_type=%d\n", __func__, group, to_tell, + inode_mark, vfsmnt_mark, event_mask, data, data_type); + + /* if we don't have enough info to send an event to userspace say no */ +@@ -140,70 +217,6 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, + return false; + } + +-static int fanotify_handle_event(struct fsnotify_group *group, +- struct inode *inode, +- struct fsnotify_mark *inode_mark, +- struct fsnotify_mark *fanotify_mark, +- u32 mask, void *data, int data_type, +- const unsigned char *file_name) +-{ +- int ret = 0; +- struct fanotify_event_info *event; +- struct fsnotify_event *fsn_event; +- struct fsnotify_event *notify_fsn_event; +- +- BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS); +- BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY); +- BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE); +- BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE); +- BUILD_BUG_ON(FAN_OPEN != FS_OPEN); +- BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD); +- BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW); +- BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM); +- BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM); +- BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); +- +- pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode, +- mask); +- +- event = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL); +- if (unlikely(!event)) +- return -ENOMEM; +- +- fsn_event = &event->fse; +- fsnotify_init_event(fsn_event, inode, mask); +- event->tgid = get_pid(task_tgid(current)); +- if (data_type == FSNOTIFY_EVENT_PATH) { +- struct path *path = data; +- event->path = *path; +- path_get(&event->path); +- } else { +- event->path.mnt = NULL; +- event->path.dentry = NULL; +- } +-#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS +- event->response = 0; +-#endif +- +- notify_fsn_event = fsnotify_add_notify_event(group, fsn_event, +- fanotify_merge); +- if (notify_fsn_event) { +- /* Our event wasn't used in the end. Free it. */ +- fsnotify_destroy_event(group, fsn_event); +- if (IS_ERR(notify_fsn_event)) +- return PTR_ERR(notify_fsn_event); +- /* We need to ask about a different events after a merge... */ +- event = FANOTIFY_E(notify_fsn_event); +- fsn_event = notify_fsn_event; +- } +- +-#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS +- if (fsn_event->mask & FAN_ALL_PERM_EVENTS) +- ret = fanotify_get_response_from_access(group, event); +-#endif +- return ret; +-} +- + static void fanotify_free_group_priv(struct fsnotify_group *group) + { + struct user_struct *user; +@@ -213,20 +226,10 @@ static void fanotify_free_group_priv(struct fsnotify_group *group) + free_uid(user); + } + +-static void fanotify_free_event(struct fsnotify_event *fsn_event) +-{ +- struct fanotify_event_info *event; +- +- event = FANOTIFY_E(fsn_event); +- path_put(&event->path); +- put_pid(event->tgid); +- kmem_cache_free(fanotify_event_cachep, event); +-} +- + const struct fsnotify_ops fanotify_fsnotify_ops = { + .handle_event = fanotify_handle_event, + .should_send_event = fanotify_should_send_event, + .free_group_priv = fanotify_free_group_priv, +- .free_event = fanotify_free_event, ++ .free_event_priv = NULL, + .freeing_mark = NULL, + }; +diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h +deleted file mode 100644 +index 0e90174..0000000 +--- a/fs/notify/fanotify/fanotify.h ++++ /dev/null +@@ -1,23 +0,0 @@ +-#include +-#include +-#include +- +-extern struct kmem_cache *fanotify_event_cachep; +- +-struct fanotify_event_info { +- struct fsnotify_event fse; +- /* +- * We hold ref to this path so it may be dereferenced at any point +- * during this object's lifetime +- */ +- struct path path; +- struct pid *tgid; +-#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS +- u32 response; /* userspace answer to question */ +-#endif +-}; +- +-static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse) +-{ +- return container_of(fse, struct fanotify_event_info, fse); +-} +diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c +index 57d7c08..e44cb64 100644 +--- a/fs/notify/fanotify/fanotify_user.c ++++ b/fs/notify/fanotify/fanotify_user.c +@@ -19,7 +19,6 @@ + + #include "../../mount.h" + #include "../fdinfo.h" +-#include "fanotify.h" + + #define FANOTIFY_DEFAULT_MAX_EVENTS 16384 + #define FANOTIFY_DEFAULT_MAX_MARKS 8192 +@@ -29,12 +28,11 @@ extern const struct fsnotify_ops fanotify_fsnotify_ops; + + static struct kmem_cache *fanotify_mark_cache __read_mostly; + static struct kmem_cache *fanotify_response_event_cache __read_mostly; +-struct kmem_cache *fanotify_event_cachep __read_mostly; + + struct fanotify_response_event { + struct list_head list; + __s32 fd; +- struct fanotify_event_info *event; ++ struct fsnotify_event *event; + }; + + /* +@@ -63,8 +61,8 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, + } + + static int create_fd(struct fsnotify_group *group, +- struct fanotify_event_info *event, +- struct file **file) ++ struct fsnotify_event *event, ++ struct file **file) + { + int client_fd; + struct file *new_file; +@@ -75,6 +73,12 @@ static int create_fd(struct fsnotify_group *group, + if (client_fd < 0) + return client_fd; + ++ if (event->data_type != FSNOTIFY_EVENT_PATH) { ++ WARN_ON(1); ++ put_unused_fd(client_fd); ++ return -EINVAL; ++ } ++ + /* + * we need a new file handle for the userspace program so it can read even if it was + * originally opened O_WRONLY. +@@ -105,25 +109,23 @@ static int create_fd(struct fsnotify_group *group, + } + + static int fill_event_metadata(struct fsnotify_group *group, +- struct fanotify_event_metadata *metadata, +- struct fsnotify_event *fsn_event, +- struct file **file) ++ struct fanotify_event_metadata *metadata, ++ struct fsnotify_event *event, ++ struct file **file) + { + int ret = 0; +- struct fanotify_event_info *event; + + pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, +- group, metadata, fsn_event); ++ group, metadata, event); + + *file = NULL; +- event = container_of(fsn_event, struct fanotify_event_info, fse); + metadata->event_len = FAN_EVENT_METADATA_LEN; + metadata->metadata_len = FAN_EVENT_METADATA_LEN; + metadata->vers = FANOTIFY_METADATA_VERSION; + metadata->reserved = 0; +- metadata->mask = fsn_event->mask & FAN_ALL_OUTGOING_EVENTS; ++ metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS; + metadata->pid = pid_vnr(event->tgid); +- if (unlikely(fsn_event->mask & FAN_Q_OVERFLOW)) ++ if (unlikely(event->mask & FAN_Q_OVERFLOW)) + metadata->fd = FAN_NOFD; + else { + metadata->fd = create_fd(group, event, file); +@@ -207,7 +209,7 @@ static int prepare_for_access_response(struct fsnotify_group *group, + if (!re) + return -ENOMEM; + +- re->event = FANOTIFY_E(event); ++ re->event = event; + re->fd = fd; + + mutex_lock(&group->fanotify_data.access_mutex); +@@ -215,7 +217,7 @@ static int prepare_for_access_response(struct fsnotify_group *group, + if (atomic_read(&group->fanotify_data.bypass_perm)) { + mutex_unlock(&group->fanotify_data.access_mutex); + kmem_cache_free(fanotify_response_event_cache, re); +- FANOTIFY_E(event)->response = FAN_ALLOW; ++ event->response = FAN_ALLOW; + return 0; + } + +@@ -271,7 +273,7 @@ out_close_fd: + out: + #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + if (event->mask & FAN_ALL_PERM_EVENTS) { +- FANOTIFY_E(event)->response = FAN_DENY; ++ event->response = FAN_DENY; + wake_up(&group->fanotify_data.access_waitq); + } + #endif +@@ -319,7 +321,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, + if (IS_ERR(kevent)) + break; + ret = copy_event_to_user(group, kevent, buf); +- fsnotify_destroy_event(group, kevent); ++ fsnotify_put_event(kevent); + if (ret < 0) + break; + buf += ret; +@@ -407,7 +409,7 @@ static int fanotify_release(struct inode *ignored, struct file *file) + static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg) + { + struct fsnotify_group *group; +- struct fsnotify_event *fsn_event; ++ struct fsnotify_event_holder *holder; + void __user *p; + int ret = -ENOTTY; + size_t send_len = 0; +@@ -419,7 +421,7 @@ static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long ar + switch (cmd) { + case FIONREAD: + mutex_lock(&group->notification_mutex); +- list_for_each_entry(fsn_event, &group->notification_list, list) ++ list_for_each_entry(holder, &group->notification_list, event_list) + send_len += FAN_EVENT_METADATA_LEN; + mutex_unlock(&group->notification_mutex); + ret = put_user(send_len, (int __user *) p); +@@ -904,7 +906,6 @@ static int __init fanotify_user_setup(void) + fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC); + fanotify_response_event_cache = KMEM_CACHE(fanotify_response_event, + SLAB_PANIC); +- fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC); + + return 0; + } +diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c +index 7c754c9..4bb21d6 100644 +--- a/fs/notify/fsnotify.c ++++ b/fs/notify/fsnotify.c +@@ -128,7 +128,8 @@ static int send_to_group(struct inode *to_tell, + struct fsnotify_mark *vfsmount_mark, + __u32 mask, void *data, + int data_is, u32 cookie, +- const unsigned char *file_name) ++ const unsigned char *file_name, ++ struct fsnotify_event **event) + { + struct fsnotify_group *group = NULL; + __u32 inode_test_mask = 0; +@@ -169,10 +170,10 @@ static int send_to_group(struct inode *to_tell, + + pr_debug("%s: group=%p to_tell=%p mask=%x inode_mark=%p" + " inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x" +- " data=%p data_is=%d cookie=%d\n", ++ " data=%p data_is=%d cookie=%d event=%p\n", + __func__, group, to_tell, mask, inode_mark, + inode_test_mask, vfsmount_mark, vfsmount_test_mask, data, +- data_is, cookie); ++ data_is, cookie, *event); + + if (!inode_test_mask && !vfsmount_test_mask) + return 0; +@@ -182,9 +183,14 @@ static int send_to_group(struct inode *to_tell, + data_is) == false) + return 0; + +- return group->ops->handle_event(group, to_tell, inode_mark, +- vfsmount_mark, mask, data, data_is, +- file_name); ++ if (!*event) { ++ *event = fsnotify_create_event(to_tell, mask, data, ++ data_is, file_name, ++ cookie, GFP_KERNEL); ++ if (!*event) ++ return -ENOMEM; ++ } ++ return group->ops->handle_event(group, inode_mark, vfsmount_mark, *event); + } + + /* +@@ -199,6 +205,7 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, + struct hlist_node *inode_node = NULL, *vfsmount_node = NULL; + struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL; + struct fsnotify_group *inode_group, *vfsmount_group; ++ struct fsnotify_event *event = NULL; + struct mount *mnt; + int idx, ret = 0; + /* global tests shouldn't care about events on child only the specific event */ +@@ -251,18 +258,18 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, + + if (inode_group > vfsmount_group) { + /* handle inode */ +- ret = send_to_group(to_tell, inode_mark, NULL, mask, +- data, data_is, cookie, file_name); ++ ret = send_to_group(to_tell, inode_mark, NULL, mask, data, ++ data_is, cookie, file_name, &event); + /* we didn't use the vfsmount_mark */ + vfsmount_group = NULL; + } else if (vfsmount_group > inode_group) { +- ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, +- data, data_is, cookie, file_name); ++ ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, data, ++ data_is, cookie, file_name, &event); + inode_group = NULL; + } else { + ret = send_to_group(to_tell, inode_mark, vfsmount_mark, +- mask, data, data_is, cookie, +- file_name); ++ mask, data, data_is, cookie, file_name, ++ &event); + } + + if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) +@@ -278,6 +285,12 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, + ret = 0; + out: + srcu_read_unlock(&fsnotify_mark_srcu, idx); ++ /* ++ * fsnotify_create_event() took a reference so the event can't be cleaned ++ * up while we are still trying to add it to lists, drop that one. ++ */ ++ if (event) ++ fsnotify_put_event(event); + + return ret; + } +diff --git a/fs/notify/group.c b/fs/notify/group.c +index ee674fe..bd2625b 100644 +--- a/fs/notify/group.c ++++ b/fs/notify/group.c +@@ -99,7 +99,6 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) + INIT_LIST_HEAD(&group->marks_list); + + group->ops = ops; +- fsnotify_init_event(&group->overflow_event, NULL, FS_Q_OVERFLOW); + + return group; + } +diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h +index 485eef3..b6642e4 100644 +--- a/fs/notify/inotify/inotify.h ++++ b/fs/notify/inotify/inotify.h +@@ -2,12 +2,11 @@ + #include + #include /* struct kmem_cache */ + +-struct inotify_event_info { +- struct fsnotify_event fse; ++extern struct kmem_cache *event_priv_cachep; ++ ++struct inotify_event_private_data { ++ struct fsnotify_event_private_data fsnotify_event_priv_data; + int wd; +- u32 sync_cookie; +- int name_len; +- char name[]; + }; + + struct inotify_inode_mark { +@@ -15,18 +14,8 @@ struct inotify_inode_mark { + int wd; + }; + +-static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse) +-{ +- return container_of(fse, struct inotify_event_info, fse); +-} +- + extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, + struct fsnotify_group *group); +-extern int inotify_handle_event(struct fsnotify_group *group, +- struct inode *inode, +- struct fsnotify_mark *inode_mark, +- struct fsnotify_mark *vfsmount_mark, +- u32 mask, void *data, int data_type, +- const unsigned char *file_name); ++extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv); + + extern const struct fsnotify_ops inotify_fsnotify_ops; +diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c +index 6fabbd1..4216308 100644 +--- a/fs/notify/inotify/inotify_fsnotify.c ++++ b/fs/notify/inotify/inotify_fsnotify.c +@@ -34,80 +34,100 @@ + #include "inotify.h" + + /* +- * Check if 2 events contain the same information. ++ * Check if 2 events contain the same information. We do not compare private data ++ * but at this moment that isn't a problem for any know fsnotify listeners. + */ +-static bool event_compare(struct fsnotify_event *old_fsn, +- struct fsnotify_event *new_fsn) ++static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new) + { +- struct inotify_event_info *old, *new; +- +- if (old_fsn->mask & FS_IN_IGNORED) +- return false; +- old = INOTIFY_E(old_fsn); +- new = INOTIFY_E(new_fsn); +- if ((old_fsn->mask == new_fsn->mask) && +- (old_fsn->inode == new_fsn->inode) && +- (old->name_len == new->name_len) && +- (!old->name_len || !strcmp(old->name, new->name))) +- return true; ++ if ((old->mask == new->mask) && ++ (old->to_tell == new->to_tell) && ++ (old->data_type == new->data_type) && ++ (old->name_len == new->name_len)) { ++ switch (old->data_type) { ++ case (FSNOTIFY_EVENT_INODE): ++ /* remember, after old was put on the wait_q we aren't ++ * allowed to look at the inode any more, only thing ++ * left to check was if the file_name is the same */ ++ if (!old->name_len || ++ !strcmp(old->file_name, new->file_name)) ++ return true; ++ break; ++ case (FSNOTIFY_EVENT_PATH): ++ if ((old->path.mnt == new->path.mnt) && ++ (old->path.dentry == new->path.dentry)) ++ return true; ++ break; ++ case (FSNOTIFY_EVENT_NONE): ++ if (old->mask & FS_Q_OVERFLOW) ++ return true; ++ else if (old->mask & FS_IN_IGNORED) ++ return false; ++ return true; ++ }; ++ } + return false; + } + + static struct fsnotify_event *inotify_merge(struct list_head *list, + struct fsnotify_event *event) + { ++ struct fsnotify_event_holder *last_holder; + struct fsnotify_event *last_event; + +- last_event = list_entry(list->prev, struct fsnotify_event, list); +- if (!event_compare(last_event, event)) +- return NULL; ++ /* and the list better be locked by something too */ ++ spin_lock(&event->lock); ++ ++ last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list); ++ last_event = last_holder->event; ++ if (event_compare(last_event, event)) ++ fsnotify_get_event(last_event); ++ else ++ last_event = NULL; ++ ++ spin_unlock(&event->lock); ++ + return last_event; + } + +-int inotify_handle_event(struct fsnotify_group *group, +- struct inode *inode, +- struct fsnotify_mark *inode_mark, +- struct fsnotify_mark *vfsmount_mark, +- u32 mask, void *data, int data_type, +- const unsigned char *file_name) ++static int inotify_handle_event(struct fsnotify_group *group, ++ struct fsnotify_mark *inode_mark, ++ struct fsnotify_mark *vfsmount_mark, ++ struct fsnotify_event *event) + { + struct inotify_inode_mark *i_mark; +- struct inotify_event_info *event; ++ struct inode *to_tell; ++ struct inotify_event_private_data *event_priv; ++ struct fsnotify_event_private_data *fsn_event_priv; + struct fsnotify_event *added_event; +- struct fsnotify_event *fsn_event; +- int ret = 0; +- int len = 0; +- int alloc_len = sizeof(struct inotify_event_info); ++ int wd, ret = 0; + + BUG_ON(vfsmount_mark); + +- if (file_name) { +- len = strlen(file_name); +- alloc_len += len + 1; +- } ++ pr_debug("%s: group=%p event=%p to_tell=%p mask=%x\n", __func__, group, ++ event, event->to_tell, event->mask); + +- pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode, +- mask); ++ to_tell = event->to_tell; + + i_mark = container_of(inode_mark, struct inotify_inode_mark, + fsn_mark); ++ wd = i_mark->wd; + +- event = kmalloc(alloc_len, GFP_KERNEL); +- if (unlikely(!event)) ++ event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL); ++ if (unlikely(!event_priv)) + return -ENOMEM; + +- fsn_event = &event->fse; +- fsnotify_init_event(fsn_event, inode, mask); +- event->wd = i_mark->wd; +- event->name_len = len; +- if (len) +- strcpy(event->name, file_name); ++ fsn_event_priv = &event_priv->fsnotify_event_priv_data; + +- added_event = fsnotify_add_notify_event(group, fsn_event, inotify_merge); ++ fsnotify_get_group(group); ++ fsn_event_priv->group = group; ++ event_priv->wd = wd; ++ ++ added_event = fsnotify_add_notify_event(group, event, fsn_event_priv, inotify_merge); + if (added_event) { +- /* Our event wasn't used in the end. Free it. */ +- fsnotify_destroy_event(group, fsn_event); +- if (IS_ERR(added_event)) ++ inotify_free_event_priv(fsn_event_priv); ++ if (!IS_ERR(added_event)) ++ fsnotify_put_event(added_event); ++ else + ret = PTR_ERR(added_event); + } + +@@ -182,15 +202,22 @@ static void inotify_free_group_priv(struct fsnotify_group *group) + free_uid(group->inotify_data.user); + } + +-static void inotify_free_event(struct fsnotify_event *fsn_event) ++void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv) + { +- kfree(INOTIFY_E(fsn_event)); ++ struct inotify_event_private_data *event_priv; ++ ++ ++ event_priv = container_of(fsn_event_priv, struct inotify_event_private_data, ++ fsnotify_event_priv_data); ++ ++ fsnotify_put_group(fsn_event_priv->group); ++ kmem_cache_free(event_priv_cachep, event_priv); + } + + const struct fsnotify_ops inotify_fsnotify_ops = { + .handle_event = inotify_handle_event, + .should_send_event = inotify_should_send_event, + .free_group_priv = inotify_free_group_priv, +- .free_event = inotify_free_event, ++ .free_event_priv = inotify_free_event_priv, + .freeing_mark = inotify_freeing_mark, + }; +diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c +index 497395c..1bb6dc8 100644 +--- a/fs/notify/inotify/inotify_user.c ++++ b/fs/notify/inotify/inotify_user.c +@@ -50,6 +50,7 @@ static int inotify_max_queued_events __read_mostly; + static int inotify_max_user_watches __read_mostly; + + static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; ++struct kmem_cache *event_priv_cachep __read_mostly; + + #ifdef CONFIG_SYSCTL + +@@ -123,11 +124,8 @@ static unsigned int inotify_poll(struct file *file, poll_table *wait) + return ret; + } + +-static int round_event_name_len(struct fsnotify_event *fsn_event) ++static int round_event_name_len(struct fsnotify_event *event) + { +- struct inotify_event_info *event; +- +- event = INOTIFY_E(fsn_event); + if (!event->name_len) + return 0; + return roundup(event->name_len + 1, sizeof(struct inotify_event)); +@@ -171,27 +169,40 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, + * buffer we had in "get_one_event()" above. + */ + static ssize_t copy_event_to_user(struct fsnotify_group *group, +- struct fsnotify_event *fsn_event, ++ struct fsnotify_event *event, + char __user *buf) + { + struct inotify_event inotify_event; +- struct inotify_event_info *event; ++ struct fsnotify_event_private_data *fsn_priv; ++ struct inotify_event_private_data *priv; + size_t event_size = sizeof(struct inotify_event); + size_t name_len; + size_t pad_name_len; + +- pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event); ++ pr_debug("%s: group=%p event=%p\n", __func__, group, event); ++ ++ /* we get the inotify watch descriptor from the event private data */ ++ spin_lock(&event->lock); ++ fsn_priv = fsnotify_remove_priv_from_event(group, event); ++ spin_unlock(&event->lock); ++ ++ if (!fsn_priv) ++ inotify_event.wd = -1; ++ else { ++ priv = container_of(fsn_priv, struct inotify_event_private_data, ++ fsnotify_event_priv_data); ++ inotify_event.wd = priv->wd; ++ inotify_free_event_priv(fsn_priv); ++ } + +- event = INOTIFY_E(fsn_event); + name_len = event->name_len; + /* + * round up name length so it is a multiple of event_size + * plus an extra byte for the terminating '\0'. + */ +- pad_name_len = round_event_name_len(fsn_event); ++ pad_name_len = round_event_name_len(event); + inotify_event.len = pad_name_len; +- inotify_event.mask = inotify_mask_to_arg(fsn_event->mask); +- inotify_event.wd = event->wd; ++ inotify_event.mask = inotify_mask_to_arg(event->mask); + inotify_event.cookie = event->sync_cookie; + + /* send the main event */ +@@ -207,7 +218,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, + */ + if (pad_name_len) { + /* copy the path name */ +- if (copy_to_user(buf, event->name, name_len)) ++ if (copy_to_user(buf, event->file_name, name_len)) + return -EFAULT; + buf += name_len; + +@@ -246,7 +257,7 @@ static ssize_t inotify_read(struct file *file, char __user *buf, + if (IS_ERR(kevent)) + break; + ret = copy_event_to_user(group, kevent, buf); +- fsnotify_destroy_event(group, kevent); ++ fsnotify_put_event(kevent); + if (ret < 0) + break; + buf += ret; +@@ -289,7 +300,8 @@ static long inotify_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) + { + struct fsnotify_group *group; +- struct fsnotify_event *fsn_event; ++ struct fsnotify_event_holder *holder; ++ struct fsnotify_event *event; + void __user *p; + int ret = -ENOTTY; + size_t send_len = 0; +@@ -302,10 +314,10 @@ static long inotify_ioctl(struct file *file, unsigned int cmd, + switch (cmd) { + case FIONREAD: + mutex_lock(&group->notification_mutex); +- list_for_each_entry(fsn_event, &group->notification_list, +- list) { ++ list_for_each_entry(holder, &group->notification_list, event_list) { ++ event = holder->event; + send_len += sizeof(struct inotify_event); +- send_len += round_event_name_len(fsn_event); ++ send_len += round_event_name_len(event); + } + mutex_unlock(&group->notification_mutex); + ret = put_user(send_len, (int __user *) p); +@@ -492,12 +504,43 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, + struct fsnotify_group *group) + { + struct inotify_inode_mark *i_mark; +- +- /* Queue ignore event for the watch */ +- inotify_handle_event(group, NULL, fsn_mark, NULL, FS_IN_IGNORED, +- NULL, FSNOTIFY_EVENT_NONE, NULL); ++ struct fsnotify_event *ignored_event, *notify_event; ++ struct inotify_event_private_data *event_priv; ++ struct fsnotify_event_private_data *fsn_event_priv; ++ int ret; + + i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); ++ ++ ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, ++ FSNOTIFY_EVENT_NONE, NULL, 0, ++ GFP_NOFS); ++ if (!ignored_event) ++ goto skip_send_ignore; ++ ++ event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS); ++ if (unlikely(!event_priv)) ++ goto skip_send_ignore; ++ ++ fsn_event_priv = &event_priv->fsnotify_event_priv_data; ++ ++ fsnotify_get_group(group); ++ fsn_event_priv->group = group; ++ event_priv->wd = i_mark->wd; ++ ++ notify_event = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv, NULL); ++ if (notify_event) { ++ if (IS_ERR(notify_event)) ++ ret = PTR_ERR(notify_event); ++ else ++ fsnotify_put_event(notify_event); ++ inotify_free_event_priv(fsn_event_priv); ++ } ++ ++skip_send_ignore: ++ /* matches the reference taken when the event was created */ ++ if (ignored_event) ++ fsnotify_put_event(ignored_event); ++ + /* remove this mark from the idr */ + inotify_remove_from_idr(group, i_mark); + +@@ -794,6 +837,7 @@ static int __init inotify_user_setup(void) + BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21); + + inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC); ++ event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC); + + inotify_max_queued_events = 16384; + inotify_max_user_instances = 128; +diff --git a/fs/notify/notification.c b/fs/notify/notification.c +index 952237b..7b51b05 100644 +--- a/fs/notify/notification.c ++++ b/fs/notify/notification.c +@@ -48,6 +48,15 @@ + #include + #include "fsnotify.h" + ++static struct kmem_cache *fsnotify_event_cachep; ++static struct kmem_cache *fsnotify_event_holder_cachep; ++/* ++ * This is a magic event we send when the q is too full. Since it doesn't ++ * hold real event information we just keep one system wide and use it any time ++ * it is needed. It's refcnt is set 1 at kernel init time and will never ++ * get set to 0 so it will never get 'freed' ++ */ ++static struct fsnotify_event *q_overflow_event; + static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0); + + /** +@@ -67,14 +76,60 @@ bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group) + return list_empty(&group->notification_list) ? true : false; + } + +-void fsnotify_destroy_event(struct fsnotify_group *group, +- struct fsnotify_event *event) ++void fsnotify_get_event(struct fsnotify_event *event) + { +- /* Overflow events are per-group and we don't want to free them */ +- if (!event || event->mask == FS_Q_OVERFLOW) ++ atomic_inc(&event->refcnt); ++} ++ ++void fsnotify_put_event(struct fsnotify_event *event) ++{ ++ if (!event) + return; + +- group->ops->free_event(event); ++ if (atomic_dec_and_test(&event->refcnt)) { ++ pr_debug("%s: event=%p\n", __func__, event); ++ ++ if (event->data_type == FSNOTIFY_EVENT_PATH) ++ path_put(&event->path); ++ ++ BUG_ON(!list_empty(&event->private_data_list)); ++ ++ kfree(event->file_name); ++ put_pid(event->tgid); ++ kmem_cache_free(fsnotify_event_cachep, event); ++ } ++} ++ ++struct fsnotify_event_holder *fsnotify_alloc_event_holder(void) ++{ ++ return kmem_cache_alloc(fsnotify_event_holder_cachep, GFP_KERNEL); ++} ++ ++void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder) ++{ ++ if (holder) ++ kmem_cache_free(fsnotify_event_holder_cachep, holder); ++} ++ ++/* ++ * Find the private data that the group previously attached to this event when ++ * the group added the event to the notification queue (fsnotify_add_notify_event) ++ */ ++struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group, struct fsnotify_event *event) ++{ ++ struct fsnotify_event_private_data *lpriv; ++ struct fsnotify_event_private_data *priv = NULL; ++ ++ assert_spin_locked(&event->lock); ++ ++ list_for_each_entry(lpriv, &event->private_data_list, event_list) { ++ if (lpriv->group == group) { ++ priv = lpriv; ++ list_del(&priv->event_list); ++ break; ++ } ++ } ++ return priv; + } + + /* +@@ -82,35 +137,91 @@ void fsnotify_destroy_event(struct fsnotify_group *group, + * event off the queue to deal with. If the event is successfully added to the + * group's notification queue, a reference is taken on event. + */ +-struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group, +- struct fsnotify_event *event, ++struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event, ++ struct fsnotify_event_private_data *priv, + struct fsnotify_event *(*merge)(struct list_head *, + struct fsnotify_event *)) + { + struct fsnotify_event *return_event = NULL; ++ struct fsnotify_event_holder *holder = NULL; + struct list_head *list = &group->notification_list; + +- pr_debug("%s: group=%p event=%p\n", __func__, group, event); ++ pr_debug("%s: group=%p event=%p priv=%p\n", __func__, group, event, priv); ++ ++ /* ++ * There is one fsnotify_event_holder embedded inside each fsnotify_event. ++ * Check if we expect to be able to use that holder. If not alloc a new ++ * holder. ++ * For the overflow event it's possible that something will use the in ++ * event holder before we get the lock so we may need to jump back and ++ * alloc a new holder, this can't happen for most events... ++ */ ++ if (!list_empty(&event->holder.event_list)) { ++alloc_holder: ++ holder = fsnotify_alloc_event_holder(); ++ if (!holder) ++ return ERR_PTR(-ENOMEM); ++ } + + mutex_lock(&group->notification_mutex); + + if (group->q_len >= group->max_events) { +- /* Queue overflow event only if it isn't already queued */ +- if (list_empty(&group->overflow_event.list)) +- event = &group->overflow_event; ++ event = q_overflow_event; ++ ++ /* ++ * we need to return the overflow event ++ * which means we need a ref ++ */ ++ fsnotify_get_event(event); + return_event = event; ++ ++ /* sorry, no private data on the overflow event */ ++ priv = NULL; + } + + if (!list_empty(list) && merge) { +- return_event = merge(list, event); +- if (return_event) { ++ struct fsnotify_event *tmp; ++ ++ tmp = merge(list, event); ++ if (tmp) { + mutex_unlock(&group->notification_mutex); +- return return_event; ++ ++ if (return_event) ++ fsnotify_put_event(return_event); ++ if (holder != &event->holder) ++ fsnotify_destroy_event_holder(holder); ++ return tmp; ++ } ++ } ++ ++ spin_lock(&event->lock); ++ ++ if (list_empty(&event->holder.event_list)) { ++ if (unlikely(holder)) ++ fsnotify_destroy_event_holder(holder); ++ holder = &event->holder; ++ } else if (unlikely(!holder)) { ++ /* between the time we checked above and got the lock the in ++ * event holder was used, go back and get a new one */ ++ spin_unlock(&event->lock); ++ mutex_unlock(&group->notification_mutex); ++ ++ if (return_event) { ++ fsnotify_put_event(return_event); ++ return_event = NULL; + } ++ ++ goto alloc_holder; + } + + group->q_len++; +- list_add_tail(&event->list, list); ++ holder->event = event; ++ ++ fsnotify_get_event(event); ++ list_add_tail(&holder->event_list, list); ++ if (priv) ++ list_add_tail(&priv->event_list, &event->private_data_list); ++ spin_unlock(&event->lock); + mutex_unlock(&group->notification_mutex); + + wake_up(&group->notification_waitq); +@@ -119,20 +230,32 @@ struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group, + } + + /* +- * Remove and return the first event from the notification list. It is the +- * responsibility of the caller to destroy the obtained event ++ * Remove and return the first event from the notification list. There is a ++ * reference held on this event since it was on the list. It is the responsibility ++ * of the caller to drop this reference. + */ + struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group) + { + struct fsnotify_event *event; ++ struct fsnotify_event_holder *holder; + + BUG_ON(!mutex_is_locked(&group->notification_mutex)); + + pr_debug("%s: group=%p\n", __func__, group); + +- event = list_first_entry(&group->notification_list, +- struct fsnotify_event, list); +- list_del(&event->list); ++ holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list); ++ ++ event = holder->event; ++ ++ spin_lock(&event->lock); ++ holder->event = NULL; ++ list_del_init(&holder->event_list); ++ spin_unlock(&event->lock); ++ ++ /* event == holder means we are referenced through the in event holder */ ++ if (holder != &event->holder) ++ fsnotify_destroy_event_holder(holder); ++ + group->q_len--; + + return event; +@@ -143,10 +266,15 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group + */ + struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group) + { ++ struct fsnotify_event *event; ++ struct fsnotify_event_holder *holder; ++ + BUG_ON(!mutex_is_locked(&group->notification_mutex)); + +- return list_first_entry(&group->notification_list, +- struct fsnotify_event, list); ++ holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list); ++ event = holder->event; ++ ++ return event; + } + + /* +@@ -156,31 +284,181 @@ struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group) + void fsnotify_flush_notify(struct fsnotify_group *group) + { + struct fsnotify_event *event; ++ struct fsnotify_event_private_data *priv; + + mutex_lock(&group->notification_mutex); + while (!fsnotify_notify_queue_is_empty(group)) { + event = fsnotify_remove_notify_event(group); +- fsnotify_destroy_event(group, event); ++ /* if they don't implement free_event_priv they better not have attached any */ ++ if (group->ops->free_event_priv) { ++ spin_lock(&event->lock); ++ priv = fsnotify_remove_priv_from_event(group, event); ++ spin_unlock(&event->lock); ++ if (priv) ++ group->ops->free_event_priv(priv); ++ } ++ fsnotify_put_event(event); /* matches fsnotify_add_notify_event */ + } + mutex_unlock(&group->notification_mutex); + } + ++static void initialize_event(struct fsnotify_event *event) ++{ ++ INIT_LIST_HEAD(&event->holder.event_list); ++ atomic_set(&event->refcnt, 1); ++ ++ spin_lock_init(&event->lock); ++ ++ INIT_LIST_HEAD(&event->private_data_list); ++} ++ ++/* ++ * Caller damn well better be holding whatever mutex is protecting the ++ * old_holder->event_list and the new_event must be a clean event which ++ * cannot be found anywhere else in the kernel. ++ */ ++int fsnotify_replace_event(struct fsnotify_event_holder *old_holder, ++ struct fsnotify_event *new_event) ++{ ++ struct fsnotify_event *old_event = old_holder->event; ++ struct fsnotify_event_holder *new_holder = &new_event->holder; ++ ++ enum event_spinlock_class { ++ SPINLOCK_OLD, ++ SPINLOCK_NEW, ++ }; ++ ++ pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, new_event); ++ ++ /* ++ * if the new_event's embedded holder is in use someone ++ * screwed up and didn't give us a clean new event. ++ */ ++ BUG_ON(!list_empty(&new_holder->event_list)); ++ ++ spin_lock_nested(&old_event->lock, SPINLOCK_OLD); ++ spin_lock_nested(&new_event->lock, SPINLOCK_NEW); ++ ++ new_holder->event = new_event; ++ list_replace_init(&old_holder->event_list, &new_holder->event_list); ++ ++ spin_unlock(&new_event->lock); ++ spin_unlock(&old_event->lock); ++ ++ /* event == holder means we are referenced through the in event holder */ ++ if (old_holder != &old_event->holder) ++ fsnotify_destroy_event_holder(old_holder); ++ ++ fsnotify_get_event(new_event); /* on the list take reference */ ++ fsnotify_put_event(old_event); /* off the list, drop reference */ ++ ++ return 0; ++} ++ ++struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event) ++{ ++ struct fsnotify_event *event; ++ ++ event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL); ++ if (!event) ++ return NULL; ++ ++ pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, event); ++ ++ memcpy(event, old_event, sizeof(*event)); ++ initialize_event(event); ++ ++ if (event->name_len) { ++ event->file_name = kstrdup(old_event->file_name, GFP_KERNEL); ++ if (!event->file_name) { ++ kmem_cache_free(fsnotify_event_cachep, event); ++ return NULL; ++ } ++ } ++ event->tgid = get_pid(old_event->tgid); ++ if (event->data_type == FSNOTIFY_EVENT_PATH) ++ path_get(&event->path); ++ ++ return event; ++} ++ + /* + * fsnotify_create_event - Allocate a new event which will be sent to each + * group's handle_event function if the group was interested in this + * particular event. + * +- * @inode the inode which is supposed to receive the event (sometimes a ++ * @to_tell the inode which is supposed to receive the event (sometimes a + * parent of the inode to which the event happened. + * @mask what actually happened. + * @data pointer to the object which was actually affected + * @data_type flag indication if the data is a file, path, inode, nothing... + * @name the filename, if available + */ +-void fsnotify_init_event(struct fsnotify_event *event, struct inode *inode, +- u32 mask) ++struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data, ++ int data_type, const unsigned char *name, ++ u32 cookie, gfp_t gfp) + { +- INIT_LIST_HEAD(&event->list); +- event->inode = inode; ++ struct fsnotify_event *event; ++ ++ event = kmem_cache_zalloc(fsnotify_event_cachep, gfp); ++ if (!event) ++ return NULL; ++ ++ pr_debug("%s: event=%p to_tell=%p mask=%x data=%p data_type=%d\n", ++ __func__, event, to_tell, mask, data, data_type); ++ ++ initialize_event(event); ++ ++ if (name) { ++ event->file_name = kstrdup(name, gfp); ++ if (!event->file_name) { ++ kmem_cache_free(fsnotify_event_cachep, event); ++ return NULL; ++ } ++ event->name_len = strlen(event->file_name); ++ } ++ ++ event->tgid = get_pid(task_tgid(current)); ++ event->sync_cookie = cookie; ++ event->to_tell = to_tell; ++ event->data_type = data_type; ++ ++ switch (data_type) { ++ case FSNOTIFY_EVENT_PATH: { ++ struct path *path = data; ++ event->path.dentry = path->dentry; ++ event->path.mnt = path->mnt; ++ path_get(&event->path); ++ break; ++ } ++ case FSNOTIFY_EVENT_INODE: ++ event->inode = data; ++ break; ++ case FSNOTIFY_EVENT_NONE: ++ event->inode = NULL; ++ event->path.dentry = NULL; ++ event->path.mnt = NULL; ++ break; ++ default: ++ BUG(); ++ } ++ + event->mask = mask; ++ ++ return event; ++} ++ ++static __init int fsnotify_notification_init(void) ++{ ++ fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC); ++ fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC); ++ ++ q_overflow_event = fsnotify_create_event(NULL, FS_Q_OVERFLOW, NULL, ++ FSNOTIFY_EVENT_NONE, NULL, 0, ++ GFP_KERNEL); ++ if (!q_overflow_event) ++ panic("unable to allocate fsnotify q_overflow_event\n"); ++ ++ return 0; + } ++subsys_initcall(fsnotify_notification_init); +diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h +index 7f3d7dcf..4b2ee8d 100644 +--- a/include/linux/fsnotify_backend.h ++++ b/include/linux/fsnotify_backend.h +@@ -15,6 +15,7 @@ + #include /* struct path */ + #include + #include ++ + #include + + /* +@@ -78,7 +79,6 @@ struct fsnotify_group; + struct fsnotify_event; + struct fsnotify_mark; + struct fsnotify_event_private_data; +-struct fsnotify_fname; + + /* + * Each group much define these ops. The fsnotify infrastructure will call +@@ -99,26 +99,12 @@ struct fsnotify_ops { + struct fsnotify_mark *vfsmount_mark, + __u32 mask, void *data, int data_type); + int (*handle_event)(struct fsnotify_group *group, +- struct inode *inode, + struct fsnotify_mark *inode_mark, + struct fsnotify_mark *vfsmount_mark, +- u32 mask, void *data, int data_type, +- const unsigned char *file_name); ++ struct fsnotify_event *event); + void (*free_group_priv)(struct fsnotify_group *group); + void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group); +- void (*free_event)(struct fsnotify_event *event); +-}; +- +-/* +- * all of the information about the original object we want to now send to +- * a group. If you want to carry more info from the accessing task to the +- * listener this structure is where you need to be adding fields. +- */ +-struct fsnotify_event { +- struct list_head list; +- /* inode may ONLY be dereferenced during handle_event(). */ +- struct inode *inode; /* either the inode the event happened to or its parent */ +- u32 mask; /* the type of access, bitwise OR for FS_* event types */ ++ void (*free_event_priv)(struct fsnotify_event_private_data *priv); + }; + + /* +@@ -162,11 +148,7 @@ struct fsnotify_group { + * a group */ + struct list_head marks_list; /* all inode marks for this group */ + +- struct fasync_struct *fsn_fa; /* async notification */ +- +- struct fsnotify_event overflow_event; /* Event we queue when the +- * notification list is too +- * full */ ++ struct fasync_struct *fsn_fa; /* async notification */ + + /* groups can define private fields here or use the void *private */ + union { +@@ -195,10 +177,76 @@ struct fsnotify_group { + }; + }; + ++/* ++ * A single event can be queued in multiple group->notification_lists. ++ * ++ * each group->notification_list will point to an event_holder which in turns points ++ * to the actual event that needs to be sent to userspace. ++ * ++ * Seemed cheaper to create a refcnt'd event and a small holder for every group ++ * than create a different event for every group ++ * ++ */ ++struct fsnotify_event_holder { ++ struct fsnotify_event *event; ++ struct list_head event_list; ++}; ++ ++/* ++ * Inotify needs to tack data onto an event. This struct lets us later find the ++ * correct private data of the correct group. ++ */ ++struct fsnotify_event_private_data { ++ struct fsnotify_group *group; ++ struct list_head event_list; ++}; ++ ++/* ++ * all of the information about the original object we want to now send to ++ * a group. If you want to carry more info from the accessing task to the ++ * listener this structure is where you need to be adding fields. ++ */ ++struct fsnotify_event { ++ /* ++ * If we create an event we are also likely going to need a holder ++ * to link to a group. So embed one holder in the event. Means only ++ * one allocation for the common case where we only have one group ++ */ ++ struct fsnotify_event_holder holder; ++ spinlock_t lock; /* protection for the associated event_holder and private_list */ ++ /* to_tell may ONLY be dereferenced during handle_event(). */ ++ struct inode *to_tell; /* either the inode the event happened to or its parent */ ++ /* ++ * depending on the event type we should have either a path or inode ++ * We hold a reference on path, but NOT on inode. Since we have the ref on ++ * the path, it may be dereferenced at any point during this object's ++ * lifetime. That reference is dropped when this object's refcnt hits ++ * 0. If this event contains an inode instead of a path, the inode may ++ * ONLY be used during handle_event(). ++ */ ++ union { ++ struct path path; ++ struct inode *inode; ++ }; + /* when calling fsnotify tell it if the data is a path or inode */ + #define FSNOTIFY_EVENT_NONE 0 + #define FSNOTIFY_EVENT_PATH 1 + #define FSNOTIFY_EVENT_INODE 2 ++ int data_type; /* which of the above union we have */ ++ atomic_t refcnt; /* how many groups still are using/need to send this event */ ++ __u32 mask; /* the type of access, bitwise OR for FS_* event types */ ++ ++ u32 sync_cookie; /* used to corrolate events, namely inotify mv events */ ++ const unsigned char *file_name; ++ size_t name_len; ++ struct pid *tgid; ++ ++#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS ++ __u32 response; /* userspace answer to question */ ++#endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ ++ ++ struct list_head private_data_list; /* groups can store private data here */ ++}; + + /* + * Inode specific fields in an fsnotify_mark +@@ -322,12 +370,17 @@ extern void fsnotify_put_group(struct fsnotify_group *group); + extern void fsnotify_destroy_group(struct fsnotify_group *group); + /* fasync handler function */ + extern int fsnotify_fasync(int fd, struct file *file, int on); +-/* Free event from memory */ +-extern void fsnotify_destroy_event(struct fsnotify_group *group, +- struct fsnotify_event *event); ++/* take a reference to an event */ ++extern void fsnotify_get_event(struct fsnotify_event *event); ++extern void fsnotify_put_event(struct fsnotify_event *event); ++/* find private data previously attached to an event and unlink it */ ++extern struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group, ++ struct fsnotify_event *event); ++ + /* attach the event to the group notification queue */ + extern struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group, + struct fsnotify_event *event, ++ struct fsnotify_event_private_data *priv, + struct fsnotify_event *(*merge)(struct list_head *, + struct fsnotify_event *)); + /* true if the group notification queue is empty */ +@@ -377,8 +430,15 @@ extern void fsnotify_put_mark(struct fsnotify_mark *mark); + extern void fsnotify_unmount_inodes(struct list_head *list); + + /* put here because inotify does some weird stuff when destroying watches */ +-extern void fsnotify_init_event(struct fsnotify_event *event, +- struct inode *to_tell, u32 mask); ++extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, ++ void *data, int data_is, ++ const unsigned char *name, ++ u32 cookie, gfp_t gfp); ++ ++/* fanotify likes to change events after they are on lists... */ ++extern struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event); ++extern int fsnotify_replace_event(struct fsnotify_event_holder *old_holder, ++ struct fsnotify_event *new_event); + + #else + +diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c +index bcc0b18..43c307d 100644 +--- a/kernel/audit_tree.c ++++ b/kernel/audit_tree.c +@@ -912,11 +912,9 @@ static void evict_chunk(struct audit_chunk *chunk) + } + + static int audit_tree_handle_event(struct fsnotify_group *group, +- struct inode *to_tell, + struct fsnotify_mark *inode_mark, +- struct fsnotify_mark *vfsmount_mark, +- u32 mask, void *data, int data_type, +- const unsigned char *file_name) ++ struct fsnotify_mark *vfsmonut_mark, ++ struct fsnotify_event *event) + { + BUG(); + return -EOPNOTSUPP; +@@ -947,7 +945,7 @@ static const struct fsnotify_ops audit_tree_ops = { + .handle_event = audit_tree_handle_event, + .should_send_event = audit_tree_send_event, + .free_group_priv = NULL, +- .free_event = NULL, ++ .free_event_priv = NULL, + .freeing_mark = audit_tree_freeing_mark, + }; + +diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c +index a760c32..22831c4 100644 +--- a/kernel/audit_watch.c ++++ b/kernel/audit_watch.c +@@ -475,25 +475,25 @@ static bool audit_watch_should_send_event(struct fsnotify_group *group, struct i + + /* Update watch data in audit rules based on fsnotify events. */ + static int audit_watch_handle_event(struct fsnotify_group *group, +- struct inode *to_tell, + struct fsnotify_mark *inode_mark, + struct fsnotify_mark *vfsmount_mark, +- u32 mask, void *data, int data_type, +- const unsigned char *dname) ++ struct fsnotify_event *event) + { + struct inode *inode; ++ __u32 mask = event->mask; ++ const char *dname = event->file_name; + struct audit_parent *parent; + + parent = container_of(inode_mark, struct audit_parent, mark); + + BUG_ON(group != audit_watch_group); + +- switch (data_type) { ++ switch (event->data_type) { + case (FSNOTIFY_EVENT_PATH): +- inode = ((struct path *)data)->dentry->d_inode; ++ inode = event->path.dentry->d_inode; + break; + case (FSNOTIFY_EVENT_INODE): +- inode = (struct inode *)data; ++ inode = event->inode; + break; + default: + BUG(); +@@ -516,7 +516,7 @@ static const struct fsnotify_ops audit_watch_fsnotify_ops = { + .handle_event = audit_watch_handle_event, + .free_group_priv = NULL, + .freeing_mark = NULL, +- .free_event = NULL, ++ .free_event_priv = NULL, + }; + + static int __init audit_watch_init(void) +-- +1.8.4.2 +