From 0e0b2afdf644aa523f5eb10ce1f9e3c6cd8362ec Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 10 Aug 2016 11:23:43 -0400 Subject: [PATCH 1/8] kernfs: add dummy implementation of kernfs_path_from_node() The dummy version of kernfs_path_from_node() was missing. This currently doesn't break anything. Let's add it for consistency and to ease adding wrappers around it. v2: Removed stray ';' which was causing build failures. Signed-off-by: Tejun Heo Acked-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 96356ef012de..7d2efd2128bb 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -344,6 +344,11 @@ static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) static inline size_t kernfs_path_len(struct kernfs_node *kn) { return 0; } +static inline int kernfs_path_from_node(struct kernfs_node *root_kn, + struct kernfs_node *kn, + char *buf, size_t buflen) +{ return -ENOSYS; } + static inline char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen) { return NULL; } From 3abb1d90f5d930c6183534a624aa0158a71bc5eb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 10 Aug 2016 11:23:44 -0400 Subject: [PATCH 2/8] kernfs: make kernfs_path*() behave in the style of strlcpy() kernfs_path*() functions always return the length of the full path but the path content is undefined if the length is larger than the provided buffer. This makes its behavior different from strlcpy() and requires error handling in all its users even when they don't care about truncation. In addition, the implementation can actully be simplified by making it behave properly in strlcpy() style. * Update kernfs_path_from_node_locked() to always fill up the buffer with path. If the buffer is not large enough, the output is truncated and terminated. * kernfs_path() no longer needs error handling. Make it a simple inline wrapper around kernfs_path_from_node(). * sysfs_warn_dup()'s use of kernfs_path() doesn't need error handling. Updated accordingly. * cgroup_path()'s use of kernfs_path() updated to retain the old behavior. Signed-off-by: Tejun Heo Acked-by: Greg Kroah-Hartman Acked-by: Serge Hallyn --- fs/kernfs/dir.c | 61 ++++++++++++------------------------------ fs/sysfs/dir.c | 6 ++--- include/linux/cgroup.h | 7 ++++- include/linux/kernfs.h | 21 +++++++++++---- 4 files changed, 42 insertions(+), 53 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index e57174d43683..09242aaa8829 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -110,8 +110,9 @@ static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a, * kn_to: /n1/n2/n3 [depth=3] * result: /../.. * - * return value: length of the string. If greater than buflen, - * then contents of buf are undefined. On error, -1 is returned. + * Returns the length of the full path. If the full length is equal to or + * greater than @buflen, @buf contains the truncated path with the trailing + * '\0'. On error, -errno is returned. */ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to, struct kernfs_node *kn_from, @@ -119,9 +120,8 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to, { struct kernfs_node *kn, *common; const char parent_str[] = "/.."; - size_t depth_from, depth_to, len = 0, nlen = 0; - char *p; - int i; + size_t depth_from, depth_to, len = 0; + int i, j; if (!kn_from) kn_from = kernfs_root(kn_to)->kn; @@ -131,7 +131,7 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to, common = kernfs_common_ancestor(kn_from, kn_to); if (WARN_ON(!common)) - return -1; + return -EINVAL; depth_to = kernfs_depth(common, kn_to); depth_from = kernfs_depth(common, kn_from); @@ -144,22 +144,16 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to, len < buflen ? buflen - len : 0); /* Calculate how many bytes we need for the rest */ - for (kn = kn_to; kn != common; kn = kn->parent) - nlen += strlen(kn->name) + 1; - - if (len + nlen >= buflen) - return len + nlen; - - p = buf + len + nlen; - *p = '\0'; - for (kn = kn_to; kn != common; kn = kn->parent) { - size_t tmp = strlen(kn->name); - p -= tmp; - memcpy(p, kn->name, tmp); - *(--p) = '/'; + for (i = depth_to - 1; i >= 0; i--) { + for (kn = kn_to, j = 0; j < i; j++) + kn = kn->parent; + len += strlcpy(buf + len, "/", + len < buflen ? buflen - len : 0); + len += strlcpy(buf + len, kn->name, + len < buflen ? buflen - len : 0); } - return len + nlen; + return len; } /** @@ -220,8 +214,9 @@ size_t kernfs_path_len(struct kernfs_node *kn) * path (which includes '..'s) as needed to reach from @from to @to is * returned. * - * If @buf isn't long enough, the return value will be greater than @buflen - * and @buf contents are undefined. + * Returns the length of the full path. If the full length is equal to or + * greater than @buflen, @buf contains the truncated path with the trailing + * '\0'. On error, -errno is returned. */ int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from, char *buf, size_t buflen) @@ -236,28 +231,6 @@ int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from, } EXPORT_SYMBOL_GPL(kernfs_path_from_node); -/** - * kernfs_path - build full path of a given node - * @kn: kernfs_node of interest - * @buf: buffer to copy @kn's name into - * @buflen: size of @buf - * - * Builds and returns the full path of @kn in @buf of @buflen bytes. The - * path is built from the end of @buf so the returned pointer usually - * doesn't match @buf. If @buf isn't long enough, @buf is nul terminated - * and %NULL is returned. - */ -char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen) -{ - int ret; - - ret = kernfs_path_from_node(kn, NULL, buf, buflen); - if (ret < 0 || ret >= buflen) - return NULL; - return buf; -} -EXPORT_SYMBOL_GPL(kernfs_path); - /** * pr_cont_kernfs_name - pr_cont name of a kernfs_node * @kn: kernfs_node of interest diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 94374e435025..2b67bda2021b 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -21,14 +21,14 @@ DEFINE_SPINLOCK(sysfs_symlink_target_lock); void sysfs_warn_dup(struct kernfs_node *parent, const char *name) { - char *buf, *path = NULL; + char *buf; buf = kzalloc(PATH_MAX, GFP_KERNEL); if (buf) - path = kernfs_path(parent, buf, PATH_MAX); + kernfs_path(parent, buf, PATH_MAX); WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s/%s'\n", - path, name); + buf, name); kfree(buf); } diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 984f73b719a9..5a9abdee43fe 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -541,7 +541,12 @@ static inline int cgroup_name(struct cgroup *cgrp, char *buf, size_t buflen) static inline char * __must_check cgroup_path(struct cgroup *cgrp, char *buf, size_t buflen) { - return kernfs_path(cgrp->kn, buf, buflen); + int ret; + + ret = kernfs_path(cgrp->kn, buf, buflen); + if (ret < 0 || ret >= buflen) + return NULL; + return buf; } static inline void pr_cont_cgroup_name(struct cgroup *cgrp) diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 7d2efd2128bb..4a02b1b49821 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -272,7 +272,6 @@ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen); size_t kernfs_path_len(struct kernfs_node *kn); int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn, char *buf, size_t buflen); -char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen); void pr_cont_kernfs_name(struct kernfs_node *kn); void pr_cont_kernfs_path(struct kernfs_node *kn); struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn); @@ -349,10 +348,6 @@ static inline int kernfs_path_from_node(struct kernfs_node *root_kn, char *buf, size_t buflen) { return -ENOSYS; } -static inline char *kernfs_path(struct kernfs_node *kn, char *buf, - size_t buflen) -{ return NULL; } - static inline void pr_cont_kernfs_name(struct kernfs_node *kn) { } static inline void pr_cont_kernfs_path(struct kernfs_node *kn) { } @@ -441,6 +436,22 @@ static inline void kernfs_init(void) { } #endif /* CONFIG_KERNFS */ +/** + * kernfs_path - build full path of a given node + * @kn: kernfs_node of interest + * @buf: buffer to copy @kn's name into + * @buflen: size of @buf + * + * Builds and returns the full path of @kn in @buf of @buflen bytes. The + * path is built from the end of @buf so the returned pointer usually + * doesn't match @buf. If @buf isn't long enough, @buf is nul terminated + * and %NULL is returned. + */ +static inline int kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen) +{ + return kernfs_path_from_node(kn, NULL, buf, buflen); +} + static inline struct kernfs_node * kernfs_find_and_get(struct kernfs_node *kn, const char *name) { From bb09c8634b1e484b8840fb2384d55739bfcb68bd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 10 Aug 2016 11:23:44 -0400 Subject: [PATCH 3/8] kernfs: remove kernfs_path_len() It doesn't have any in-kernel user and the same result can be obtained from kernfs_path(@kn, NULL, 0). Remove it. Signed-off-by: Tejun Heo Acked-by: Greg Kroah-Hartman Cc: Serge Hallyn --- fs/kernfs/dir.c | 23 ----------------------- include/linux/kernfs.h | 4 ---- 2 files changed, 27 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 09242aaa8829..6e7fd37615f8 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -179,29 +179,6 @@ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) return ret; } -/** - * kernfs_path_len - determine the length of the full path of a given node - * @kn: kernfs_node of interest - * - * The returned length doesn't include the space for the terminating '\0'. - */ -size_t kernfs_path_len(struct kernfs_node *kn) -{ - size_t len = 0; - unsigned long flags; - - spin_lock_irqsave(&kernfs_rename_lock, flags); - - do { - len += strlen(kn->name) + 1; - kn = kn->parent; - } while (kn && kn->parent); - - spin_unlock_irqrestore(&kernfs_rename_lock, flags); - - return len; -} - /** * kernfs_path_from_node - build path of node @to relative to @from. * @from: parent kernfs_node relative to which we need to build the path diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 4a02b1b49821..7056238fd9f5 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -269,7 +269,6 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn) } int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen); -size_t kernfs_path_len(struct kernfs_node *kn); int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn, char *buf, size_t buflen); void pr_cont_kernfs_name(struct kernfs_node *kn); @@ -340,9 +339,6 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn) static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) { return -ENOSYS; } -static inline size_t kernfs_path_len(struct kernfs_node *kn) -{ return 0; } - static inline int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn, char *buf, size_t buflen) From 4c737b41de7f4eef2a593803bad1b918dd718b10 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 10 Aug 2016 11:23:44 -0400 Subject: [PATCH 4/8] cgroup: make cgroup_path() and friends behave in the style of strlcpy() cgroup_path() and friends used to format the path from the end and thus the resulting path usually didn't start at the start of the passed in buffer. Also, when the buffer was too small, the partial result was truncated from the head rather than tail and there was no way to tell how long the full path would be. These make the functions less robust and more awkward to use. With recent updates to kernfs_path(), cgroup_path() and friends can be made to behave in strlcpy() style. * cgroup_path(), cgroup_path_ns[_locked]() and task_cgroup_path() now always return the length of the full path. If buffer is too small, it contains nul terminated truncated output. * All users updated accordingly. v2: cgroup_path() usage in kernel/sched/debug.c converted. Signed-off-by: Tejun Heo Acked-by: Greg Kroah-Hartman Cc: Serge Hallyn Cc: Peter Zijlstra --- include/linux/blk-cgroup.h | 11 +-------- include/linux/cgroup.h | 16 ++++-------- kernel/cgroup.c | 50 ++++++++++++++++++-------------------- kernel/cpuset.c | 12 ++++----- kernel/sched/debug.c | 3 ++- 5 files changed, 37 insertions(+), 55 deletions(-) diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 10648e300c93..4e8c215e185c 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -343,16 +343,7 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd) */ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) { - char *p; - - p = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); - if (!p) { - strncpy(buf, "", buflen); - return -ENAMETOOLONG; - } - - memmove(buf, p, buf + buflen - p); - return 0; + return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); } /** diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 5a9abdee43fe..6df36361a492 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -97,7 +97,7 @@ int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_rm_cftypes(struct cftype *cfts); void cgroup_file_notify(struct cgroup_file *cfile); -char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); +int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry); int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk); @@ -538,15 +538,9 @@ static inline int cgroup_name(struct cgroup *cgrp, char *buf, size_t buflen) return kernfs_name(cgrp->kn, buf, buflen); } -static inline char * __must_check cgroup_path(struct cgroup *cgrp, char *buf, - size_t buflen) +static inline int cgroup_path(struct cgroup *cgrp, char *buf, size_t buflen) { - int ret; - - ret = kernfs_path(cgrp->kn, buf, buflen); - if (ret < 0 || ret >= buflen) - return NULL; - return buf; + return kernfs_path(cgrp->kn, buf, buflen); } static inline void pr_cont_cgroup_name(struct cgroup *cgrp) @@ -639,8 +633,8 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, struct cgroup_namespace *old_ns); -char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, - struct cgroup_namespace *ns); +int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, + struct cgroup_namespace *ns); #else /* !CONFIG_CGROUPS */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d1c51b7f5221..3861161e460f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2315,22 +2315,18 @@ static struct file_system_type cgroup2_fs_type = { .fs_flags = FS_USERNS_MOUNT, }; -static char *cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, - struct cgroup_namespace *ns) +static int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, + struct cgroup_namespace *ns) { struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root); - int ret; - ret = kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen); - if (ret < 0 || ret >= buflen) - return NULL; - return buf; + return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen); } -char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, - struct cgroup_namespace *ns) +int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, + struct cgroup_namespace *ns) { - char *ret; + int ret; mutex_lock(&cgroup_mutex); spin_lock_irq(&css_set_lock); @@ -2357,12 +2353,12 @@ EXPORT_SYMBOL_GPL(cgroup_path_ns); * * Return value is the same as kernfs_path(). */ -char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) +int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) { struct cgroup_root *root; struct cgroup *cgrp; int hierarchy_id = 1; - char *path = NULL; + int ret; mutex_lock(&cgroup_mutex); spin_lock_irq(&css_set_lock); @@ -2371,16 +2367,15 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) if (root) { cgrp = task_cgroup_from_root(task, root); - path = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns); + ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns); } else { /* if no hierarchy exists, everyone is in "/" */ - if (strlcpy(buf, "/", buflen) < buflen) - path = buf; + ret = strlcpy(buf, "/", buflen); } spin_unlock_irq(&css_set_lock); mutex_unlock(&cgroup_mutex); - return path; + return ret; } EXPORT_SYMBOL_GPL(task_cgroup_path); @@ -5716,7 +5711,7 @@ core_initcall(cgroup_wq_init); int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk) { - char *buf, *path; + char *buf; int retval; struct cgroup_root *root; @@ -5759,17 +5754,17 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, * " (deleted)" is appended to the cgroup path. */ if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) { - path = cgroup_path_ns_locked(cgrp, buf, PATH_MAX, + retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX, current->nsproxy->cgroup_ns); - if (!path) { + if (retval >= PATH_MAX) { retval = -ENAMETOOLONG; goto out_unlock; } - } else { - path = "/"; - } - seq_puts(m, path); + seq_puts(m, buf); + } else { + seq_puts(m, "/"); + } if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp)) seq_puts(m, " (deleted)\n"); @@ -6035,8 +6030,9 @@ static void cgroup_release_agent(struct work_struct *work) { struct cgroup *cgrp = container_of(work, struct cgroup, release_agent_work); - char *pathbuf = NULL, *agentbuf = NULL, *path; + char *pathbuf = NULL, *agentbuf = NULL; char *argv[3], *envp[3]; + int ret; mutex_lock(&cgroup_mutex); @@ -6046,13 +6042,13 @@ static void cgroup_release_agent(struct work_struct *work) goto out; spin_lock_irq(&css_set_lock); - path = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns); + ret = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns); spin_unlock_irq(&css_set_lock); - if (!path) + if (ret >= PATH_MAX) goto out; argv[0] = agentbuf; - argv[1] = path; + argv[1] = pathbuf; argv[2] = NULL; /* minimal command environment */ diff --git a/kernel/cpuset.c b/kernel/cpuset.c index c7fd2778ed50..793ae6fd96dc 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2689,7 +2689,7 @@ void __cpuset_memory_pressure_bump(void) int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk) { - char *buf, *p; + char *buf; struct cgroup_subsys_state *css; int retval; @@ -2700,18 +2700,18 @@ int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, retval = -ENAMETOOLONG; css = task_get_css(tsk, cpuset_cgrp_id); - p = cgroup_path_ns(css->cgroup, buf, PATH_MAX, - current->nsproxy->cgroup_ns); + retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX, + current->nsproxy->cgroup_ns); css_put(css); - if (!p) + if (retval >= PATH_MAX) goto out_free; - seq_puts(m, p); + seq_puts(m, buf); seq_putc(m, '\n'); retval = 0; out_free: kfree(buf); out: - return retval; + return 0; } #endif /* CONFIG_PROC_PID_CPUSET */ diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 2a0a9995256d..23cb609ba4eb 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -410,7 +410,8 @@ static char *task_group_path(struct task_group *tg) if (autogroup_path(tg, group_path, PATH_MAX)) return group_path; - return cgroup_path(tg->css.cgroup, group_path, PATH_MAX); + cgroup_path(tg->css.cgroup, group_path, PATH_MAX); + return group_path; } #endif From ed1777de25e45bfb58fad63341904f8a77911785 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 10 Aug 2016 11:23:44 -0400 Subject: [PATCH 5/8] cgroup: add tracepoints for basic operations Debugging what goes wrong with cgroup setup can get hairy. Add tracepoints for cgroup hierarchy mount, cgroup creation/destruction and task migration operations for better visibility. Signed-off-by: Tejun Heo --- include/trace/events/cgroup.h | 163 ++++++++++++++++++++++++++++++++++ kernel/cgroup.c | 25 ++++++ 2 files changed, 188 insertions(+) create mode 100644 include/trace/events/cgroup.h diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h new file mode 100644 index 000000000000..ab68640a18d0 --- /dev/null +++ b/include/trace/events/cgroup.h @@ -0,0 +1,163 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM cgroup + +#if !defined(_TRACE_CGROUP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_CGROUP_H + +#include +#include + +DECLARE_EVENT_CLASS(cgroup_root, + + TP_PROTO(struct cgroup_root *root), + + TP_ARGS(root), + + TP_STRUCT__entry( + __field( int, root ) + __field( u16, ss_mask ) + __string( name, root->name ) + ), + + TP_fast_assign( + __entry->root = root->hierarchy_id; + __entry->ss_mask = root->subsys_mask; + __assign_str(name, root->name); + ), + + TP_printk("root=%d ss_mask=%#x name=%s", + __entry->root, __entry->ss_mask, __get_str(name)) +); + +DEFINE_EVENT(cgroup_root, cgroup_setup_root, + + TP_PROTO(struct cgroup_root *root), + + TP_ARGS(root) +); + +DEFINE_EVENT(cgroup_root, cgroup_destroy_root, + + TP_PROTO(struct cgroup_root *root), + + TP_ARGS(root) +); + +DEFINE_EVENT(cgroup_root, cgroup_remount, + + TP_PROTO(struct cgroup_root *root), + + TP_ARGS(root) +); + +DECLARE_EVENT_CLASS(cgroup, + + TP_PROTO(struct cgroup *cgrp), + + TP_ARGS(cgrp), + + TP_STRUCT__entry( + __field( int, root ) + __field( int, id ) + __field( int, level ) + __dynamic_array(char, path, + cgrp->kn ? cgroup_path(cgrp, NULL, 0) + 1 + : strlen("(null)")) + ), + + TP_fast_assign( + __entry->root = cgrp->root->hierarchy_id; + __entry->id = cgrp->id; + __entry->level = cgrp->level; + if (cgrp->kn) + cgroup_path(cgrp, __get_dynamic_array(path), + __get_dynamic_array_len(path)); + else + __assign_str(path, "(null)"); + ), + + TP_printk("root=%d id=%d level=%d path=%s", + __entry->root, __entry->id, __entry->level, __get_str(path)) +); + +DEFINE_EVENT(cgroup, cgroup_mkdir, + + TP_PROTO(struct cgroup *cgroup), + + TP_ARGS(cgroup) +); + +DEFINE_EVENT(cgroup, cgroup_rmdir, + + TP_PROTO(struct cgroup *cgroup), + + TP_ARGS(cgroup) +); + +DEFINE_EVENT(cgroup, cgroup_release, + + TP_PROTO(struct cgroup *cgroup), + + TP_ARGS(cgroup) +); + +DEFINE_EVENT(cgroup, cgroup_rename, + + TP_PROTO(struct cgroup *cgroup), + + TP_ARGS(cgroup) +); + +DECLARE_EVENT_CLASS(cgroup_migrate, + + TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup), + + TP_ARGS(dst_cgrp, task, threadgroup), + + TP_STRUCT__entry( + __field( int, dst_root ) + __field( int, dst_id ) + __field( int, dst_level ) + __dynamic_array(char, dst_path, + dst_cgrp->kn ? cgroup_path(dst_cgrp, NULL, 0) + 1 + : strlen("(null)")) + __field( int, pid ) + __string( comm, task->comm ) + ), + + TP_fast_assign( + __entry->dst_root = dst_cgrp->root->hierarchy_id; + __entry->dst_id = dst_cgrp->id; + __entry->dst_level = dst_cgrp->level; + if (dst_cgrp->kn) + cgroup_path(dst_cgrp, __get_dynamic_array(dst_path), + __get_dynamic_array_len(dst_path)); + else + __assign_str(dst_path, "(null)"); + __entry->pid = task->pid; + __assign_str(comm, task->comm); + ), + + TP_printk("dst_root=%d dst_id=%d dst_level=%d dst_path=%s pid=%d comm=%s", + __entry->dst_root, __entry->dst_id, __entry->dst_level, + __get_str(dst_path), __entry->pid, __get_str(comm)) +); + +DEFINE_EVENT(cgroup_migrate, cgroup_attach_task, + + TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup), + + TP_ARGS(dst_cgrp, task, threadgroup) +); + +DEFINE_EVENT(cgroup_migrate, cgroup_transfer_tasks, + + TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup), + + TP_ARGS(dst_cgrp, task, threadgroup) +); + +#endif /* _TRACE_CGROUP_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 3861161e460f..5e2e81ad9175 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -64,6 +64,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + /* * pidlists linger the following amount before being destroyed. The goal * is avoiding frequent destruction in the middle of consecutive read calls @@ -1176,6 +1179,8 @@ static void cgroup_destroy_root(struct cgroup_root *root) struct cgroup *cgrp = &root->cgrp; struct cgrp_cset_link *link, *tmp_link; + trace_cgroup_destroy_root(root); + cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); BUG_ON(atomic_read(&root->nr_cgrps)); @@ -1874,6 +1879,9 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) strcpy(root->release_agent_path, opts.release_agent); spin_unlock(&release_agent_path_lock); } + + trace_cgroup_remount(root); + out_unlock: kfree(opts.release_agent); kfree(opts.name); @@ -2031,6 +2039,8 @@ static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) if (ret) goto destroy_root; + trace_cgroup_setup_root(root); + /* * There must be no failure case after here, since rebinding takes * care of subsystems' refcounts, which are explicitly dropped in @@ -2825,6 +2835,10 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp, ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root); cgroup_migrate_finish(&preloaded_csets); + + if (!ret) + trace_cgroup_attach_task(dst_cgrp, leader, threadgroup); + return ret; } @@ -3587,6 +3601,8 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent, mutex_lock(&cgroup_mutex); ret = kernfs_rename(kn, new_parent, new_name_str); + if (!ret) + trace_cgroup_rename(cgrp); mutex_unlock(&cgroup_mutex); @@ -4355,6 +4371,8 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) if (task) { ret = cgroup_migrate(task, false, to->root); + if (!ret) + trace_cgroup_transfer_tasks(to, task, false); put_task_struct(task); } } while (task && !ret); @@ -5020,6 +5038,8 @@ static void css_release_work_fn(struct work_struct *work) ss->css_released(css); } else { /* cgroup release path */ + trace_cgroup_release(cgrp); + cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id); cgrp->id = -1; @@ -5306,6 +5326,8 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, if (ret) goto out_destroy; + trace_cgroup_mkdir(cgrp); + /* let's create and online css's */ kernfs_activate(kn); @@ -5481,6 +5503,9 @@ static int cgroup_rmdir(struct kernfs_node *kn) ret = cgroup_destroy_locked(cgrp); + if (!ret) + trace_cgroup_rmdir(cgrp); + cgroup_kn_unlock(kn); return ret; } From 679a5e3f12830392d14f94b04bbe0f3cabdbd773 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 29 Sep 2016 11:58:36 +0200 Subject: [PATCH 6/8] cpuset: fix error handling regression in proc_cpuset_show() 4c737b41de7f ("cgroup: make cgroup_path() and friends behave in the style of strlcpy()") botched the conversion of proc_cpuset_show() and broke its error handling. It made the function return 0 on failures and fail to handle error returns from cgroup_path_ns(). Fix it. Reported-by: Dan Carpenter Signed-off-by: Tejun Heo --- kernel/cpuset.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 793ae6fd96dc..97dd8e178786 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2698,12 +2698,13 @@ int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, if (!buf) goto out; - retval = -ENAMETOOLONG; css = task_get_css(tsk, cpuset_cgrp_id); retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX, current->nsproxy->cgroup_ns); css_put(css); if (retval >= PATH_MAX) + retval = -ENAMETOOLONG; + if (retval < 0) goto out_free; seq_puts(m, buf); seq_putc(m, '\n'); @@ -2711,7 +2712,7 @@ int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, out_free: kfree(buf); out: - return 0; + return retval; } #endif /* CONFIG_PROC_PID_CPUSET */ From e0223003e6e141533446d01a92784592a97a8552 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 29 Sep 2016 15:49:40 +0200 Subject: [PATCH 7/8] cgroup: fix error handling regressions in proc_cgroup_show() and cgroup_release_agent() 4c737b41de7f ("cgroup: make cgroup_path() and friends behave in the style of strlcpy()") broke error handling in proc_cgroup_show() and cgroup_release_agent() by not handling negative return values from cgroup_path_ns_locked(). Fix it. Reported-by: Dan Carpenter Signed-off-by: Tejun Heo --- kernel/cgroup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 5e2e81ad9175..a7f9fb4e1fc7 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -5781,10 +5781,10 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) { retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX, current->nsproxy->cgroup_ns); - if (retval >= PATH_MAX) { + if (retval >= PATH_MAX) retval = -ENAMETOOLONG; + if (retval < 0) goto out_unlock; - } seq_puts(m, buf); } else { @@ -6069,7 +6069,7 @@ static void cgroup_release_agent(struct work_struct *work) spin_lock_irq(&css_set_lock); ret = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns); spin_unlock_irq(&css_set_lock); - if (ret >= PATH_MAX) + if (ret < 0 || ret >= PATH_MAX) goto out; argv[0] = agentbuf; From bbb427e342495df1cda10051d0566388697499c0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 29 Sep 2016 08:33:30 -0700 Subject: [PATCH 8/8] blkcg: Unlock blkcg_pol_mutex only once when cpd == NULL Unlocking a mutex twice is wrong. Hence modify blkcg_policy_register() such that blkcg_pol_mutex is unlocked once if cpd == NULL. This patch avoids that smatch reports the following error: block/blk-cgroup.c:1378: blkcg_policy_register() error: double unlock 'mutex:&blkcg_pol_mutex' Fixes: 06b285bd1125 ("blkcg: fix blkcg_policy_data allocation bug") Signed-off-by: Bart Van Assche Cc: Tejun Heo Cc: # v4.2+ Signed-off-by: Tejun Heo --- block/blk-cgroup.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index dd38e5ced4a3..b08ccbb9393a 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1340,10 +1340,8 @@ int blkcg_policy_register(struct blkcg_policy *pol) struct blkcg_policy_data *cpd; cpd = pol->cpd_alloc_fn(GFP_KERNEL); - if (!cpd) { - mutex_unlock(&blkcg_pol_mutex); + if (!cpd) goto err_free_cpds; - } blkcg->cpd[pol->plid] = cpd; cpd->blkcg = blkcg;