Merge branch 'for-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup updates from Tejun Heo:
 "Nothing major. Two notable fixes are Li's second stab at fixing the
  long-standing race condition in the mount path and suppression of
  spurious warning from cgroup_get(). All other changes are trivial"

* 'for-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: mark cgroup_get() with __maybe_unused
  cgroup: avoid attaching a cgroup root to two different superblocks, take 2
  cgroup: fix spurious warnings on cgroup_is_dead() from cgroup_sk_alloc()
  cgroup: move cgroup_subsys_state parent field for cache locality
  cpuset: Remove cpuset_update_active_cpus()'s parameter.
  cgroup: switch to BUG_ON()
  cgroup: drop duplicate header nsproxy.h
  kernel: convert css_set.refcount from atomic_t to refcount_t
  kernel: convert cgroup_namespace.count from atomic_t to refcount_t
This commit is contained in:
Linus Torvalds 2017-05-01 13:52:24 -07:00
commit 9410091dd5
9 changed files with 67 additions and 41 deletions

View File

@ -13,6 +13,7 @@
#include <linux/wait.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/refcount.h>
#include <linux/percpu-refcount.h>
#include <linux/percpu-rwsem.h>
#include <linux/workqueue.h>
@ -106,9 +107,6 @@ struct cgroup_subsys_state {
/* reference count - access via css_[try]get() and css_put() */
struct percpu_ref refcnt;
/* PI: the parent css */
struct cgroup_subsys_state *parent;
/* siblings list anchored at the parent's ->children */
struct list_head sibling;
struct list_head children;
@ -138,6 +136,12 @@ struct cgroup_subsys_state {
/* percpu_ref killing and RCU release */
struct rcu_head rcu_head;
struct work_struct destroy_work;
/*
* PI: the parent css. Placed here for cache proximity to following
* fields of the containing structure.
*/
struct cgroup_subsys_state *parent;
};
/*
@ -156,7 +160,7 @@ struct css_set {
struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
/* reference count */
atomic_t refcount;
refcount_t refcount;
/* the default cgroup associated with this css_set */
struct cgroup *dfl_cgrp;

View File

@ -17,11 +17,11 @@
#include <linux/seq_file.h>
#include <linux/kernfs.h>
#include <linux/jump_label.h>
#include <linux/nsproxy.h>
#include <linux/types.h>
#include <linux/ns_common.h>
#include <linux/nsproxy.h>
#include <linux/user_namespace.h>
#include <linux/refcount.h>
#include <linux/cgroup-defs.h>
@ -661,7 +661,7 @@ static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {}
#endif /* CONFIG_CGROUP_DATA */
struct cgroup_namespace {
atomic_t count;
refcount_t count;
struct ns_common ns;
struct user_namespace *user_ns;
struct ucounts *ucounts;
@ -696,12 +696,12 @@ copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns,
static inline void get_cgroup_ns(struct cgroup_namespace *ns)
{
if (ns)
atomic_inc(&ns->count);
refcount_inc(&ns->count);
}
static inline void put_cgroup_ns(struct cgroup_namespace *ns)
{
if (ns && atomic_dec_and_test(&ns->count))
if (ns && refcount_dec_and_test(&ns->count))
free_cgroup_ns(ns);
}

View File

@ -42,7 +42,7 @@ static inline void cpuset_dec(void)
extern int cpuset_init(void);
extern void cpuset_init_smp(void);
extern void cpuset_update_active_cpus(bool cpu_online);
extern void cpuset_update_active_cpus(void);
extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
extern void cpuset_cpus_allowed_fallback(struct task_struct *p);
extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
@ -155,7 +155,7 @@ static inline bool cpusets_enabled(void) { return false; }
static inline int cpuset_init(void) { return 0; }
static inline void cpuset_init_smp(void) {}
static inline void cpuset_update_active_cpus(bool cpu_online)
static inline void cpuset_update_active_cpus(void)
{
partition_sched_domains(1, NULL, NULL);
}

View File

@ -5,6 +5,7 @@
#include <linux/kernfs.h>
#include <linux/workqueue.h>
#include <linux/list.h>
#include <linux/refcount.h>
/*
* A cgroup can be associated with multiple css_sets as different tasks may
@ -134,7 +135,7 @@ static inline void put_css_set(struct css_set *cset)
* can see it. Similar to atomic_dec_and_lock(), but for an
* rwlock
*/
if (atomic_add_unless(&cset->refcount, -1, 1))
if (refcount_dec_not_one(&cset->refcount))
return;
spin_lock_irqsave(&css_set_lock, flags);
@ -147,7 +148,7 @@ static inline void put_css_set(struct css_set *cset)
*/
static inline void get_css_set(struct css_set *cset)
{
atomic_inc(&cset->refcount);
refcount_inc(&cset->refcount);
}
bool cgroup_ssid_enabled(int ssid);
@ -163,7 +164,7 @@ int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
void cgroup_free_root(struct cgroup_root *root);
void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts);
int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask);
int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags);
int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask);
struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
struct cgroup_root *root, unsigned long magic,

View File

@ -346,7 +346,7 @@ static int cgroup_task_count(const struct cgroup *cgrp)
spin_lock_irq(&css_set_lock);
list_for_each_entry(link, &cgrp->cset_links, cset_link)
count += atomic_read(&link->cset->refcount);
count += refcount_read(&link->cset->refcount);
spin_unlock_irq(&css_set_lock);
return count;
}
@ -1072,6 +1072,7 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
struct cgroup_subsys *ss;
struct dentry *dentry;
int i, ret;
bool new_root = false;
cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
@ -1181,10 +1182,11 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
ret = -ENOMEM;
goto out_unlock;
}
new_root = true;
init_cgroup_root(root, &opts);
ret = cgroup_setup_root(root, opts.subsys_mask);
ret = cgroup_setup_root(root, opts.subsys_mask, PERCPU_REF_INIT_DEAD);
if (ret)
cgroup_free_root(root);
@ -1200,6 +1202,18 @@ out_free:
dentry = cgroup_do_mount(&cgroup_fs_type, flags, root,
CGROUP_SUPER_MAGIC, ns);
/*
* There's a race window after we release cgroup_mutex and before
* allocating a superblock. Make sure a concurrent process won't
* be able to re-use the root during this window by delaying the
* initialization of root refcnt.
*/
if (new_root) {
mutex_lock(&cgroup_mutex);
percpu_ref_reinit(&root->cgrp.self.refcnt);
mutex_unlock(&cgroup_mutex);
}
/*
* If @pinned_sb, we're reusing an existing root and holding an
* extra ref on its sb. Mount is complete. Put the extra ref.
@ -1286,7 +1300,7 @@ static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css,
u64 count;
rcu_read_lock();
count = atomic_read(&task_css_set(current)->refcount);
count = refcount_read(&task_css_set(current)->refcount);
rcu_read_unlock();
return count;
}

View File

@ -189,7 +189,7 @@ static u16 have_canfork_callback __read_mostly;
/* cgroup namespace for init task */
struct cgroup_namespace init_cgroup_ns = {
.count = { .counter = 2, },
.count = REFCOUNT_INIT(2),
.user_ns = &init_user_ns,
.ns.ops = &cgroupns_operations,
.ns.inum = PROC_CGROUP_INIT_INO,
@ -436,7 +436,12 @@ out_unlock:
return css;
}
static void cgroup_get(struct cgroup *cgrp)
static void __maybe_unused cgroup_get(struct cgroup *cgrp)
{
css_get(&cgrp->self);
}
static void cgroup_get_live(struct cgroup *cgrp)
{
WARN_ON_ONCE(cgroup_is_dead(cgrp));
css_get(&cgrp->self);
@ -554,7 +559,7 @@ EXPORT_SYMBOL_GPL(of_css);
* haven't been created.
*/
struct css_set init_css_set = {
.refcount = ATOMIC_INIT(1),
.refcount = REFCOUNT_INIT(1),
.tasks = LIST_HEAD_INIT(init_css_set.tasks),
.mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks),
.task_iters = LIST_HEAD_INIT(init_css_set.task_iters),
@ -724,7 +729,7 @@ void put_css_set_locked(struct css_set *cset)
lockdep_assert_held(&css_set_lock);
if (!atomic_dec_and_test(&cset->refcount))
if (!refcount_dec_and_test(&cset->refcount))
return;
/* This css_set is dead. unlink it and release cgroup and css refs */
@ -932,7 +937,7 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
list_add_tail(&link->cgrp_link, &cset->cgrp_links);
if (cgroup_parent(cgrp))
cgroup_get(cgrp);
cgroup_get_live(cgrp);
}
/**
@ -977,7 +982,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
return NULL;
}
atomic_set(&cset->refcount, 1);
refcount_set(&cset->refcount, 1);
INIT_LIST_HEAD(&cset->tasks);
INIT_LIST_HEAD(&cset->mg_tasks);
INIT_LIST_HEAD(&cset->task_iters);
@ -1640,7 +1645,7 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts)
set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
}
int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags)
{
LIST_HEAD(tmp_links);
struct cgroup *root_cgrp = &root->cgrp;
@ -1656,8 +1661,8 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
root_cgrp->id = ret;
root_cgrp->ancestor_ids[0] = ret;
ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, 0,
GFP_KERNEL);
ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
ref_flags, GFP_KERNEL);
if (ret)
goto out;
@ -1802,7 +1807,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
return ERR_PTR(-EINVAL);
}
cgrp_dfl_visible = true;
cgroup_get(&cgrp_dfl_root.cgrp);
cgroup_get_live(&cgrp_dfl_root.cgrp);
dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root,
CGROUP2_SUPER_MAGIC, ns);
@ -2576,7 +2581,7 @@ restart:
if (!css || !percpu_ref_is_dying(&css->refcnt))
continue;
cgroup_get(dsct);
cgroup_get_live(dsct);
prepare_to_wait(&dsct->offline_waitq, &wait,
TASK_UNINTERRUPTIBLE);
@ -3947,7 +3952,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
{
lockdep_assert_held(&cgroup_mutex);
cgroup_get(cgrp);
cgroup_get_live(cgrp);
memset(css, 0, sizeof(*css));
css->cgroup = cgrp;
@ -4123,7 +4128,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
/* allocation complete, commit to creation */
list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
atomic_inc(&root->nr_cgrps);
cgroup_get(parent);
cgroup_get_live(parent);
/*
* @cgrp is now fully operational. If something fails after this
@ -4513,7 +4518,7 @@ int __init cgroup_init(void)
hash_add(css_set_table, &init_css_set.hlist,
css_set_hash(init_css_set.subsys));
BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));
BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0, 0));
mutex_unlock(&cgroup_mutex);
@ -4947,7 +4952,7 @@ struct cgroup *cgroup_get_from_path(const char *path)
if (kn) {
if (kernfs_type(kn) == KERNFS_DIR) {
cgrp = kn->priv;
cgroup_get(cgrp);
cgroup_get_live(cgrp);
} else {
cgrp = ERR_PTR(-ENOTDIR);
}
@ -5027,6 +5032,11 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
/* Socket clone path */
if (skcd->val) {
/*
* We might be cloning a socket which is left in an empty
* cgroup and the cgroup might have already been rmdir'd.
* Don't use cgroup_get_live().
*/
cgroup_get(sock_cgroup_ptr(skcd));
return;
}

View File

@ -2121,10 +2121,8 @@ int __init cpuset_init(void)
{
int err = 0;
if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL))
BUG();
if (!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL))
BUG();
BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL));
BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL));
cpumask_setall(top_cpuset.cpus_allowed);
nodes_setall(top_cpuset.mems_allowed);
@ -2139,8 +2137,7 @@ int __init cpuset_init(void)
if (err < 0)
return err;
if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL))
BUG();
BUG_ON(!alloc_cpumask_var(&cpus_attach, GFP_KERNEL));
return 0;
}
@ -2354,7 +2351,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
rebuild_sched_domains();
}
void cpuset_update_active_cpus(bool cpu_online)
void cpuset_update_active_cpus(void)
{
/*
* We're inside cpu hotplug critical region which usually nests

View File

@ -31,7 +31,7 @@ static struct cgroup_namespace *alloc_cgroup_ns(void)
kfree(new_ns);
return ERR_PTR(ret);
}
atomic_set(&new_ns->count, 1);
refcount_set(&new_ns->count, 1);
new_ns->ns.ops = &cgroupns_operations;
return new_ns;
}

View File

@ -5732,7 +5732,7 @@ static void cpuset_cpu_active(void)
* cpuset configurations.
*/
}
cpuset_update_active_cpus(true);
cpuset_update_active_cpus();
}
static int cpuset_cpu_inactive(unsigned int cpu)
@ -5755,7 +5755,7 @@ static int cpuset_cpu_inactive(unsigned int cpu)
if (overflow)
return -EBUSY;
cpuset_update_active_cpus(false);
cpuset_update_active_cpus();
} else {
num_cpus_frozen++;
partition_sched_domains(1, NULL, NULL);