kernel-ark/kernel/cgroup_freezer.c
Tejun Heo ef9fe980c6 cgroup_freezer: implement proper hierarchy support
Up until now, cgroup_freezer didn't implement hierarchy properly.
cgroups could be arranged in hierarchy but it didn't make any
difference in how each cgroup_freezer behaved.  They all operated
separately.

This patch implements proper hierarchy support.  If a cgroup is
frozen, all its descendants are frozen.  A cgroup is thawed iff it and
all its ancestors are THAWED.  freezer.self_freezing shows the current
freezing state for the cgroup itself.  freezer.parent_freezing shows
whether the cgroup is freezing because any of its ancestors is
freezing.

freezer_post_create() locks the parent and new cgroup and inherits the
parent's state and freezer_change_state() applies new state top-down
using cgroup_for_each_descendant_pre() which guarantees that no child
can escape its parent's state.  update_if_frozen() uses
cgroup_for_each_descendant_post() to propagate frozen states
bottom-up.

Synchronization could be coarser and easier by using a single mutex to
protect all hierarchy operations.  Finer grained approach was used
because it wasn't too difficult for cgroup_freezer and I think it's
beneficial to have an example implementation and cgroup_freezer is
rather simple and can serve a good one.

As this makes cgroup_freezer properly hierarchical,
freezer_subsys.broken_hierarchy marking is removed.

Note that this patch changes userland visible behavior - freezing a
cgroup now freezes all its descendants too.  This behavior change is
intended and has been warned via .broken_hierarchy.

v2: Michal spotted a bug in freezer_change_state() - descendants were
    inheriting from the wrong ancestor.  Fixed.

v3: Documentation/cgroups/freezer-subsystem.txt updated.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
2012-11-09 10:52:30 -08:00

487 lines
13 KiB
C

/*
* cgroup_freezer.c - control group freezer subsystem
*
* Copyright IBM Corporation, 2007
*
* Author : Cedric Le Goater <clg@fr.ibm.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2.1 of the GNU Lesser General Public License
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/cgroup.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/freezer.h>
#include <linux/seq_file.h>
/*
* A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is
* set if "FROZEN" is written to freezer.state cgroupfs file, and cleared
* for "THAWED". FREEZING_PARENT is set if the parent freezer is FREEZING
* for whatever reason. IOW, a cgroup has FREEZING_PARENT set if one of
* its ancestors has FREEZING_SELF set.
*/
enum freezer_state_flags {
CGROUP_FREEZER_ONLINE = (1 << 0), /* freezer is fully online */
CGROUP_FREEZING_SELF = (1 << 1), /* this freezer is freezing */
CGROUP_FREEZING_PARENT = (1 << 2), /* the parent freezer is freezing */
CGROUP_FROZEN = (1 << 3), /* this and its descendants frozen */
/* mask for all FREEZING flags */
CGROUP_FREEZING = CGROUP_FREEZING_SELF | CGROUP_FREEZING_PARENT,
};
struct freezer {
struct cgroup_subsys_state css;
unsigned int state;
spinlock_t lock;
};
static inline struct freezer *cgroup_freezer(struct cgroup *cgroup)
{
return container_of(cgroup_subsys_state(cgroup, freezer_subsys_id),
struct freezer, css);
}
static inline struct freezer *task_freezer(struct task_struct *task)
{
return container_of(task_subsys_state(task, freezer_subsys_id),
struct freezer, css);
}
static struct freezer *parent_freezer(struct freezer *freezer)
{
struct cgroup *pcg = freezer->css.cgroup->parent;
if (pcg)
return cgroup_freezer(pcg);
return NULL;
}
bool cgroup_freezing(struct task_struct *task)
{
bool ret;
rcu_read_lock();
ret = task_freezer(task)->state & CGROUP_FREEZING;
rcu_read_unlock();
return ret;
}
/*
* cgroups_write_string() limits the size of freezer state strings to
* CGROUP_LOCAL_BUFFER_SIZE
*/
static const char *freezer_state_strs(unsigned int state)
{
if (state & CGROUP_FROZEN)
return "FROZEN";
if (state & CGROUP_FREEZING)
return "FREEZING";
return "THAWED";
};
struct cgroup_subsys freezer_subsys;
static struct cgroup_subsys_state *freezer_create(struct cgroup *cgroup)
{
struct freezer *freezer;
freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL);
if (!freezer)
return ERR_PTR(-ENOMEM);
spin_lock_init(&freezer->lock);
return &freezer->css;
}
/**
* freezer_post_create - commit creation of a freezer cgroup
* @cgroup: cgroup being created
*
* We're committing to creation of @cgroup. Mark it online and inherit
* parent's freezing state while holding both parent's and our
* freezer->lock.
*/
static void freezer_post_create(struct cgroup *cgroup)
{
struct freezer *freezer = cgroup_freezer(cgroup);
struct freezer *parent = parent_freezer(freezer);
/*
* The following double locking and freezing state inheritance
* guarantee that @cgroup can never escape ancestors' freezing
* states. See cgroup_for_each_descendant_pre() for details.
*/
if (parent)
spin_lock_irq(&parent->lock);
spin_lock_nested(&freezer->lock, SINGLE_DEPTH_NESTING);
freezer->state |= CGROUP_FREEZER_ONLINE;
if (parent && (parent->state & CGROUP_FREEZING)) {
freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN;
atomic_inc(&system_freezing_cnt);
}
spin_unlock(&freezer->lock);
if (parent)
spin_unlock_irq(&parent->lock);
}
/**
* freezer_pre_destroy - initiate destruction of @cgroup
* @cgroup: cgroup being destroyed
*
* @cgroup is going away. Mark it dead and decrement system_freezing_count
* if it was holding one.
*/
static void freezer_pre_destroy(struct cgroup *cgroup)
{
struct freezer *freezer = cgroup_freezer(cgroup);
spin_lock_irq(&freezer->lock);
if (freezer->state & CGROUP_FREEZING)
atomic_dec(&system_freezing_cnt);
freezer->state = 0;
spin_unlock_irq(&freezer->lock);
}
static void freezer_destroy(struct cgroup *cgroup)
{
kfree(cgroup_freezer(cgroup));
}
/*
* Tasks can be migrated into a different freezer anytime regardless of its
* current state. freezer_attach() is responsible for making new tasks
* conform to the current state.
*
* Freezer state changes and task migration are synchronized via
* @freezer->lock. freezer_attach() makes the new tasks conform to the
* current state and all following state changes can see the new tasks.
*/
static void freezer_attach(struct cgroup *new_cgrp, struct cgroup_taskset *tset)
{
struct freezer *freezer = cgroup_freezer(new_cgrp);
struct task_struct *task;
bool clear_frozen = false;
spin_lock_irq(&freezer->lock);
/*
* Make the new tasks conform to the current state of @new_cgrp.
* For simplicity, when migrating any task to a FROZEN cgroup, we
* revert it to FREEZING and let update_if_frozen() determine the
* correct state later.
*
* Tasks in @tset are on @new_cgrp but may not conform to its
* current state before executing the following - !frozen tasks may
* be visible in a FROZEN cgroup and frozen tasks in a THAWED one.
*/
cgroup_taskset_for_each(task, new_cgrp, tset) {
if (!(freezer->state & CGROUP_FREEZING)) {
__thaw_task(task);
} else {
freeze_task(task);
freezer->state &= ~CGROUP_FROZEN;
clear_frozen = true;
}
}
spin_unlock_irq(&freezer->lock);
/*
* Propagate FROZEN clearing upwards. We may race with
* update_if_frozen(), but as long as both work bottom-up, either
* update_if_frozen() sees child's FROZEN cleared or we clear the
* parent's FROZEN later. No parent w/ !FROZEN children can be
* left FROZEN.
*/
while (clear_frozen && (freezer = parent_freezer(freezer))) {
spin_lock_irq(&freezer->lock);
freezer->state &= ~CGROUP_FROZEN;
clear_frozen = freezer->state & CGROUP_FREEZING;
spin_unlock_irq(&freezer->lock);
}
}
static void freezer_fork(struct task_struct *task)
{
struct freezer *freezer;
rcu_read_lock();
freezer = task_freezer(task);
/*
* The root cgroup is non-freezable, so we can skip the
* following check.
*/
if (!freezer->css.cgroup->parent)
goto out;
spin_lock_irq(&freezer->lock);
if (freezer->state & CGROUP_FREEZING)
freeze_task(task);
spin_unlock_irq(&freezer->lock);
out:
rcu_read_unlock();
}
/**
* update_if_frozen - update whether a cgroup finished freezing
* @cgroup: cgroup of interest
*
* Once FREEZING is initiated, transition to FROZEN is lazily updated by
* calling this function. If the current state is FREEZING but not FROZEN,
* this function checks whether all tasks of this cgroup and the descendant
* cgroups finished freezing and, if so, sets FROZEN.
*
* The caller is responsible for grabbing RCU read lock and calling
* update_if_frozen() on all descendants prior to invoking this function.
*
* Task states and freezer state might disagree while tasks are being
* migrated into or out of @cgroup, so we can't verify task states against
* @freezer state here. See freezer_attach() for details.
*/
static void update_if_frozen(struct cgroup *cgroup)
{
struct freezer *freezer = cgroup_freezer(cgroup);
struct cgroup *pos;
struct cgroup_iter it;
struct task_struct *task;
WARN_ON_ONCE(!rcu_read_lock_held());
spin_lock_irq(&freezer->lock);
if (!(freezer->state & CGROUP_FREEZING) ||
(freezer->state & CGROUP_FROZEN))
goto out_unlock;
/* are all (live) children frozen? */
cgroup_for_each_child(pos, cgroup) {
struct freezer *child = cgroup_freezer(pos);
if ((child->state & CGROUP_FREEZER_ONLINE) &&
!(child->state & CGROUP_FROZEN))
goto out_unlock;
}
/* are all tasks frozen? */
cgroup_iter_start(cgroup, &it);
while ((task = cgroup_iter_next(cgroup, &it))) {
if (freezing(task)) {
/*
* freezer_should_skip() indicates that the task
* should be skipped when determining freezing
* completion. Consider it frozen in addition to
* the usual frozen condition.
*/
if (!frozen(task) && !freezer_should_skip(task))
goto out_iter_end;
}
}
freezer->state |= CGROUP_FROZEN;
out_iter_end:
cgroup_iter_end(cgroup, &it);
out_unlock:
spin_unlock_irq(&freezer->lock);
}
static int freezer_read(struct cgroup *cgroup, struct cftype *cft,
struct seq_file *m)
{
struct cgroup *pos;
rcu_read_lock();
/* update states bottom-up */
cgroup_for_each_descendant_post(pos, cgroup)
update_if_frozen(pos);
update_if_frozen(cgroup);
rcu_read_unlock();
seq_puts(m, freezer_state_strs(cgroup_freezer(cgroup)->state));
seq_putc(m, '\n');
return 0;
}
static void freeze_cgroup(struct freezer *freezer)
{
struct cgroup *cgroup = freezer->css.cgroup;
struct cgroup_iter it;
struct task_struct *task;
cgroup_iter_start(cgroup, &it);
while ((task = cgroup_iter_next(cgroup, &it)))
freeze_task(task);
cgroup_iter_end(cgroup, &it);
}
static void unfreeze_cgroup(struct freezer *freezer)
{
struct cgroup *cgroup = freezer->css.cgroup;
struct cgroup_iter it;
struct task_struct *task;
cgroup_iter_start(cgroup, &it);
while ((task = cgroup_iter_next(cgroup, &it)))
__thaw_task(task);
cgroup_iter_end(cgroup, &it);
}
/**
* freezer_apply_state - apply state change to a single cgroup_freezer
* @freezer: freezer to apply state change to
* @freeze: whether to freeze or unfreeze
* @state: CGROUP_FREEZING_* flag to set or clear
*
* Set or clear @state on @cgroup according to @freeze, and perform
* freezing or thawing as necessary.
*/
static void freezer_apply_state(struct freezer *freezer, bool freeze,
unsigned int state)
{
/* also synchronizes against task migration, see freezer_attach() */
lockdep_assert_held(&freezer->lock);
if (!(freezer->state & CGROUP_FREEZER_ONLINE))
return;
if (freeze) {
if (!(freezer->state & CGROUP_FREEZING))
atomic_inc(&system_freezing_cnt);
freezer->state |= state;
freeze_cgroup(freezer);
} else {
bool was_freezing = freezer->state & CGROUP_FREEZING;
freezer->state &= ~state;
if (!(freezer->state & CGROUP_FREEZING)) {
if (was_freezing)
atomic_dec(&system_freezing_cnt);
freezer->state &= ~CGROUP_FROZEN;
unfreeze_cgroup(freezer);
}
}
}
/**
* freezer_change_state - change the freezing state of a cgroup_freezer
* @freezer: freezer of interest
* @freeze: whether to freeze or thaw
*
* Freeze or thaw @freezer according to @freeze. The operations are
* recursive - all descendants of @freezer will be affected.
*/
static void freezer_change_state(struct freezer *freezer, bool freeze)
{
struct cgroup *pos;
/* update @freezer */
spin_lock_irq(&freezer->lock);
freezer_apply_state(freezer, freeze, CGROUP_FREEZING_SELF);
spin_unlock_irq(&freezer->lock);
/*
* Update all its descendants in pre-order traversal. Each
* descendant will try to inherit its parent's FREEZING state as
* CGROUP_FREEZING_PARENT.
*/
rcu_read_lock();
cgroup_for_each_descendant_pre(pos, freezer->css.cgroup) {
struct freezer *pos_f = cgroup_freezer(pos);
struct freezer *parent = parent_freezer(pos_f);
/*
* Our update to @parent->state is already visible which is
* all we need. No need to lock @parent. For more info on
* synchronization, see freezer_post_create().
*/
spin_lock_irq(&pos_f->lock);
freezer_apply_state(pos_f, parent->state & CGROUP_FREEZING,
CGROUP_FREEZING_PARENT);
spin_unlock_irq(&pos_f->lock);
}
rcu_read_unlock();
}
static int freezer_write(struct cgroup *cgroup, struct cftype *cft,
const char *buffer)
{
bool freeze;
if (strcmp(buffer, freezer_state_strs(0)) == 0)
freeze = false;
else if (strcmp(buffer, freezer_state_strs(CGROUP_FROZEN)) == 0)
freeze = true;
else
return -EINVAL;
freezer_change_state(cgroup_freezer(cgroup), freeze);
return 0;
}
static u64 freezer_self_freezing_read(struct cgroup *cgroup, struct cftype *cft)
{
struct freezer *freezer = cgroup_freezer(cgroup);
return (bool)(freezer->state & CGROUP_FREEZING_SELF);
}
static u64 freezer_parent_freezing_read(struct cgroup *cgroup, struct cftype *cft)
{
struct freezer *freezer = cgroup_freezer(cgroup);
return (bool)(freezer->state & CGROUP_FREEZING_PARENT);
}
static struct cftype files[] = {
{
.name = "state",
.flags = CFTYPE_NOT_ON_ROOT,
.read_seq_string = freezer_read,
.write_string = freezer_write,
},
{
.name = "self_freezing",
.flags = CFTYPE_NOT_ON_ROOT,
.read_u64 = freezer_self_freezing_read,
},
{
.name = "parent_freezing",
.flags = CFTYPE_NOT_ON_ROOT,
.read_u64 = freezer_parent_freezing_read,
},
{ } /* terminate */
};
struct cgroup_subsys freezer_subsys = {
.name = "freezer",
.create = freezer_create,
.post_create = freezer_post_create,
.pre_destroy = freezer_pre_destroy,
.destroy = freezer_destroy,
.subsys_id = freezer_subsys_id,
.attach = freezer_attach,
.fork = freezer_fork,
.base_cftypes = files,
};