38460b48d0
Patch for Per-CSS (Cgroup Subsys State) ID and private hierarchy code. This patch attaches a unique ID to each css and provides the following: - css_lookup(subsys, id) returns a pointer to the struct cgroup_subsys_state of id. - css_get_next(subsys, id, rootid, depth, foundid) returns the next css under "root" by scanning. When cgroup_subsys->use_id is set, an id for css is maintained. The cgroup framework only prepares: - css_id of root css for subsys - id is automatically attached at creation of css. - id is *not* freed automatically, because the cgroup framework doesn't know the lifetime of cgroup_subsys_state. A free_css_id() function is provided; it must be called by the subsys. There are several reasons to develop this. - Saving space: for example, memcg's swap_cgroup is an array of pointers to cgroup, but it does not need to be very fast. By replacing pointers (8 bytes per entry) with IDs (2 bytes per entry), we can greatly reduce memory usage. - Scanning without lock. CSS_ID provides a "scan id under this ROOT" function. With this, scanning css under root can be written without locks. ex) do { rcu_read_lock(); next = cgroup_get_next(subsys, id, root, &found); /* check sanity of next here */ css_tryget(); rcu_read_unlock(); id = found + 1 } while(...) Characteristics: - Each css has a unique ID under subsys. - Lifetime of ID is controlled by subsys. - css ID contains "ID", "Depth in hierarchy" and stack of hierarchy. - Allowed IDs are 1-65535; ID 0 is the UNUSED ID. Design Choices: - scan-by-ID vs. scan-by-tree-walk. As /proc's pid scan does, scan-by-ID is robust when scanning is done by the following kind of routine: scan -> rest a while (release a lock) -> continue from where it was interrupted. memcg's hierarchical reclaim does this. - When subsys->use_id is set, # of css in the system is limited to 65535.
[bharata@linux.vnet.ibm.com: remove rcu_read_lock() from css_get_next()] Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Acked-by: Paul Menage <menage@google.com> Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
148 lines
4.4 KiB
C
148 lines
4.4 KiB
C
/*
|
|
* include/linux/idr.h
|
|
*
|
|
* 2002-10-18 written by Jim Houston jim.houston@ccur.com
|
|
* Copyright (C) 2002 by Concurrent Computer Corporation
|
|
* Distributed under the GNU GPL license version 2.
|
|
*
|
|
* Small id to pointer translation service avoiding fixed sized
|
|
* tables.
|
|
*/
|
|
|
|
#ifndef __IDR_H__
|
|
#define __IDR_H__
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/bitops.h>
|
|
#include <linux/init.h>
|
|
#include <linux/rcupdate.h>
|
|
|
|
/*
 * Word-size dependent constants, selected by BITS_PER_LONG:
 *
 *   IDR_BITS       - log2 of the number of slots in one layer
 *   IDR_FULL       - all-ones unsigned long constant
 *   TOP_LEVEL_FULL - IDR_FULL shifted down until only its two low bits remain
 *
 * Only two bits are used for the top level because the top level contributes
 * just one id bit: 6 bits * 6 levels = 36 bits (64-bit longs) or
 * 5 bits * 7 levels = 35 bits (32-bit longs), while an id only uses 31 bits.
 */
#if BITS_PER_LONG == 64
# define IDR_BITS	6
# define IDR_FULL	0xfffffffffffffffful
# define TOP_LEVEL_FULL	(IDR_FULL >> 62)
#elif BITS_PER_LONG == 32
# define IDR_BITS	5
# define IDR_FULL	0xfffffffful
# define TOP_LEVEL_FULL	(IDR_FULL >> 30)
#else
# error "BITS_PER_LONG is not 32 or 64"
#endif
|
|
|
|
/*
 * IDR_SIZE is 2^IDR_BITS; IDR_MASK has exactly the low IDR_BITS bits set.
 */
#define IDR_SIZE	(1 << IDR_BITS)
#define IDR_MASK	(IDR_SIZE - 1)
|
|
|
|
/*
 * MAX_ID_SHIFT is the bit width of int minus one, MAX_ID_BIT is the single
 * bit at that position and MAX_ID_MASK is every bit below it.
 */
#define MAX_ID_SHIFT	(sizeof(int)*8 - 1)
#define MAX_ID_BIT	(1U << MAX_ID_SHIFT)
#define MAX_ID_MASK	(MAX_ID_BIT - 1)

/*
 * Number of layers needed to cover MAX_ID_SHIFT bits at IDR_BITS bits per
 * layer.  The division rounds up, which leaves the possibility of an
 * incomplete final layer.
 *
 * The replacement list is fully parenthesized so that the macro evaluates
 * as a single value inside a larger expression (e.g. "x / MAX_LEVEL").
 */
#define MAX_LEVEL	((MAX_ID_SHIFT + IDR_BITS - 1) / IDR_BITS)

/*
 * Number of id_layer structs to leave in free list.  Parenthesized for the
 * same reason as MAX_LEVEL (e.g. "2 * IDR_FREE_MAX").
 */
#define IDR_FREE_MAX	(MAX_LEVEL + MAX_LEVEL)
|
|
|
|
struct idr_layer {
|
|
unsigned long bitmap; /* A zero bit means "space here" */
|
|
struct idr_layer *ary[1<<IDR_BITS];
|
|
int count; /* When zero, we can release it */
|
|
int layer; /* distance from leaf */
|
|
struct rcu_head rcu_head;
|
|
};
|
|
|
|
struct idr {
|
|
struct idr_layer *top;
|
|
struct idr_layer *id_free;
|
|
int layers; /* only valid without concurrent changes */
|
|
int id_free_cnt;
|
|
spinlock_t lock;
|
|
};
|
|
|
|
/*
 * Static initializer for a struct idr called @name: no top layer, no spare
 * layers, zero counters, and a lock set up through __SPIN_LOCK_UNLOCKED().
 */
#define IDR_INIT(name)						\
{								\
	.top		= NULL,					\
	.id_free	= NULL,					\
	.layers 	= 0,					\
	.id_free_cnt	= 0,					\
	.lock		= __SPIN_LOCK_UNLOCKED(name.lock),	\
}

/* Define a struct idr variable @name initialized with IDR_INIT(). */
#define DEFINE_IDR(name)	struct idr name = IDR_INIT(name)
|
|
|
|
/*
 * Result codes that tell the caller what to do after a call to
 * _idr_sub_alloc.
 */
#define IDR_NEED_TO_GROW	(-2)
#define IDR_NOMORE_SPACE	(-3)

/*
 * Translate an internal result code into a negative errno value:
 * -1 becomes -EAGAIN, every other value becomes -ENOSPC.
 */
#define _idr_rc_to_errno(rc)	((rc) != -1 ? -ENOSPC : -EAGAIN)
|
|
|
|
/**
|
|
* idr synchronization (stolen from radix-tree.h)
|
|
*
|
|
* idr_find() is able to be called locklessly, using RCU. The caller must
|
|
* ensure calls to this function are made within rcu_read_lock() regions.
|
|
* Other readers (lock-free or otherwise) and modifications may be running
|
|
* concurrently.
|
|
*
|
|
* It is still required that the caller manage the synchronization and
|
|
* lifetimes of the items. So if RCU lock-free lookups are used, typically
|
|
* this would mean that the items have their own locks, or are amenable to
|
|
* lock-free access; and that the items are freed by RCU (or only freed after
|
|
* having been deleted from the idr tree *and* a synchronize_rcu() grace
|
|
* period).
|
|
*/
|
|
|
|
/*
|
|
* This is what we export.
|
|
*/
|
|
|
|
void *idr_find(struct idr *idp, int id);
|
|
int idr_pre_get(struct idr *idp, gfp_t gfp_mask);
|
|
int idr_get_new(struct idr *idp, void *ptr, int *id);
|
|
int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id);
|
|
int idr_for_each(struct idr *idp,
|
|
int (*fn)(int id, void *p, void *data), void *data);
|
|
void *idr_get_next(struct idr *idp, int *nextid);
|
|
void *idr_replace(struct idr *idp, void *ptr, int id);
|
|
void idr_remove(struct idr *idp, int id);
|
|
void idr_remove_all(struct idr *idp);
|
|
void idr_destroy(struct idr *idp);
|
|
void idr_init(struct idr *idp);
|
|
|
|
|
|
/*
 * IDA - IDR based id allocator, use when translation from id to
 * pointer isn't necessary.
 *
 * IDA_CHUNK_SIZE   - size of one chunk, in bytes
 * IDA_BITMAP_LONGS - number of longs in a chunk, minus one; struct
 *                    ida_bitmap stores one long (nr_busy) ahead of its
 *                    bitmap words
 * IDA_BITMAP_BITS  - number of bits held by IDA_BITMAP_LONGS longs
 *
 * IDA_BITMAP_LONGS is derived from IDA_CHUNK_SIZE rather than repeating the
 * literal, so the two cannot drift apart if the chunk size is changed.
 */
#define IDA_CHUNK_SIZE		128	/* 128 bytes per chunk */
#define IDA_BITMAP_LONGS	(IDA_CHUNK_SIZE / sizeof(long) - 1)
#define IDA_BITMAP_BITS		(IDA_BITMAP_LONGS * sizeof(long) * 8)
|
|
|
|
struct ida_bitmap {
|
|
long nr_busy;
|
|
unsigned long bitmap[IDA_BITMAP_LONGS];
|
|
};
|
|
|
|
struct ida {
|
|
struct idr idr;
|
|
struct ida_bitmap *free_bitmap;
|
|
};
|
|
|
|
/*
 * Static initializer for a struct ida called @name: the embedded idr is
 * set up with IDR_INIT(name) and free_bitmap starts out NULL.
 */
#define IDA_INIT(name)						\
{								\
	.idr		= IDR_INIT(name),			\
	.free_bitmap	= NULL,					\
}

/* Define a struct ida variable @name initialized with IDA_INIT(). */
#define DEFINE_IDA(name)	struct ida name = IDA_INIT(name)
|
|
|
|
/*
 * The exported ida interface.  Prototypes only; the behavior of each call
 * is defined by its implementation, which is not part of this header.
 */
int  ida_pre_get(struct ida *ida, gfp_t gfp_mask);
int  ida_get_new_above(struct ida *ida, int starting_id, int *p_id);
int  ida_get_new(struct ida *ida, int *p_id);
void ida_remove(struct ida *ida, int id);
void ida_destroy(struct ida *ida);
void ida_init(struct ida *ida);
|
|
|
|
/* Initialization hook, marked __init; defined outside this header. */
void __init idr_init_cache(void);
|
|
|
|
#endif /* __IDR_H__ */
|