kernel-ark/include/linux/memcontrol.h
KAMEZAWA Hiroyuki ae41be3742 bugfix for memory cgroup controller: migration under memory controller fix
While using the memory control cgroup, page migration under it works as follows.
==
 1. uncharge all refs at try_to_unmap().
 2. charge refs again at remove_migration_ptes().
==
This is simple but has the following problems.
==
 The page is uncharged and charged back again if *mapped*.
    - This means that the cgroup before migration can be different from the
      one after migration.
    - If the page is not mapped but charged as page cache, the charge is just
      ignored (because it is not mapped, it will not be uncharged before
      migration). This is a memory leak.
==
This patch tries to keep the memory cgroup across page migration by holding
one extra refcnt during it. Three functions are added.

 mem_cgroup_prepare_migration() --- increase refcnt of page->page_cgroup
 mem_cgroup_end_migration()     --- decrease refcnt of page->page_cgroup
 mem_cgroup_page_migration() --- copy page->page_cgroup from old page to
                                 new page.

During migration
  - the old page is under PG_locked.
  - the new page is under PG_locked, too.
  - neither the old page nor the new page is on the LRU.

These 3 facts guarantee that page_cgroup() migration has no race.

Tested and worked well in x86_64/fake-NUMA box.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-07 08:42:19 -08:00

143 lines
3.6 KiB
C

/* memcontrol.h - Memory Controller
*
* Copyright IBM Corporation, 2007
* Author Balbir Singh <balbir@linux.vnet.ibm.com>
*
* Copyright 2007 OpenVZ SWsoft Inc
* Author: Pavel Emelianov <xemul@openvz.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#ifndef _LINUX_MEMCONTROL_H
#define _LINUX_MEMCONTROL_H
#include <linux/rcupdate.h>
#include <linux/mm.h>
/*
 * Forward declarations so the prototypes in this header can name these
 * types by pointer.
 */
struct mem_cgroup;
struct page_cgroup;
struct page;
struct mm_struct;
#ifdef CONFIG_CGROUP_MEM_CONT
/*
 * Memory controller interface for builds with CONFIG_CGROUP_MEM_CONT set.
 * Everything in this group is only declared here; the definitions live
 * outside this header.
 */

/*
 * Per-mm hooks.
 * NOTE(review): presumably set up / tear down the mm's cgroup association;
 * confirm against the implementation.
 */
extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p);
extern void mm_free_cgroup(struct mm_struct *mm);
/* Setter and getter for the page_cgroup associated with a page. */
extern void page_assign_page_cgroup(struct page *page,
struct page_cgroup *pc);
extern struct page_cgroup *page_get_page_cgroup(struct page *page);
/*
 * Charge/uncharge entry points.
 * NOTE(review): the int return of mem_cgroup_charge() is presumably
 * 0 on success (the disabled stub returns 0); confirm the error convention.
 */
extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask);
extern void mem_cgroup_uncharge(struct page_cgroup *pc);
extern void mem_cgroup_move_lists(struct page_cgroup *pc, bool active);
/*
 * NOTE(review): no stub for this function (or for
 * mem_cgroup_out_of_memory() below) is provided in the
 * !CONFIG_CGROUP_MEM_CONT branch of this header, so callers presumably
 * must be compiled conditionally; confirm.
 */
extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
struct list_head *dst,
unsigned long *scanned, int order,
int mode, struct zone *z,
struct mem_cgroup *mem_cont,
int active);
extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask);
/*
 * Declared without "extern", unlike its neighbours; a function declaration
 * has external linkage either way.
 */
int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
/*
 * mm_cgroup - return the mem_cgroup pointer stored in @mm.
 *
 * The mm->mem_cgroup field is read through rcu_dereference().
 * NOTE(review): callers presumably need to be inside an RCU read-side
 * critical section (or otherwise pin the cgroup); confirm at call sites.
 */
static inline struct mem_cgroup *mm_cgroup(const struct mm_struct *mm)
{
	return rcu_dereference(mm->mem_cgroup);
}
/*
 * mem_cgroup_uncharge_page - uncharge by page rather than by page_cgroup.
 *
 * Convenience wrapper: looks up the page_cgroup of @page with
 * page_get_page_cgroup() and passes the result, unchecked, to
 * mem_cgroup_uncharge().
 */
static inline void mem_cgroup_uncharge_page(struct page *page)
{
	mem_cgroup_uncharge(page_get_page_cgroup(page));
}
/*
 * Page-migration hooks:
 *   mem_cgroup_prepare_migration() - increase refcnt of page->page_cgroup
 *   mem_cgroup_end_migration()     - decrease refcnt of page->page_cgroup
 *   mem_cgroup_page_migration()    - copy page->page_cgroup from the old
 *                                    page to the new page
 * NOTE(review): the meaning of the int returned by
 * mem_cgroup_prepare_migration() is not visible here; confirm against the
 * implementation.
 */
extern int mem_cgroup_prepare_migration(struct page *page);
extern void mem_cgroup_end_migration(struct page *page);
extern void mem_cgroup_page_migration(struct page *page, struct page *newpage);
#else /* CONFIG_CGROUP_MEM_CONT */
/* Stub for builds without CONFIG_CGROUP_MEM_CONT: does nothing. */
static inline void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p)
{
}
/* Stub for builds without CONFIG_CGROUP_MEM_CONT: does nothing. */
static inline void mm_free_cgroup(struct mm_struct *mm)
{
}
/*
 * Stub for builds without CONFIG_CGROUP_MEM_CONT: both arguments are
 * ignored and nothing is assigned.
 */
static inline void page_assign_page_cgroup(struct page *page,
					   struct page_cgroup *pc)
{
}
/*
 * Stub for builds without CONFIG_CGROUP_MEM_CONT: no page has a
 * page_cgroup, so the lookup always yields NULL.
 */
static inline struct page_cgroup *page_get_page_cgroup(struct page *page)
{
	return NULL;
}
/*
 * Stub for builds without CONFIG_CGROUP_MEM_CONT: nothing is charged and
 * 0 is always returned.
 */
static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
				    gfp_t gfp_mask)
{
	return 0;
}
/* Stub for builds without CONFIG_CGROUP_MEM_CONT: does nothing. */
static inline void mem_cgroup_uncharge(struct page_cgroup *pc)
{
}
/* Stub for builds without CONFIG_CGROUP_MEM_CONT: does nothing. */
static inline void mem_cgroup_uncharge_page(struct page *page)
{
}
/*
 * Stub for builds without CONFIG_CGROUP_MEM_CONT: both arguments are
 * ignored and nothing is moved.
 */
static inline void mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
{
}
/*
 * Stub for builds without CONFIG_CGROUP_MEM_CONT: nothing is charged and
 * 0 is always returned.
 */
static inline int mem_cgroup_cache_charge(struct page *page,
					  struct mm_struct *mm, gfp_t gfp_mask)
{
	return 0;
}
/*
 * Stub for builds without CONFIG_CGROUP_MEM_CONT: @mm is not examined and
 * NULL is always returned.
 */
static inline struct mem_cgroup *mm_cgroup(const struct mm_struct *mm)
{
	return NULL;
}
/*
 * Stub for builds without CONFIG_CGROUP_MEM_CONT: always returns 1,
 * whatever @task and @mem are.
 */
static inline int task_in_mem_cgroup(struct task_struct *task,
				     const struct mem_cgroup *mem)
{
	return 1;
}
/*
 * Stub for builds without CONFIG_CGROUP_MEM_CONT: @page is not touched
 * and 0 is always returned.
 */
static inline int mem_cgroup_prepare_migration(struct page *page)
{
	return 0;
}
/* Stub for builds without CONFIG_CGROUP_MEM_CONT: does nothing. */
static inline void mem_cgroup_end_migration(struct page *page)
{
}
/*
 * Stub for builds without CONFIG_CGROUP_MEM_CONT: neither page is touched
 * and nothing is copied.
 */
static inline void mem_cgroup_page_migration(struct page *page,
					     struct page *newpage)
{
}
#endif /* CONFIG_CGROUP_MEM_CONT */
#endif /* _LINUX_MEMCONTROL_H */