kernel-ark/mm/internal.h
Nick Piggin b291f00039 mlock: mlocked pages are unevictable
Make sure that mlocked pages also live on the unevictable LRU, so kswapd
will not scan them over and over again.

This is achieved through various strategies:

1) add yet another page flag--PG_mlocked--to indicate that
   the page is locked for efficient testing in vmscan and,
   optionally, fault path.  This allows early culling of
   unevictable pages, preventing them from getting to
   page_referenced()/try_to_unmap().  Also allows separate
   accounting of mlock'd pages, as Nick's original patch
   did.

   Note:  Nick's original mlock patch used a PG_mlocked
   flag.  I had removed this in favor of the PG_unevictable
   flag + an mlock_count [new page struct member].  I
   restored the PG_mlocked flag to eliminate the new
   count field.

2) add the mlock/unevictable infrastructure to mm/mlock.c,
   with internal APIs in mm/internal.h.  This is a rework
   of Nick's original patch to these files, taking into
   account that mlocked pages are now kept on unevictable
   LRU list.

3) update vmscan.c:page_evictable() to check PageMlocked()
   and, if vma passed in, the vm_flags.  Note that the vma
   will only be passed in for new pages in the fault path;
   and then only if the "cull unevictable pages in fault
   path" patch is included.

4) add try_to_unlock() to rmap.c to walk a page's rmap and
   ClearPageMlocked() if no other vmas have it mlocked.
   Reuses as much of try_to_unmap() as possible.  This
   effectively replaces the use of one of the lru list links
   as an mlock count.  If this mechanism let's pages in mlocked
   vmas leak through w/o PG_mlocked set [I don't know that it
   does], we should catch them later in try_to_unmap().  One
   hopes this will be rare, as it will be relatively expensive.

Original mm/internal.h, mm/rmap.c and mm/mlock.c changes:
Signed-off-by: Nick Piggin <npiggin@suse.de>

splitlru: introduce __get_user_pages():

  New munlock processing need to GUP_FLAGS_IGNORE_VMA_PERMISSIONS.
  because current get_user_pages() can't grab PROT_NONE pages theresore it
  cause PROT_NONE pages can't munlock.

[akpm@linux-foundation.org: fix this for pagemap-pass-mm-into-pagewalkers.patch]
[akpm@linux-foundation.org: untangle patch interdependencies]
[akpm@linux-foundation.org: fix things after out-of-order merging]
[hugh@veritas.com: fix page-flags mess]
[lee.schermerhorn@hp.com: fix munlock page table walk - now requires 'mm']
[kosaki.motohiro@jp.fujitsu.com: build fix]
[kosaki.motohiro@jp.fujitsu.com: fix truncate race and sevaral comments]
[kosaki.motohiro@jp.fujitsu.com: splitlru: introduce __get_user_pages()]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Matt Mackall <mpm@selenic.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-10-20 08:52:30 -07:00

223 lines
5.9 KiB
C

/* internal.h: mm/ internal definitions
*
* Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef __MM_INTERNAL_H
#define __MM_INTERNAL_H
#include <linux/mm.h>
void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
unsigned long floor, unsigned long ceiling);
extern void prep_compound_page(struct page *page, unsigned long order);
static inline void set_page_count(struct page *page, int v)
{
atomic_set(&page->_count, v);
}
/*
* Turn a non-refcounted page (->_count == 0) into refcounted with
* a count of one.
*/
static inline void set_page_refcounted(struct page *page)
{
VM_BUG_ON(PageTail(page));
VM_BUG_ON(atomic_read(&page->_count));
set_page_count(page, 1);
}
static inline void __put_page(struct page *page)
{
atomic_dec(&page->_count);
}
/*
* in mm/vmscan.c:
*/
extern int isolate_lru_page(struct page *page);
extern void putback_lru_page(struct page *page);
/*
* in mm/page_alloc.c
*/
extern void __free_pages_bootmem(struct page *page, unsigned int order);
/*
* function for dealing with page's order in buddy system.
* zone->lock is already acquired when we use these.
* So, we don't need atomic page->flags operations here.
*/
static inline unsigned long page_order(struct page *page)
{
VM_BUG_ON(!PageBuddy(page));
return page_private(page);
}
extern int mlock_vma_pages_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
extern void munlock_vma_pages_all(struct vm_area_struct *vma);
#ifdef CONFIG_UNEVICTABLE_LRU
/*
* unevictable_migrate_page() called only from migrate_page_copy() to
* migrate unevictable flag to new page.
* Note that the old page has been isolated from the LRU lists at this
* point so we don't need to worry about LRU statistics.
*/
static inline void unevictable_migrate_page(struct page *new, struct page *old)
{
if (TestClearPageUnevictable(old))
SetPageUnevictable(new);
}
#else
static inline void unevictable_migrate_page(struct page *new, struct page *old)
{
}
#endif
#ifdef CONFIG_UNEVICTABLE_LRU
/*
* Called only in fault path via page_evictable() for a new page
* to determine if it's being mapped into a LOCKED vma.
* If so, mark page as mlocked.
*/
static inline int is_mlocked_vma(struct vm_area_struct *vma, struct page *page)
{
VM_BUG_ON(PageLRU(page));
if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED))
return 0;
SetPageMlocked(page);
return 1;
}
/*
* must be called with vma's mmap_sem held for read, and page locked.
*/
extern void mlock_vma_page(struct page *page);
/*
* Clear the page's PageMlocked(). This can be useful in a situation where
* we want to unconditionally remove a page from the pagecache -- e.g.,
* on truncation or freeing.
*
* It is legal to call this function for any page, mlocked or not.
* If called for a page that is still mapped by mlocked vmas, all we do
* is revert to lazy LRU behaviour -- semantics are not broken.
*/
extern void __clear_page_mlock(struct page *page);
static inline void clear_page_mlock(struct page *page)
{
if (unlikely(TestClearPageMlocked(page)))
__clear_page_mlock(page);
}
/*
* mlock_migrate_page - called only from migrate_page_copy() to
* migrate the Mlocked page flag
*/
static inline void mlock_migrate_page(struct page *newpage, struct page *page)
{
if (TestClearPageMlocked(page))
SetPageMlocked(newpage);
}
#else /* CONFIG_UNEVICTABLE_LRU */
static inline int is_mlocked_vma(struct vm_area_struct *v, struct page *p)
{
return 0;
}
static inline void clear_page_mlock(struct page *page) { }
static inline void mlock_vma_page(struct page *page) { }
static inline void mlock_migrate_page(struct page *new, struct page *old) { }
#endif /* CONFIG_UNEVICTABLE_LRU */
/*
* FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node,
* so all functions starting at paging_init should be marked __init
* in those cases. SPARSEMEM, however, allows for memory hotplug,
* and alloc_bootmem_node is not used.
*/
#ifdef CONFIG_SPARSEMEM
#define __paginginit __meminit
#else
#define __paginginit __init
#endif
/* Memory initialisation debug and verification */
enum mminit_level {
MMINIT_WARNING,
MMINIT_VERIFY,
MMINIT_TRACE
};
#ifdef CONFIG_DEBUG_MEMORY_INIT
extern int mminit_loglevel;
#define mminit_dprintk(level, prefix, fmt, arg...) \
do { \
if (level < mminit_loglevel) { \
printk(level <= MMINIT_WARNING ? KERN_WARNING : KERN_DEBUG); \
printk(KERN_CONT "mminit::" prefix " " fmt, ##arg); \
} \
} while (0)
extern void mminit_verify_pageflags_layout(void);
extern void mminit_verify_page_links(struct page *page,
enum zone_type zone, unsigned long nid, unsigned long pfn);
extern void mminit_verify_zonelist(void);
#else
static inline void mminit_dprintk(enum mminit_level level,
const char *prefix, const char *fmt, ...)
{
}
static inline void mminit_verify_pageflags_layout(void)
{
}
static inline void mminit_verify_page_links(struct page *page,
enum zone_type zone, unsigned long nid, unsigned long pfn)
{
}
static inline void mminit_verify_zonelist(void)
{
}
#endif /* CONFIG_DEBUG_MEMORY_INIT */
/* mminit_validate_memmodel_limits is independent of CONFIG_DEBUG_MEMORY_INIT */
#if defined(CONFIG_SPARSEMEM)
extern void mminit_validate_memmodel_limits(unsigned long *start_pfn,
unsigned long *end_pfn);
#else
static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
unsigned long *end_pfn)
{
}
#endif /* CONFIG_SPARSEMEM */
#define GUP_FLAGS_WRITE 0x1
#define GUP_FLAGS_FORCE 0x2
#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, int len, int flags,
struct page **pages, struct vm_area_struct **vmas);
#endif