42288fe366
Sasha was fuzzing with trinity and reported the following problem: BUG: sleeping function called from invalid context at kernel/mutex.c:269 in_atomic(): 1, irqs_disabled(): 0, pid: 6361, name: trinity-main 2 locks held by trinity-main/6361: #0: (&mm->mmap_sem){++++++}, at: [<ffffffff810aa314>] __do_page_fault+0x1e4/0x4f0 #1: (&(&mm->page_table_lock)->rlock){+.+...}, at: [<ffffffff8122f017>] handle_pte_fault+0x3f7/0x6a0 Pid: 6361, comm: trinity-main Tainted: G W 3.7.0-rc2-next-20121024-sasha-00001-gd95ef01-dirty #74 Call Trace: __might_sleep+0x1c3/0x1e0 mutex_lock_nested+0x29/0x50 mpol_shared_policy_lookup+0x2e/0x90 shmem_get_policy+0x2e/0x30 get_vma_policy+0x5a/0xa0 mpol_misplaced+0x41/0x1d0 handle_pte_fault+0x465/0x6a0 This was triggered by a different version of automatic NUMA balancing but in theory the current version is vulnerable to the same problem. do_numa_page -> numa_migrate_prep -> mpol_misplaced -> get_vma_policy -> shmem_get_policy It's very unlikely this will happen as shared pages are not marked pte_numa -- see the page_mapcount() check in change_pte_range() -- but it is possible. To address this, this patch restores sp->lock as originally implemented by Kosaki Motohiro. In the path where get_vma_policy() is called, it should not be calling sp_alloc() so it is not necessary to treat the PTL specially. Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Tested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: Mel Gorman <mgorman@suse.de> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
317 lines
7.5 KiB
C
317 lines
7.5 KiB
C
/*
 * NUMA memory policies for Linux.
 * Copyright 2003,2004 Andi Kleen SuSE Labs
 */
|
|
#ifndef _LINUX_MEMPOLICY_H
|
|
#define _LINUX_MEMPOLICY_H 1
|
|
|
|
|
|
#include <linux/mmzone.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/rbtree.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/nodemask.h>
|
|
#include <linux/pagemap.h>
|
|
#include <uapi/linux/mempolicy.h>
|
|
|
|
struct mm_struct;
|
|
|
|
#ifdef CONFIG_NUMA
|
|
|
|
/*
|
|
* Describe a memory policy.
|
|
*
|
|
* A mempolicy can be either associated with a process or with a VMA.
|
|
* For VMA related allocations the VMA policy is preferred, otherwise
|
|
* the process policy is used. Interrupts ignore the memory policy
|
|
* of the current process.
|
|
*
|
|
* Locking policy for interlave:
|
|
* In process context there is no locking because only the process accesses
|
|
* its own state. All vma manipulation is somewhat protected by a down_read on
|
|
* mmap_sem.
|
|
*
|
|
* Freeing policy:
|
|
* Mempolicy objects are reference counted. A mempolicy will be freed when
|
|
* mpol_put() decrements the reference count to zero.
|
|
*
|
|
* Duplicating policy objects:
|
|
* mpol_dup() allocates a new mempolicy and copies the specified mempolicy
|
|
* to the new storage. The reference count of the new object is initialized
|
|
* to 1, representing the caller of mpol_dup().
|
|
*/
|
|
struct mempolicy {
|
|
atomic_t refcnt;
|
|
unsigned short mode; /* See MPOL_* above */
|
|
unsigned short flags; /* See set_mempolicy() MPOL_F_* above */
|
|
union {
|
|
short preferred_node; /* preferred */
|
|
nodemask_t nodes; /* interleave/bind */
|
|
/* undefined for default */
|
|
} v;
|
|
union {
|
|
nodemask_t cpuset_mems_allowed; /* relative to these nodes */
|
|
nodemask_t user_nodemask; /* nodemask passed by user */
|
|
} w;
|
|
};
|
|
|
|
/*
 * Support for managing mempolicy data objects (clone, copy, destroy)
 * The default fast path of a NULL MPOL_DEFAULT policy is always inlined.
 */

extern void __mpol_put(struct mempolicy *pol);

/*
 * mpol_put - release a reference on @pol.
 * @pol: policy to release; NULL is accepted and ignored.
 *
 * Only the NULL check is done inline; a non-NULL policy is handed to
 * __mpol_put().
 */
static inline void mpol_put(struct mempolicy *pol)
{
	if (pol)
		__mpol_put(pol);
}
|
|
|
|
/*
|
|
* Does mempolicy pol need explicit unref after use?
|
|
* Currently only needed for shared policies.
|
|
*/
|
|
static inline int mpol_needs_cond_ref(struct mempolicy *pol)
|
|
{
|
|
return (pol && (pol->flags & MPOL_F_SHARED));
|
|
}
|
|
|
|
/*
 * mpol_cond_put - conditionally release a reference on @pol.
 * @pol: policy to release; may be NULL.
 *
 * Calls __mpol_put() only when mpol_needs_cond_ref() reports that @pol
 * needs an explicit unref; otherwise does nothing.
 */
static inline void mpol_cond_put(struct mempolicy *pol)
{
	if (mpol_needs_cond_ref(pol))
		__mpol_put(pol);
}
|
|
|
|
extern struct mempolicy *__mpol_dup(struct mempolicy *pol);

/*
 * mpol_dup - duplicate a mempolicy.
 * @pol: policy to copy; may be NULL.
 *
 * A NULL @pol is returned unchanged without calling out of line;
 * otherwise the result of __mpol_dup(@pol) is returned.
 */
static inline struct mempolicy *mpol_dup(struct mempolicy *pol)
{
	if (pol)
		pol = __mpol_dup(pol);
	return pol;
}
|
|
|
|
/* Read / assign the mempolicy pointer kept in a vma's vm_policy member. */
#define vma_policy(vma) ((vma)->vm_policy)
#define vma_set_policy(vma, pol) ((vma)->vm_policy = (pol))
|
|
|
|
static inline void mpol_get(struct mempolicy *pol)
|
|
{
|
|
if (pol)
|
|
atomic_inc(&pol->refcnt);
|
|
}
|
|
|
|
extern bool __mpol_equal(struct mempolicy *a, struct mempolicy *b);

/*
 * mpol_equal - compare two mempolicies.
 * @a: first policy.
 * @b: second policy.
 *
 * Identical pointers (which includes both being NULL) compare equal
 * inline; every other case is decided by __mpol_equal().
 */
static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b)
{
	if (a == b)
		return true;
	return __mpol_equal(a, b);
}
|
|
|
|
/*
|
|
* Tree of shared policies for a shared memory region.
|
|
* Maintain the policies in a pseudo mm that contains vmas. The vmas
|
|
* carry the policy. As a special twist the pseudo mm is indexed in pages, not
|
|
* bytes, so that we can work with shared memory segments bigger than
|
|
* unsigned long.
|
|
*/
|
|
|
|
struct sp_node {
|
|
struct rb_node nd;
|
|
unsigned long start, end;
|
|
struct mempolicy *policy;
|
|
};
|
|
|
|
struct shared_policy {
|
|
struct rb_root root;
|
|
spinlock_t lock;
|
|
};
|
|
|
|
/* Shared-policy API; only declared here for CONFIG_NUMA builds. */
void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol);
int mpol_set_shared_policy(struct shared_policy *info,
				struct vm_area_struct *vma,
				struct mempolicy *new);
void mpol_free_shared_policy(struct shared_policy *p);
struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
					    unsigned long idx);
|
|
|
|
/* Look up the mempolicy for @addr in @vma on behalf of @tsk (declared only). */
struct mempolicy *get_vma_policy(struct task_struct *tsk,
		struct vm_area_struct *vma, unsigned long addr);
|
|
|
|
extern void numa_default_policy(void);
|
|
extern void numa_policy_init(void);
|
|
extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new,
|
|
enum mpol_rebind_step step);
|
|
extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
|
|
extern void mpol_fix_fork_child_flag(struct task_struct *p);
|
|
|
|
/* Zonelist / nodemask helpers; only declared in this header. */
extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
				unsigned long addr, gfp_t gfp_flags,
				struct mempolicy **mpol, nodemask_t **nodemask);
extern bool init_nodemask_of_mempolicy(nodemask_t *mask);
extern bool mempolicy_nodemask_intersects(struct task_struct *tsk,
				const nodemask_t *mask);
extern unsigned slab_node(void);
|
|
|
|
extern enum zone_type policy_zone;
|
|
|
|
static inline void check_highest_zone(enum zone_type k)
|
|
{
|
|
if (k > policy_zone && k != ZONE_MOVABLE)
|
|
policy_zone = k;
|
|
}
|
|
|
|
/* Migrate pages of @mm between the @from and @to node sets (declared only). */
int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
		     const nodemask_t *to, int flags);
|
|
|
|
|
|
#ifdef CONFIG_TMPFS
/* String -> mempolicy parser; only declared when tmpfs is configured. */
extern int mpol_parse_str(char *str, struct mempolicy **mpol);
#endif

/* Format @pol into @buffer, which holds @maxlen bytes (declared only). */
extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol);
|
|
|
|
/* Check if a vma is migratable */
|
|
static inline int vma_migratable(struct vm_area_struct *vma)
|
|
{
|
|
if (vma->vm_flags & (VM_IO | VM_HUGETLB | VM_PFNMAP))
|
|
return 0;
|
|
/*
|
|
* Migration allocates pages in the highest zone. If we cannot
|
|
* do so then migration (at least from node to node) is not
|
|
* possible.
|
|
*/
|
|
if (vma->vm_file &&
|
|
gfp_zone(mapping_gfp_mask(vma->vm_file->f_mapping))
|
|
< policy_zone)
|
|
return 0;
|
|
return 1;
|
|
}
|
|
|
|
/*
 * Declared only; parameter names match the !CONFIG_NUMA stub later in
 * this header, whose return of -1 is commented "no node preference".
 */
extern int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
			  unsigned long address);
|
|
|
|
#else
|
|
|
|
/* Empty placeholder type used when CONFIG_NUMA is not set. */
struct mempolicy {};
|
|
|
|
/* !CONFIG_NUMA stub: every pair of policies compares equal. */
static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b)
{
	return true;
}
|
|
|
|
/* !CONFIG_NUMA stub: nothing to release. */
static inline void mpol_put(struct mempolicy *p)
{
}
|
|
|
|
/* !CONFIG_NUMA stub: nothing to release. */
static inline void mpol_cond_put(struct mempolicy *pol)
{
}
|
|
|
|
/* !CONFIG_NUMA stub: no reference count to bump. */
static inline void mpol_get(struct mempolicy *pol)
{
}
|
|
|
|
/* !CONFIG_NUMA stub: always returns NULL, whatever @old is. */
static inline struct mempolicy *mpol_dup(struct mempolicy *old)
{
	return NULL;
}
|
|
|
|
/* Empty placeholder type used when CONFIG_NUMA is not set. */
struct shared_policy {};
|
|
|
|
/* !CONFIG_NUMA stub: setting a shared policy always fails with -EINVAL. */
static inline int mpol_set_shared_policy(struct shared_policy *info,
					struct vm_area_struct *vma,
					struct mempolicy *new)
{
	return -EINVAL;
}
|
|
|
|
/* !CONFIG_NUMA stub: nothing to initialise. */
static inline void mpol_shared_policy_init(struct shared_policy *sp,
						struct mempolicy *mpol)
{
}
|
|
|
|
/* !CONFIG_NUMA stub: nothing to free. */
static inline void mpol_free_shared_policy(struct shared_policy *p)
{
}
|
|
|
|
/* !CONFIG_NUMA stub: lookups never find a policy and return NULL. */
static inline struct mempolicy *
mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
{
	return NULL;
}
|
|
|
|
/* !CONFIG_NUMA: a vma has no policy; reads give NULL, writes are no-ops. */
#define vma_policy(vma) NULL
#define vma_set_policy(vma, pol) do {} while(0)
|
|
|
|
/* !CONFIG_NUMA stub: no policy state to set up. */
static inline void numa_policy_init(void)
{
}
|
|
|
|
/* !CONFIG_NUMA stub: does nothing. */
static inline void numa_default_policy(void)
{
}
|
|
|
|
static inline void mpol_rebind_task(struct task_struct *tsk,
|
|
const nodemask_t *new,
|
|
enum mpol_rebind_step step)
|
|
{
|
|
}
|
|
|
|
/* !CONFIG_NUMA stub: no mm policies to rebind. */
static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
{
}
|
|
|
|
/* !CONFIG_NUMA stub: does nothing to @p. */
static inline void mpol_fix_fork_child_flag(struct task_struct *p)
{
}
|
|
|
|
/*
 * !CONFIG_NUMA stub.
 * @mpol:     out-parameter, always set to NULL.
 * @nodemask: out-parameter, always set to NULL.
 *
 * @vma and @addr are ignored; the return value is
 * node_zonelist(0, @gfp_flags).
 */
static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
				unsigned long addr, gfp_t gfp_flags,
				struct mempolicy **mpol, nodemask_t **nodemask)
{
	*mpol = NULL;
	*nodemask = NULL;
	return node_zonelist(0, gfp_flags);
}
|
|
|
|
/* !CONFIG_NUMA stub: leaves @m untouched and returns false. */
static inline bool init_nodemask_of_mempolicy(nodemask_t *m)
{
	return false;
}
|
|
|
|
static inline bool mempolicy_nodemask_intersects(struct task_struct *tsk,
|
|
const nodemask_t *mask)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
/* !CONFIG_NUMA stub: migrates nothing and returns 0. */
static inline int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
				   const nodemask_t *to, int flags)
{
	return 0;
}
|
|
|
|
/* !CONFIG_NUMA stub: @k is ignored. */
static inline void check_highest_zone(int k)
{
}
|
|
|
|
#ifdef CONFIG_TMPFS
|
|
/* !CONFIG_NUMA stub: parsing always fails; @str and @mpol are untouched. */
static inline int mpol_parse_str(char *str, struct mempolicy **mpol)
{
	return 1;	/* error */
}
|
|
#endif
|
|
|
|
/* !CONFIG_NUMA stub: writes nothing to @buffer and returns 0. */
static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
{
	return 0;
}
|
|
|
|
/* !CONFIG_NUMA stub: all arguments are ignored. */
static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
				 unsigned long address)
{
	return -1; /* no node preference */
}
|
|
|
|
#endif /* CONFIG_NUMA */
|
|
#endif
|