Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "28 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (28 commits)
  fs/hugetlbfs/inode.c: change put_page/unlock_page order in hugetlbfs_fallocate()
  mm/hugetlb: fix NULL-pointer dereference on 5-level paging machine
  autofs: revert "autofs: fix AT_NO_AUTOMOUNT not being honored"
  autofs: revert "autofs: take more care to not update last_used on path walk"
  fs/fat/inode.c: fix sb_rdonly() change
  mm, memcg: fix mem_cgroup_swapout() for THPs
  mm: migrate: fix an incorrect call of prep_transhuge_page()
  kmemleak: add scheduling point to kmemleak_scan()
  scripts/bloat-o-meter: don't fail with division by 0
  fs/mbcache.c: make count_objects() more robust
  Revert "mm/page-writeback.c: print a warning if the vm dirtiness settings are illogical"
  mm/madvise.c: fix madvise() infinite loop under special circumstances
  exec: avoid RLIMIT_STACK races with prlimit()
  IB/core: disable memory registration of filesystem-dax vmas
  v4l2: disable filesystem-dax mapping support
  mm: fail get_vaddr_frames() for filesystem-dax mappings
  mm: introduce get_user_pages_longterm
  device-dax: implement ->split() to catch invalid munmap attempts
  mm, hugetlbfs: introduce ->split() to vm_operations_struct
  scripts/faddr2line: extend usage on generic arch
  ...
Linus Torvalds 2017-11-29 19:12:44 -08:00
commit a0908a1b7d
40 changed files with 238 additions and 101 deletions

View File

@@ -158,10 +158,6 @@ Note: the minimum value allowed for dirty_bytes is two pages (in bytes); any
 value lower than this limit will be ignored and the old configuration will be
 retained.
 
-Note: the value of dirty_bytes also must be set greater than
-dirty_background_bytes or the amount of memory corresponding to
-dirty_background_ratio.
-
 ==============================================================
 
 dirty_expire_centisecs
@@ -181,9 +177,6 @@ generating disk writes will itself start writing out dirty data.
 
 The total available memory is not equal to total system memory.
 
-Note: dirty_ratio must be set greater than dirty_background_ratio or
-ratio corresponding to dirty_background_bytes.
-
 ==============================================================
 
 dirty_writeback_centisecs

View File

@@ -221,7 +221,6 @@ static inline pte_t pte_mkspecial(pte_t pte)
 }
 
 #define __HAVE_ARCH_PTE_SPECIAL
-#define __HAVE_ARCH_PMD_WRITE
 #define pmd_write(pmd)		(pmd_isclear((pmd), L_PMD_SECT_RDONLY))
 #define pmd_dirty(pmd)		(pmd_isset((pmd), L_PMD_SECT_DIRTY))
 #define pud_page(pud)		pmd_page(__pmd(pud_val(pud)))

View File

@@ -345,7 +345,6 @@ static inline int pmd_protnone(pmd_t pmd)
 
 #define pmd_thp_or_huge(pmd)	(pmd_huge(pmd) || pmd_trans_huge(pmd))
 
-#define __HAVE_ARCH_PMD_WRITE
 #define pmd_write(pmd)		pte_write(pmd_pte(pmd))
 
 #define pmd_mkhuge(pmd)		(__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))

View File

@@ -552,7 +552,7 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
 extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 		       pmd_t *pmdp, pmd_t pmd);
 
-#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write pmd_write
 static inline int pmd_write(pmd_t pmd)
 {
 	return !!(pmd_val(pmd) & _PAGE_WRITE);

View File

@@ -1005,7 +1005,6 @@ static inline int pmd_protnone(pmd_t pmd)
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
-#define __HAVE_ARCH_PMD_WRITE
 #define pmd_write(pmd)		pte_write(pmd_pte(pmd))
 #define __pmd_write(pmd)	__pte_write(pmd_pte(pmd))
 #define pmd_savedwrite(pmd)	pte_savedwrite(pmd_pte(pmd))

View File

@@ -709,7 +709,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
 	return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT;
 }
 
-#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write pmd_write
 static inline int pmd_write(pmd_t pmd)
 {
 	return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0;
@@ -1264,6 +1264,12 @@ static inline pud_t pud_mkwrite(pud_t pud)
 	return pud;
 }
 
+#define pud_write pud_write
+static inline int pud_write(pud_t pud)
+{
+	return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0;
+}
+
 static inline pud_t pud_mkclean(pud_t pud)
 {
 	if (pud_large(pud)) {

View File

@@ -715,7 +715,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
 	return pte_pfn(pte);
 }
 
-#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write pmd_write
 static inline unsigned long pmd_write(pmd_t pmd)
 {
 	pte_t pte = __pte(pmd_val(pmd));

View File

@@ -75,7 +75,7 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
 	if (!(pmd_val(pmd) & _PAGE_VALID))
 		return 0;
 
-	if (write && !pmd_write(pmd))
+	if (!pmd_access_permitted(pmd, write))
 		return 0;
 
 	refs = 0;
@@ -114,7 +114,7 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
 	if (!(pud_val(pud) & _PAGE_VALID))
 		return 0;
 
-	if (write && !pud_write(pud))
+	if (!pud_access_permitted(pud, write))
 		return 0;
 
 	refs = 0;

View File

@@ -475,7 +475,6 @@ static inline void pmd_clear(pmd_t *pmdp)
 #define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
 #define pmd_huge_page(pmd)	pte_huge(pmd_pte(pmd))
 #define pmd_mkhuge(pmd)		pte_pmd(pte_mkhuge(pmd_pte(pmd)))
-#define __HAVE_ARCH_PMD_WRITE
 
 #define pfn_pmd(pfn, pgprot)	pte_pmd(pfn_pte((pfn), (pgprot)))
 #define pmd_pfn(pmd)		pte_pfn(pmd_pte(pmd))

View File

@@ -1061,7 +1061,7 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
 				  unsigned long address, pmd_t *pmdp);
 
-#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write pmd_write
 static inline int pmd_write(pmd_t pmd)
 {
 	return pmd_flags(pmd) & _PAGE_RW;
@@ -1088,6 +1088,12 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
 	clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp);
 }
 
+#define pud_write pud_write
+static inline int pud_write(pud_t pud)
+{
+	return pud_flags(pud) & _PAGE_RW;
+}
+
 /*
  * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
  *

View File

@@ -428,9 +428,21 @@ static int dev_dax_fault(struct vm_fault *vmf)
 	return dev_dax_huge_fault(vmf, PE_SIZE_PTE);
 }
 
+static int dev_dax_split(struct vm_area_struct *vma, unsigned long addr)
+{
+	struct file *filp = vma->vm_file;
+	struct dev_dax *dev_dax = filp->private_data;
+	struct dax_region *dax_region = dev_dax->region;
+
+	if (!IS_ALIGNED(addr, dax_region->align))
+		return -EINVAL;
+	return 0;
+}
+
 static const struct vm_operations_struct dax_vm_ops = {
 	.fault = dev_dax_fault,
 	.huge_fault = dev_dax_huge_fault,
+	.split = dev_dax_split,
 };
 
 static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
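
For context: the device-dax hunk above adds a ->split() hook that vetoes any VMA split not aligned to the dax region, which is how invalid munmap attempts get caught. A minimal userspace sketch of the resulting behavior; the device path and the 2MB alignment are assumptions for illustration, not part of the patch:

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t align = 2UL << 20;		/* assumed dax_region->align */
	int fd = open("/dev/dax0.0", O_RDWR);	/* hypothetical device */
	void *p;

	if (fd < 0)
		return 1;
	p = mmap(NULL, 2 * align, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;
	/* Unmapping a page-sized hole would split the VMA at an address
	 * unaligned to the region; dev_dax_split() should now fail this
	 * with EINVAL instead of leaving a mapping the driver cannot
	 * fault in. */
	if (munmap((char *)p + 4096, 4096))
		perror("munmap");	/* expected: Invalid argument */
	close(fd);
	return 0;
}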

View File

@@ -191,7 +191,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	sg_list_start = umem->sg_head.sgl;
 
 	while (npages) {
-		ret = get_user_pages(cur_base,
+		ret = get_user_pages_longterm(cur_base,
 				     min_t(unsigned long, npages,
 					   PAGE_SIZE / sizeof (struct page *)),
 				     gup_flags, page_list, vma_list);

View File

@@ -185,12 +185,13 @@ static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma,
 	dprintk(1, "init user [0x%lx+0x%lx => %d pages]\n",
 		data, size, dma->nr_pages);
 
-	err = get_user_pages(data & PAGE_MASK, dma->nr_pages,
+	err = get_user_pages_longterm(data & PAGE_MASK, dma->nr_pages,
 			     flags, dma->pages, NULL);
 
 	if (err != dma->nr_pages) {
 		dma->nr_pages = (err >= 0) ? err : 0;
-		dprintk(1, "get_user_pages: err=%d [%d]\n", err, dma->nr_pages);
+		dprintk(1, "get_user_pages_longterm: err=%d [%d]\n", err,
+			dma->nr_pages);
 		return err < 0 ? err : -EINVAL;
 	}
 	return 0;

View File

@@ -281,8 +281,8 @@ static int autofs4_mount_wait(const struct path *path, bool rcu_walk)
 		pr_debug("waiting for mount name=%pd\n", path->dentry);
 		status = autofs4_wait(sbi, path, NFY_MOUNT);
 		pr_debug("mount wait done status=%d\n", status);
-		ino->last_used = jiffies;
 	}
+	ino->last_used = jiffies;
 	return status;
 }
@@ -321,21 +321,16 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path)
 	 */
 	if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) {
 		struct dentry *parent = dentry->d_parent;
+		struct autofs_info *ino;
 		struct dentry *new;
 
 		new = d_lookup(parent, &dentry->d_name);
 		if (!new)
 			return NULL;
-		if (new == dentry)
-			dput(new);
-		else {
-			struct autofs_info *ino;
-
-			ino = autofs4_dentry_ino(new);
-			ino->last_used = jiffies;
-			dput(path->dentry);
-			path->dentry = new;
-		}
+		ino = autofs4_dentry_ino(new);
+		ino->last_used = jiffies;
+		dput(path->dentry);
+		path->dentry = new;
 	}
 	return path->dentry;
 }

View File

@@ -627,7 +627,8 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
 
 		if (pfn != pmd_pfn(*pmdp))
 			goto unlock_pmd;
-		if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
+		if (!pmd_dirty(*pmdp)
+				&& !pmd_access_permitted(*pmdp, WRITE))
 			goto unlock_pmd;
 
 		flush_cache_page(vma, address, pfn);

View File

@@ -1340,10 +1340,15 @@ void setup_new_exec(struct linux_binprm * bprm)
 		 * avoid bad behavior from the prior rlimits. This has to
 		 * happen before arch_pick_mmap_layout(), which examines
 		 * RLIMIT_STACK, but after the point of no return to avoid
-		 * needing to clean up the change on failure.
+		 * races from other threads changing the limits. This also
+		 * must be protected from races with prlimit() calls.
 		 */
+		task_lock(current->group_leader);
 		if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM)
 			current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM;
+		if (current->signal->rlim[RLIMIT_STACK].rlim_max > _STK_LIM)
+			current->signal->rlim[RLIMIT_STACK].rlim_max = _STK_LIM;
+		task_unlock(current->group_leader);
 	}
 
 	arch_pick_mmap_layout(current->mm);

View File

@@ -779,7 +779,7 @@ static void __exit fat_destroy_inodecache(void)
 
 static int fat_remount(struct super_block *sb, int *flags, char *data)
 {
-	int new_rdonly;
+	bool new_rdonly;
 	struct msdos_sb_info *sbi = MSDOS_SB(sb);
 	*flags |= SB_NODIRATIME | (sbi->options.isvfat ? 0 : SB_NOATIME);

View File

@@ -639,11 +639,11 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 		mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 
 		/*
-		 * page_put due to reference from alloc_huge_page()
 		 * unlock_page because locked by add_to_page_cache()
+		 * page_put due to reference from alloc_huge_page()
 		 */
-		put_page(page);
 		unlock_page(page);
+		put_page(page);
 	}
 
 	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
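
The swap above is not cosmetic: put_page() can drop the last reference and free the page, after which unlock_page() would touch freed memory. A sketch of the ordering rule; the helper name is illustrative, not kernel API:

/* Release a page that was locked by add_to_page_cache() and
 * referenced by alloc_huge_page(): drop the lock first, while our
 * reference still keeps the page alive, then drop the reference. */
static void release_locked_page(struct page *page)
{
	unlock_page(page);	/* safe: reference still held */
	put_page(page);		/* may free the page; must come last */
}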

View File

@@ -269,6 +269,9 @@ static unsigned long mb_cache_count(struct shrinker *shrink,
 	struct mb_cache *cache = container_of(shrink, struct mb_cache,
 					      c_shrink);
 
+	/* Unlikely, but not impossible */
+	if (unlikely(cache->c_entry_count < 0))
+		return 0;
 	return cache->c_entry_count;
 }

View File

@@ -1129,18 +1129,9 @@ static int follow_automount(struct path *path, struct nameidata *nd,
 	 * of the daemon to instantiate them before they can be used.
 	 */
 	if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
-			   LOOKUP_OPEN | LOOKUP_CREATE |
-			   LOOKUP_AUTOMOUNT))) {
-		/* Positive dentry that isn't meant to trigger an
-		 * automount, EISDIR will allow it to be used,
-		 * otherwise there's no mount here "now" so return
-		 * ENOENT.
-		 */
-		if (path->dentry->d_inode)
-			return -EISDIR;
-		else
-			return -ENOENT;
-	}
+			   LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
+	    path->dentry->d_inode)
+		return -EISDIR;
 
 	if (path->dentry->d_sb->s_user_ns != &init_user_ns)
 		return -EACCES;

View File

@@ -805,15 +805,23 @@ static inline int pmd_trans_huge(pmd_t pmd)
 {
 	return 0;
 }
-#ifndef __HAVE_ARCH_PMD_WRITE
+#ifndef pmd_write
 static inline int pmd_write(pmd_t pmd)
 {
 	BUG();
 	return 0;
 }
-#endif /* __HAVE_ARCH_PMD_WRITE */
+#endif /* pmd_write */
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+#ifndef pud_write
+static inline int pud_write(pud_t pud)
+{
+	BUG();
+	return 0;
+}
+#endif /* pud_write */
+
 #if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \
 	(defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
 		!defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
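
The hunk above replaces the __HAVE_ARCH_PMD_WRITE feature macro with the "define the symbol to itself" convention: one name serves as both the function and the override marker, so the feature test and the implementation cannot drift apart. A standalone sketch of the idiom; pmd_t and _PAGE_WRITE here are stand-ins, not any particular arch's definitions:

/* Stand-in type and bit so the sketch compiles outside the kernel. */
typedef struct { unsigned long pmd; } pmd_t;
#define pmd_val(x)	((x).pmd)
#define _PAGE_WRITE	0x2

/* "Arch" header: defining the name to itself marks the override. */
#define pmd_write pmd_write
static inline int pmd_write(pmd_t pmd)
{
	return !!(pmd_val(pmd) & _PAGE_WRITE);
}

/* "Generic" header: the fallback now compiles out automatically. */
#ifndef pmd_write
static inline int pmd_write(pmd_t pmd)
{
	return 0;	/* never built when an arch provides pmd_write */
}
#endif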

View File

@@ -3088,7 +3088,8 @@ static inline int vfs_lstat(const char __user *name, struct kstat *stat)
 static inline int vfs_fstatat(int dfd, const char __user *filename,
 			      struct kstat *stat, int flags)
 {
-	return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS);
+	return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT,
+			 stat, STATX_BASIC_STATS);
 }
 static inline int vfs_fstat(int fd, struct kstat *stat)
 {
@@ -3194,6 +3195,20 @@ static inline bool vma_is_dax(struct vm_area_struct *vma)
 	return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
 }
 
+static inline bool vma_is_fsdax(struct vm_area_struct *vma)
+{
+	struct inode *inode;
+
+	if (!vma->vm_file)
+		return false;
+	if (!vma_is_dax(vma))
+		return false;
+	inode = file_inode(vma->vm_file);
+	if (inode->i_mode == S_IFCHR)
+		return false; /* device-dax */
+	return true;
+}
+
 static inline int iocb_flags(struct file *file)
 {
 	int res = 0;

View File

@@ -239,14 +239,6 @@ static inline int pgd_write(pgd_t pgd)
 }
 #endif
 
-#ifndef pud_write
-static inline int pud_write(pud_t pud)
-{
-	BUG();
-	return 0;
-}
-#endif
-
 #define HUGETLB_ANON_FILE	"anon_hugepage"
 
 enum {

View File

@@ -54,7 +54,7 @@ static inline struct page *new_page_nodemask(struct page *page,
 	new_page = __alloc_pages_nodemask(gfp_mask, order,
 				preferred_nid, nodemask);
 
-	if (new_page && PageTransHuge(page))
+	if (new_page && PageTransHuge(new_page))
 		prep_transhuge_page(new_page);
 
 	return new_page;

View File

@@ -377,6 +377,7 @@ enum page_entry_size {
 struct vm_operations_struct {
 	void (*open)(struct vm_area_struct * area);
 	void (*close)(struct vm_area_struct * area);
+	int (*split)(struct vm_area_struct * area, unsigned long addr);
 	int (*mremap)(struct vm_area_struct * area);
 	int (*fault)(struct vm_fault *vmf);
 	int (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size);
@@ -1379,6 +1380,19 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
 		    unsigned int gup_flags, struct page **pages, int *locked);
 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 		    struct page **pages, unsigned int gup_flags);
+#ifdef CONFIG_FS_DAX
+long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
+			    unsigned int gup_flags, struct page **pages,
+			    struct vm_area_struct **vmas);
+#else
+static inline long get_user_pages_longterm(unsigned long start,
+		unsigned long nr_pages, unsigned int gup_flags,
+		struct page **pages, struct vm_area_struct **vmas)
+{
+	return get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+}
+#endif /* CONFIG_FS_DAX */
+
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages);
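
The declaration above gives long-term pinning callers a checked entry point; with CONFIG_FS_DAX disabled there is nothing to reject, so the fallback is plain get_user_pages(). A hypothetical driver-style caller modeled on the ib_umem_get() and videobuf conversions in this merge (pin_user_buffer() and its error policy are illustrative, not kernel API):

static long pin_user_buffer(unsigned long uaddr, unsigned long npages,
			    struct page **pages)
{
	long pinned, i;

	down_read(&current->mm->mmap_sem);
	pinned = get_user_pages_longterm(uaddr, npages, FOLL_WRITE,
					 pages, NULL);
	up_read(&current->mm->mmap_sem);

	if (pinned < 0)
		return pinned;	/* -EOPNOTSUPP for filesystem-dax vmas */
	if (pinned < npages) {
		/* partial pin: release what we got and bail out */
		for (i = 0; i < pinned; i++)
			put_page(pages[i]);
		return -EFAULT;
	}
	return pinned;
}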

View File

@@ -53,6 +53,18 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
 		ret = -EFAULT;
 		goto out;
 	}
+
+	/*
+	 * While get_vaddr_frames() could be used for transient (kernel
+	 * controlled lifetime) pinning of memory pages all current
+	 * users establish long term (userspace controlled lifetime)
+	 * page pinning. Treat get_vaddr_frames() like
+	 * get_user_pages_longterm() and disallow it for filesystem-dax
+	 * mappings.
+	 */
+	if (vma_is_fsdax(vma))
+		return -EOPNOTSUPP;
+
 	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) {
 		vec->got_ref = true;
 		vec->is_pfns = false;

View File

@@ -66,7 +66,7 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
  */
 static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
 {
-	return pte_write(pte) ||
+	return pte_access_permitted(pte, WRITE) ||
 		((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
 }
 
@@ -1095,6 +1095,70 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
 }
 EXPORT_SYMBOL(get_user_pages);
 
+#ifdef CONFIG_FS_DAX
+/*
+ * This is the same as get_user_pages() in that it assumes we are
+ * operating on the current task's mm, but it goes further to validate
+ * that the vmas associated with the address range are suitable for
+ * longterm elevated page reference counts. For example, filesystem-dax
+ * mappings are subject to the lifetime enforced by the filesystem and
+ * we need guarantees that longterm users like RDMA and V4L2 only
+ * establish mappings that have a kernel enforced revocation mechanism.
+ *
+ * "longterm" == userspace controlled elevated page count lifetime.
+ * Contrast this to iov_iter_get_pages() usages which are transient.
+ */
+long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
+		unsigned int gup_flags, struct page **pages,
+		struct vm_area_struct **vmas_arg)
+{
+	struct vm_area_struct **vmas = vmas_arg;
+	struct vm_area_struct *vma_prev = NULL;
+	long rc, i;
+
+	if (!pages)
+		return -EINVAL;
+
+	if (!vmas) {
+		vmas = kcalloc(nr_pages, sizeof(struct vm_area_struct *),
+			       GFP_KERNEL);
+		if (!vmas)
+			return -ENOMEM;
+	}
+
+	rc = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+
+	for (i = 0; i < rc; i++) {
+		struct vm_area_struct *vma = vmas[i];
+
+		if (vma == vma_prev)
+			continue;
+
+		vma_prev = vma;
+
+		if (vma_is_fsdax(vma))
+			break;
+	}
+
+	/*
+	 * Either get_user_pages() failed, or the vma validation
+	 * succeeded, in either case we don't need to put_page() before
+	 * returning.
+	 */
+	if (i >= rc)
+		goto out;
+
+	for (i = 0; i < rc; i++)
+		put_page(pages[i]);
+	rc = -EOPNOTSUPP;
+out:
+	if (vmas != vmas_arg)
+		kfree(vmas);
+	return rc;
+}
+EXPORT_SYMBOL(get_user_pages_longterm);
+#endif /* CONFIG_FS_DAX */
+
 /**
  * populate_vma_page_range() -  populate a range of pages in the vma.
  * @vma:   target vma

View File

@@ -391,11 +391,11 @@ again:
 	if (pmd_protnone(pmd))
 		return hmm_vma_walk_clear(start, end, walk);
 
-	if (write_fault && !pmd_write(pmd))
+	if (!pmd_access_permitted(pmd, write_fault))
 		return hmm_vma_walk_clear(start, end, walk);
 
 	pfn = pmd_pfn(pmd) + pte_index(addr);
-	flag |= pmd_write(pmd) ? HMM_PFN_WRITE : 0;
+	flag |= pmd_access_permitted(pmd, WRITE) ? HMM_PFN_WRITE : 0;
 	for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
 		pfns[i] = hmm_pfn_t_from_pfn(pfn) | flag;
 	return 0;
@@ -456,11 +456,11 @@ again:
 			continue;
 		}
 
-		if (write_fault && !pte_write(pte))
+		if (!pte_access_permitted(pte, write_fault))
 			goto fault;
 
 		pfns[i] = hmm_pfn_t_from_pfn(pte_pfn(pte)) | flag;
-		pfns[i] |= pte_write(pte) ? HMM_PFN_WRITE : 0;
+		pfns[i] |= pte_access_permitted(pte, WRITE) ? HMM_PFN_WRITE : 0;
 		continue;
 
 fault:

View File

@@ -870,7 +870,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
 	 */
 	WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set");
 
-	if (flags & FOLL_WRITE && !pmd_write(*pmd))
+	if (!pmd_access_permitted(*pmd, flags & FOLL_WRITE))
 		return NULL;
 
 	if (pmd_present(*pmd) && pmd_devmap(*pmd))
@@ -1012,7 +1012,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
 
 	assert_spin_locked(pud_lockptr(mm, pud));
 
-	if (flags & FOLL_WRITE && !pud_write(*pud))
+	if (!pud_access_permitted(*pud, flags & FOLL_WRITE))
 		return NULL;
 
 	if (pud_present(*pud) && pud_devmap(*pud))
@@ -1386,7 +1386,7 @@ out_unlock:
  */
 static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
 {
-	return pmd_write(pmd) ||
+	return pmd_access_permitted(pmd, WRITE) ||
 		((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
 }

View File

@@ -3125,6 +3125,13 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
 	}
 }
 
+static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
+{
+	if (addr & ~(huge_page_mask(hstate_vma(vma))))
+		return -EINVAL;
+	return 0;
+}
+
 /*
  * We cannot handle pagefaults against hugetlb pages at all.  They cause
  * handle_mm_fault() to try to instantiate regular-sized pages in the
@@ -3141,6 +3148,7 @@ const struct vm_operations_struct hugetlb_vm_ops = {
 	.fault = hugetlb_vm_op_fault,
 	.open = hugetlb_vm_op_open,
 	.close = hugetlb_vm_op_close,
+	.split = hugetlb_vm_op_split,
 };
 
 static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
@@ -4627,7 +4635,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 	pte_t *pte = NULL;
 
 	pgd = pgd_offset(mm, addr);
-	p4d = p4d_offset(pgd, addr);
+	p4d = p4d_alloc(mm, pgd, addr);
+	if (!p4d)
+		return NULL;
 	pud = pud_alloc(mm, p4d, addr);
 	if (pud) {
 		if (sz == PUD_SIZE) {

View File

@@ -1523,6 +1523,8 @@ static void kmemleak_scan(void)
 			if (page_count(page) == 0)
 				continue;
 			scan_block(page, page + 1, NULL);
+			if (!(pfn % (MAX_SCAN_SIZE / sizeof(*page))))
+				cond_resched();
 		}
 	}
 	put_online_mems();

View File

@@ -276,15 +276,14 @@ static long madvise_willneed(struct vm_area_struct *vma,
 {
 	struct file *file = vma->vm_file;
 
+	*prev = vma;
 #ifdef CONFIG_SWAP
 	if (!file) {
-		*prev = vma;
 		force_swapin_readahead(vma, start, end);
 		return 0;
 	}
 
 	if (shmem_mapping(file->f_mapping)) {
-		*prev = vma;
 		force_shm_swapin_readahead(vma, start, end,
 				file->f_mapping);
 		return 0;
@@ -299,7 +298,6 @@ static long madvise_willneed(struct vm_area_struct *vma,
 		return 0;
 	}
 
-	*prev = vma;
 	start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
 	if (end > vma->vm_end)
 		end = vma->vm_end;

View File

@@ -6044,7 +6044,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
 	memcg_check_events(memcg, page);
 
 	if (!mem_cgroup_is_root(memcg))
-		css_put(&memcg->css);
+		css_put_many(&memcg->css, nr_entries);
 }
 
 /**

View File

@@ -3948,7 +3948,7 @@ static int handle_pte_fault(struct vm_fault *vmf)
 	if (unlikely(!pte_same(*vmf->pte, entry)))
 		goto unlock;
 	if (vmf->flags & FAULT_FLAG_WRITE) {
-		if (!pte_write(entry))
+		if (!pte_access_permitted(entry, WRITE))
 			return do_wp_page(vmf);
 		entry = pte_mkdirty(entry);
 	}
@@ -4013,7 +4013,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 
 			/* NUMA case for anonymous PUDs would go here */
 
-			if (dirty && !pud_write(orig_pud)) {
+			if (dirty && !pud_access_permitted(orig_pud, WRITE)) {
 				ret = wp_huge_pud(&vmf, orig_pud);
 				if (!(ret & VM_FAULT_FALLBACK))
 					return ret;
@@ -4046,7 +4046,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 			if (pmd_protnone(orig_pmd) && vma_is_accessible(vma))
 				return do_huge_pmd_numa_page(&vmf, orig_pmd);
 
-			if (dirty && !pmd_write(orig_pmd)) {
+			if (dirty && !pmd_access_permitted(orig_pmd, WRITE)) {
 				ret = wp_huge_pmd(&vmf, orig_pmd);
 				if (!(ret & VM_FAULT_FALLBACK))
 					return ret;
@@ -4336,7 +4336,7 @@ int follow_phys(struct vm_area_struct *vma,
 		goto out;
 	pte = *ptep;
 
-	if ((flags & FOLL_WRITE) && !pte_write(pte))
+	if (!pte_access_permitted(pte, flags & FOLL_WRITE))
 		goto unlock;
 
 	*prot = pgprot_val(pte_pgprot(pte));

View File

@@ -2555,9 +2555,11 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct vm_area_struct *new;
 	int err;
 
-	if (is_vm_hugetlb_page(vma) && (addr &
-					~(huge_page_mask(hstate_vma(vma)))))
-		return -EINVAL;
+	if (vma->vm_ops && vma->vm_ops->split) {
+		err = vma->vm_ops->split(vma, addr);
+		if (err)
+			return err;
+	}
 
 	new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (!new)

View File

@@ -550,7 +550,6 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 	 */
 	set_bit(MMF_UNSTABLE, &mm->flags);
 
-	tlb_gather_mmu(&tlb, mm, 0, -1);
 	for (vma = mm->mmap ; vma; vma = vma->vm_next) {
 		if (!can_madv_dontneed_vma(vma))
 			continue;
@@ -565,11 +564,13 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 		 * we do not want to block exit_mmap by keeping mm ref
 		 * count elevated without a good reason.
 		 */
-		if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED))
+		if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
+			tlb_gather_mmu(&tlb, mm, vma->vm_start, vma->vm_end);
 			unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end,
 					 NULL);
+			tlb_finish_mmu(&tlb, vma->vm_start, vma->vm_end);
+		}
 	}
-	tlb_finish_mmu(&tlb, 0, -1);
 
 	pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
 			task_pid_nr(tsk), tsk->comm,
 			K(get_mm_counter(mm, MM_ANONPAGES)),

View File

@@ -433,11 +433,8 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc)
 	else
 		bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE;
 
-	if (unlikely(bg_thresh >= thresh)) {
-		pr_warn("vm direct limit must be set greater than background limit.\n");
+	if (bg_thresh >= thresh)
 		bg_thresh = thresh / 2;
-	}
-
 	tsk = current;
 	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
 		bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32;

View File

@@ -2507,10 +2507,6 @@ void drain_all_pages(struct zone *zone)
 	if (WARN_ON_ONCE(!mm_percpu_wq))
 		return;
 
-	/* Workqueues cannot recurse */
-	if (current->flags & PF_WQ_WORKER)
-		return;
-
 	/*
 	 * Do not drain if one is already in progress unless it's specific to
 	 * a zone. Such callers are primarily CMA and memory hotplug and need
@@ -7656,11 +7652,18 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 
 	/*
 	 * In case of -EBUSY, we'd like to know which page causes problem.
-	 * So, just fall through. We will check it in test_pages_isolated().
+	 * So, just fall through. test_pages_isolated() has a tracepoint
+	 * which will report the busy page.
+	 *
+	 * It is possible that busy pages could become available before
+	 * the call to test_pages_isolated, and the range will actually be
+	 * allocated.  So, if we fall through be sure to clear ret so that
+	 * -EBUSY is not accidentally used or returned to caller.
 	 */
 	ret = __alloc_contig_migrate_range(&cc, start, end);
 	if (ret && ret != -EBUSY)
 		goto done;
+	ret = 0;
 
 	/*
 	 * Pages from [start, end) are within a MAX_ORDER_NR_PAGES

View File

@@ -83,8 +83,11 @@ def print_result(symboltype, symbolformat, argc):
     for d, n in delta:
         if d: print("%-40s %7s %7s %+7d" % (n, old.get(n,"-"), new.get(n,"-"), d))
 
-    print("Total: Before=%d, After=%d, chg %+.2f%%" % \
-        (otot, ntot, (ntot - otot)*100.0/otot))
+    if otot:
+        percent = (ntot - otot) * 100.0 / otot
+    else:
+        percent = 0
+    print("Total: Before=%d, After=%d, chg %+.2f%%" % (otot, ntot, percent))
 
 if sys.argv[1] == "-c":
     print_result("Function", "tT", 3)

View File

@@ -44,9 +44,16 @@
 set -o errexit
 set -o nounset
 
+READELF="${CROSS_COMPILE}readelf"
+ADDR2LINE="${CROSS_COMPILE}addr2line"
+SIZE="${CROSS_COMPILE}size"
+NM="${CROSS_COMPILE}nm"
+
 command -v awk >/dev/null 2>&1 || die "awk isn't installed"
-command -v readelf >/dev/null 2>&1 || die "readelf isn't installed"
-command -v addr2line >/dev/null 2>&1 || die "addr2line isn't installed"
+command -v ${READELF} >/dev/null 2>&1 || die "readelf isn't installed"
+command -v ${ADDR2LINE} >/dev/null 2>&1 || die "addr2line isn't installed"
+command -v ${SIZE} >/dev/null 2>&1 || die "size isn't installed"
+command -v ${NM} >/dev/null 2>&1 || die "nm isn't installed"
 
 usage() {
 	echo "usage: faddr2line <object file> <func+offset> <func+offset>..." >&2
@@ -69,10 +76,10 @@ die() {
 find_dir_prefix() {
 	local objfile=$1
 
-	local start_kernel_addr=$(readelf -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}')
+	local start_kernel_addr=$(${READELF} -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}')
 	[[ -z $start_kernel_addr ]] && return
 
-	local file_line=$(addr2line -e $objfile $start_kernel_addr)
+	local file_line=$(${ADDR2LINE} -e $objfile $start_kernel_addr)
 	[[ -z $file_line ]] && return
 
 	local prefix=${file_line%init/main.c:*}
@@ -104,7 +111,7 @@ __faddr2line() {
 	# Go through each of the object's symbols which match the func name.
 	# In rare cases there might be duplicates.
 	#
-	file_end=$(size -Ax $objfile | awk '$1 == ".text" {print $2}')
+	file_end=$(${SIZE} -Ax $objfile | awk '$1 == ".text" {print $2}')
 	while read symbol; do
 		local fields=($symbol)
 		local sym_base=0x${fields[0]}
@@ -156,10 +163,10 @@ __faddr2line() {
 
 	# pass real address to addr2line
 	echo "$func+$offset/$sym_size:"
-	addr2line -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;"
+	${ADDR2LINE} -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;"
 	DONE=1
 
-done < <(nm -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }')
+done < <(${NM} -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }')
 }
 
 [[ $# -lt 2 ]] && usage