Add patch to fix BUG_ON mm->nr_ptes (rhbz 786632)
This commit is contained in:
parent
3bb94782bd
commit
d826f97738
|
@ -728,6 +728,9 @@ Patch21105: disable-threading-in-compression-for-hibernate.patch
|
|||
Patch21106: regset-Prevent-null-pointer-reference-on-readonly-re.patch
|
||||
Patch21107: regset-Return-EFAULT-not-EIO-on-host-side-memory-fau.patch
|
||||
|
||||
#rhbz 786632
|
||||
Patch21108: mm-thp-fix-BUG-on-mm-nr_ptes.patch
|
||||
|
||||
Patch21200: unhandled-irqs-switch-to-polling.patch
|
||||
|
||||
%endif
|
||||
|
@ -1351,6 +1354,8 @@ ApplyPatch regset-Return-EFAULT-not-EIO-on-host-side-memory-fau.patch
|
|||
|
||||
ApplyPatch unhandled-irqs-switch-to-polling.patch
|
||||
|
||||
ApplyPatch mm-thp-fix-BUG-on-mm-nr_ptes.patch
|
||||
|
||||
# END OF PATCH APPLICATIONS
|
||||
|
||||
%endif
|
||||
|
@ -2000,6 +2005,7 @@ fi
|
|||
%changelog
|
||||
* Mon Mar 05 2012 Josh Boyer <jwboyer@redhat.com>
|
||||
- CVE-2012-1097 regset: Prevent null pointer reference on readonly regsets
|
||||
- Add patch to fix BUG_ON mm->nr_ptes (rhbz 786632)
|
||||
|
||||
* Fri Mar 02 2012 Dave Jones <davej@redhat.com>
|
||||
- Enable VM debugging in non-debug kernels too.
|
||||
|
|
|
@ -0,0 +1,121 @@
|
|||
On Thu, 16 Feb 2012, Andrea Arcangeli wrote:
|
||||
> On Thu, Feb 16, 2012 at 01:53:04AM -0800, Hugh Dickins wrote:
|
||||
> > Yes (and I think less troublesome than most BUGs, coming at exit
|
||||
> > while not holding locks; though we could well make it a WARN_ON,
|
||||
> > I don't think that existed back in the day).
|
||||
>
|
||||
> A WARN_ON would be fine with me, go ahead if you prefer it... only
|
||||
> risk would be to go unnoticed or be underestimated. I am ok with the
|
||||
> BUG_ON too (even if this time it triggered false positives... sigh).
|
||||
>
|
||||
> > Acked-by: Hugh Dickins <hughd@google.com>
|
||||
>
|
||||
> Thanks for the quick review!
|
||||
>
|
||||
> > In looking into the bug, it had actually bothered me a little that you
|
||||
> > were setting aside those pages, yet not counting them into nr_ptes;
|
||||
> > though the only thing that cares is oom_kill.c, and the count of pages
|
||||
> > in each hugepage can only dwarf the count in nr_ptes (whereas, without
|
||||
> > hugepages, it's possible to populate very sparsely and nr_ptes become
|
||||
> > significant).
|
||||
>
|
||||
> Agreed, it's not significant either ways.
|
||||
>
|
||||
> Running my two primary systems with this applied for half a day and no
|
||||
> problem so far so it should be good foro -mm at least.
|
||||
|
||||
And I've had no trouble running your patch since then (but I never hit
|
||||
the bug it fixes either). But we've all forgottent about it, so let me
|
||||
bring your patch back inline (I've added one introductory sentence) and
|
||||
address to akpm...
|
||||
|
||||
|
||||
From: Andrea Arcangeli <aarcange@redhat.com>
|
||||
Subject: [PATCH] mm: thp: fix BUG on mm->nr_ptes
|
||||
|
||||
Dave Jones reports a few Fedora users hitting the BUG_ON(mm->nr_ptes...)
|
||||
in exit_mmap() recently.
|
||||
|
||||
Quoting Hugh's discovery and explanation of the SMP race condition:
|
||||
|
||||
===
|
||||
mm->nr_ptes had unusual locking: down_read mmap_sem plus
|
||||
page_table_lock when incrementing, down_write mmap_sem (or mm_users 0)
|
||||
when decrementing; whereas THP is careful to increment and decrement
|
||||
it under page_table_lock.
|
||||
|
||||
Now most of those paths in THP also hold mmap_sem for read or write
|
||||
(with appropriate checks on mm_users), but two do not: when
|
||||
split_huge_page() is called by hwpoison_user_mappings(), and when
|
||||
called by add_to_swap().
|
||||
|
||||
It's conceivable that the latter case is responsible for the
|
||||
exit_mmap() BUG_ON mm->nr_ptes that has been reported on Fedora.
|
||||
===
|
||||
|
||||
The simplest way to fix it without having to alter the locking is to
|
||||
make split_huge_page() a noop in nr_ptes terms, so by counting the
|
||||
preallocated pagetables that exists for every mapped hugepage. It was
|
||||
an arbitrary choice not to count them and either way is not wrong or
|
||||
right, because they are not used but they're still allocated.
|
||||
|
||||
Reported-by: Dave Jones <davej@redhat.com>
|
||||
Reported-by: Hugh Dickins <hughd@google.com>
|
||||
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
|
||||
Acked-by: Hugh Dickins <hughd@google.com>
|
||||
---
|
||||
mm/huge_memory.c | 6 +++---
|
||||
1 files changed, 3 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
|
||||
index 91d3efb..8f7fc39 100644
|
||||
--- a/mm/huge_memory.c
|
||||
+++ b/mm/huge_memory.c
|
||||
@@ -671,6 +671,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
|
||||
set_pmd_at(mm, haddr, pmd, entry);
|
||||
prepare_pmd_huge_pte(pgtable, mm);
|
||||
add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
|
||||
+ mm->nr_ptes++;
|
||||
spin_unlock(&mm->page_table_lock);
|
||||
}
|
||||
|
||||
@@ -789,6 +790,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
|
||||
pmd = pmd_mkold(pmd_wrprotect(pmd));
|
||||
set_pmd_at(dst_mm, addr, dst_pmd, pmd);
|
||||
prepare_pmd_huge_pte(pgtable, dst_mm);
|
||||
+ dst_mm->nr_ptes++;
|
||||
|
||||
ret = 0;
|
||||
out_unlock:
|
||||
@@ -887,7 +889,6 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
|
||||
}
|
||||
kfree(pages);
|
||||
|
||||
- mm->nr_ptes++;
|
||||
smp_wmb(); /* make pte visible before pmd */
|
||||
pmd_populate(mm, pmd, pgtable);
|
||||
page_remove_rmap(page);
|
||||
@@ -1047,6 +1048,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
||||
VM_BUG_ON(page_mapcount(page) < 0);
|
||||
add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
|
||||
VM_BUG_ON(!PageHead(page));
|
||||
+ tlb->mm->nr_ptes--;
|
||||
spin_unlock(&tlb->mm->page_table_lock);
|
||||
tlb_remove_page(tlb, page);
|
||||
pte_free(tlb->mm, pgtable);
|
||||
@@ -1375,7 +1377,6 @@ static int __split_huge_page_map(struct page *page,
|
||||
pte_unmap(pte);
|
||||
}
|
||||
|
||||
- mm->nr_ptes++;
|
||||
smp_wmb(); /* make pte visible before pmd */
|
||||
/*
|
||||
* Up to this point the pmd is present and huge and
|
||||
@@ -1988,7 +1989,6 @@ static void collapse_huge_page(struct mm_struct *mm,
|
||||
set_pmd_at(mm, address, pmd, _pmd);
|
||||
update_mmu_cache(vma, address, _pmd);
|
||||
prepare_pmd_huge_pte(pgtable, mm);
|
||||
- mm->nr_ptes--;
|
||||
spin_unlock(&mm->page_table_lock);
|
||||
|
||||
#ifndef CONFIG_NUMA
|
Loading…
Reference in New Issue