42ef73fe13
On -rt we were seeing spurious bad page states like: Bad page state in process 'firefox' page:c1bc2380 flags:0x40000000 mapping:c1bc2390 mapcount:0 count:0 Trying to fix it up, but a reboot is needed Backtrace: Pid: 503, comm: firefox Not tainted 2.6.26.8-rt13 #3 [<c043d0f3>] ? printk+0x14/0x19 [<c0272d4e>] bad_page+0x4e/0x79 [<c0273831>] free_hot_cold_page+0x5b/0x1d3 [<c02739f6>] free_hot_page+0xf/0x11 [<c0273a18>] __free_pages+0x20/0x2b [<c027d170>] __pte_alloc+0x87/0x91 [<c027d25e>] handle_mm_fault+0xe4/0x733 [<c043f680>] ? rt_mutex_down_read_trylock+0x57/0x63 [<c043f680>] ? rt_mutex_down_read_trylock+0x57/0x63 [<c0218875>] do_page_fault+0x36f/0x88a This is the case where a concurrent fault already installed the PTE and we get to free the newly allocated one. This is due to pgtable_page_ctor() doing the spin_lock_init(&page->ptl) which is overlaid with the {private, mapping} struct. union { struct { unsigned long private; struct address_space *mapping; }; spinlock_t ptl; struct kmem_cache *slab; struct page *first_page; }; Normally the spinlock is small enough to not stomp on page->mapping, but PREEMPT_RT=y has huge 'spin'locks. But lockdep kernels should also be able to trigger this splat, as the lock tracking code grows the spinlock to cover page->mapping. The obvious fix is calling pgtable_page_dtor() like the regular pte free path __pte_free_tlb() does. It seems all architectures except x86 and nm10300 already do this, and nm10300 doesn't seem to use pgtable_page_ctor(), which suggests it doesn't do SMP or simply doesnt do MMU at all or something. Signed-off-by: Peter Zijlstra <a.p.zijlsta@chello.nl> Signed-off-by: Ingo Molnar <mingo@elte.hu> Cc: <stable@kernel.org>
116 lines
3.5 KiB
C
116 lines
3.5 KiB
C
#ifndef _ASM_X86_PGALLOC_H
|
|
#define _ASM_X86_PGALLOC_H
|
|
|
|
#include <linux/threads.h>
|
|
#include <linux/mm.h> /* for struct page */
|
|
#include <linux/pagemap.h>
|
|
|
|
static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; }
|
|
|
|
#ifdef CONFIG_PARAVIRT
|
|
#include <asm/paravirt.h>
|
|
#else
|
|
#define paravirt_pgd_alloc(mm) __paravirt_pgd_alloc(mm)
|
|
static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) {}
|
|
static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) {}
|
|
static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) {}
|
|
static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
|
|
unsigned long start, unsigned long count) {}
|
|
static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) {}
|
|
static inline void paravirt_release_pte(unsigned long pfn) {}
|
|
static inline void paravirt_release_pmd(unsigned long pfn) {}
|
|
static inline void paravirt_release_pud(unsigned long pfn) {}
|
|
#endif
|
|
|
|
/*
|
|
* Allocate and free page tables.
|
|
*/
|
|
extern pgd_t *pgd_alloc(struct mm_struct *);
|
|
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
|
|
|
|
extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
|
|
extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
|
|
|
|
/* Should really implement gc for free page table pages. This could be
|
|
done with a reference count in struct page. */
|
|
|
|
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
|
|
{
|
|
BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
|
|
free_page((unsigned long)pte);
|
|
}
|
|
|
|
static inline void pte_free(struct mm_struct *mm, struct page *pte)
|
|
{
|
|
pgtable_page_dtor(pte);
|
|
__free_page(pte);
|
|
}
|
|
|
|
extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
|
|
|
|
static inline void pmd_populate_kernel(struct mm_struct *mm,
|
|
pmd_t *pmd, pte_t *pte)
|
|
{
|
|
paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT);
|
|
set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
|
|
}
|
|
|
|
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
|
|
struct page *pte)
|
|
{
|
|
unsigned long pfn = page_to_pfn(pte);
|
|
|
|
paravirt_alloc_pte(mm, pfn);
|
|
set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE));
|
|
}
|
|
|
|
#define pmd_pgtable(pmd) pmd_page(pmd)
|
|
|
|
#if PAGETABLE_LEVELS > 2
|
|
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
|
|
{
|
|
return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
|
|
}
|
|
|
|
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
|
|
{
|
|
BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
|
|
free_page((unsigned long)pmd);
|
|
}
|
|
|
|
extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
|
|
|
|
#ifdef CONFIG_X86_PAE
|
|
extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd);
|
|
#else /* !CONFIG_X86_PAE */
|
|
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
|
|
{
|
|
paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
|
|
set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)));
|
|
}
|
|
#endif /* CONFIG_X86_PAE */
|
|
|
|
#if PAGETABLE_LEVELS > 3
|
|
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
|
|
{
|
|
paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
|
|
set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
|
|
}
|
|
|
|
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
|
|
{
|
|
return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
|
|
}
|
|
|
|
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
|
|
{
|
|
BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
|
|
free_page((unsigned long)pud);
|
|
}
|
|
|
|
extern void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
|
|
#endif /* PAGETABLE_LEVELS > 3 */
|
|
#endif /* PAGETABLE_LEVELS > 2 */
|
|
|
|
#endif /* _ASM_X86_PGALLOC_H */
|