kernel-ark/arch/sparc64/mm/hugetlbpage.c
David Gibson dec4ad86c2 hugepage: fix broken check for offset alignment in hugepage mappings
For hugepage mappings, the file offset, like the address and size, needs to
be aligned to the size of a hugepage.

In commit 68589bc353, the check for this was
moved into prepare_hugepage_range() along with the address and size checks.
 But since BenH's rework of the get_unmapped_area() paths leading up to
commit 4b1d89290b, prepare_hugepage_range()
is only called for MAP_FIXED mappings, not for other mappings.  This means
we're no longer ever checking for an aligned offset - I've confirmed that
mmap() will (apparently) succeed with a misaligned offset on both powerpc
and i386 at least.

This patch restores the check, removing it from prepare_hugepage_range()
and putting it back into hugetlbfs_file_mmap().  I'm putting it there,
rather than in the get_unmapped_area() path so it only needs to go in one
place, than separately in the half-dozen or so arch-specific
implementations of hugetlb_get_unmapped_area().

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andi Kleen <ak@suse.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-08-31 01:42:23 -07:00

352 lines
8.3 KiB
C

/*
* SPARC64 Huge TLB page support.
*
* Copyright (C) 2002, 2003, 2006 David S. Miller (davem@davemloft.net)
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
/* Slightly simplified from the non-hugepage variant because by
* definition we don't have to worry about any page coloring stuff
*/
#define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL))
#define VA_EXCLUDE_END (0xfffff80000000000UL + (1UL << 32UL))
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
unsigned long addr,
unsigned long len,
unsigned long pgoff,
unsigned long flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct * vma;
unsigned long task_size = TASK_SIZE;
unsigned long start_addr;
if (test_thread_flag(TIF_32BIT))
task_size = STACK_TOP32;
if (unlikely(len >= VA_EXCLUDE_START))
return -ENOMEM;
if (len > mm->cached_hole_size) {
start_addr = addr = mm->free_area_cache;
} else {
start_addr = addr = TASK_UNMAPPED_BASE;
mm->cached_hole_size = 0;
}
task_size -= len;
full_search:
addr = ALIGN(addr, HPAGE_SIZE);
for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
/* At this point: (!vma || addr < vma->vm_end). */
if (addr < VA_EXCLUDE_START &&
(addr + len) >= VA_EXCLUDE_START) {
addr = VA_EXCLUDE_END;
vma = find_vma(mm, VA_EXCLUDE_END);
}
if (unlikely(task_size < addr)) {
if (start_addr != TASK_UNMAPPED_BASE) {
start_addr = addr = TASK_UNMAPPED_BASE;
mm->cached_hole_size = 0;
goto full_search;
}
return -ENOMEM;
}
if (likely(!vma || addr + len <= vma->vm_start)) {
/*
* Remember the place where we stopped the search:
*/
mm->free_area_cache = addr + len;
return addr;
}
if (addr + mm->cached_hole_size < vma->vm_start)
mm->cached_hole_size = vma->vm_start - addr;
addr = ALIGN(vma->vm_end, HPAGE_SIZE);
}
}
static unsigned long
hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
const unsigned long len,
const unsigned long pgoff,
const unsigned long flags)
{
struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
/* This should only ever run for 32-bit processes. */
BUG_ON(!test_thread_flag(TIF_32BIT));
/* check if free_area_cache is useful for us */
if (len <= mm->cached_hole_size) {
mm->cached_hole_size = 0;
mm->free_area_cache = mm->mmap_base;
}
/* either no address requested or can't fit in requested address hole */
addr = mm->free_area_cache & HPAGE_MASK;
/* make sure it can fit in the remaining address space */
if (likely(addr > len)) {
vma = find_vma(mm, addr-len);
if (!vma || addr <= vma->vm_start) {
/* remember the address as a hint for next time */
return (mm->free_area_cache = addr-len);
}
}
if (unlikely(mm->mmap_base < len))
goto bottomup;
addr = (mm->mmap_base-len) & HPAGE_MASK;
do {
/*
* Lookup failure means no vma is above this address,
* else if new region fits below vma->vm_start,
* return with success:
*/
vma = find_vma(mm, addr);
if (likely(!vma || addr+len <= vma->vm_start)) {
/* remember the address as a hint for next time */
return (mm->free_area_cache = addr);
}
/* remember the largest hole we saw so far */
if (addr + mm->cached_hole_size < vma->vm_start)
mm->cached_hole_size = vma->vm_start - addr;
/* try just below the current vma->vm_start */
addr = (vma->vm_start-len) & HPAGE_MASK;
} while (likely(len < vma->vm_start));
bottomup:
/*
* A failed mmap() very likely causes application failure,
* so fall back to the bottom-up function here. This scenario
* can happen with large stack limits and large mmap()
* allocations.
*/
mm->cached_hole_size = ~0UL;
mm->free_area_cache = TASK_UNMAPPED_BASE;
addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
/*
* Restore the topdown base:
*/
mm->free_area_cache = mm->mmap_base;
mm->cached_hole_size = ~0UL;
return addr;
}
unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long task_size = TASK_SIZE;
if (test_thread_flag(TIF_32BIT))
task_size = STACK_TOP32;
if (len & ~HPAGE_MASK)
return -EINVAL;
if (len > task_size)
return -ENOMEM;
if (flags & MAP_FIXED) {
if (prepare_hugepage_range(addr, len))
return -EINVAL;
return addr;
}
if (addr) {
addr = ALIGN(addr, HPAGE_SIZE);
vma = find_vma(mm, addr);
if (task_size - len >= addr &&
(!vma || addr + len <= vma->vm_start))
return addr;
}
if (mm->get_unmapped_area == arch_get_unmapped_area)
return hugetlb_get_unmapped_area_bottomup(file, addr, len,
pgoff, flags);
else
return hugetlb_get_unmapped_area_topdown(file, addr, len,
pgoff, flags);
}
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte = NULL;
/* We must align the address, because our caller will run
* set_huge_pte_at() on whatever we return, which writes out
* all of the sub-ptes for the hugepage range. So we have
* to give it the first such sub-pte.
*/
addr &= HPAGE_MASK;
pgd = pgd_offset(mm, addr);
pud = pud_alloc(mm, pgd, addr);
if (pud) {
pmd = pmd_alloc(mm, pud, addr);
if (pmd)
pte = pte_alloc_map(mm, pmd, addr);
}
return pte;
}
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte = NULL;
addr &= HPAGE_MASK;
pgd = pgd_offset(mm, addr);
if (!pgd_none(*pgd)) {
pud = pud_offset(pgd, addr);
if (!pud_none(*pud)) {
pmd = pmd_offset(pud, addr);
if (!pmd_none(*pmd))
pte = pte_offset_map(pmd, addr);
}
}
return pte;
}
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
return 0;
}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t entry)
{
int i;
if (!pte_present(*ptep) && pte_present(entry))
mm->context.huge_pte_count++;
addr &= HPAGE_MASK;
for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
set_pte_at(mm, addr, ptep, entry);
ptep++;
addr += PAGE_SIZE;
pte_val(entry) += PAGE_SIZE;
}
}
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
pte_t entry;
int i;
entry = *ptep;
if (pte_present(entry))
mm->context.huge_pte_count--;
addr &= HPAGE_MASK;
for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
pte_clear(mm, addr, ptep);
addr += PAGE_SIZE;
ptep++;
}
return entry;
}
struct page *follow_huge_addr(struct mm_struct *mm,
unsigned long address, int write)
{
return ERR_PTR(-EINVAL);
}
int pmd_huge(pmd_t pmd)
{
return 0;
}
struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
pmd_t *pmd, int write)
{
return NULL;
}
static void context_reload(void *__data)
{
struct mm_struct *mm = __data;
if (mm == current->mm)
load_secondary_context(mm);
}
void hugetlb_prefault_arch_hook(struct mm_struct *mm)
{
struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE];
if (likely(tp->tsb != NULL))
return;
tsb_grow(mm, MM_TSB_HUGE, 0);
tsb_context_switch(mm);
smp_tsb_sync(mm);
/* On UltraSPARC-III+ and later, configure the second half of
* the Data-TLB for huge pages.
*/
if (tlb_type == cheetah_plus) {
unsigned long ctx;
spin_lock(&ctx_alloc_lock);
ctx = mm->context.sparc64_ctx_val;
ctx &= ~CTX_PGSZ_MASK;
ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT;
if (ctx != mm->context.sparc64_ctx_val) {
/* When changing the page size fields, we
* must perform a context flush so that no
* stale entries match. This flush must
* occur with the original context register
* settings.
*/
do_flush_tlb_mm(mm);
/* Reload the context register of all processors
* also executing in this address space.
*/
mm->context.sparc64_ctx_val = ctx;
on_each_cpu(context_reload, mm, 0, 0);
}
spin_unlock(&ctx_alloc_lock);
}
}