5c1f6ee9a3
We allocate one page for the last level of linux page table. With THP and large page size of 16MB, that would mean we are wasting large part of that page. To map 16MB area, we only need a PTE space of 2K with 64K page size. This patch reduce the space wastage by sharing the page allocated for the last level of linux page table with multiple pmd entries. We call these smaller chunks PTE page fragments and allocated page, PTE page. In order to support systems which doesn't have 64K HPTE support, we also add another 2K to PTE page fragment. The second half of the PTE fragments is used for storing slot and secondary bit information of an HPTE. With this we now have a 4K PTE fragment. We use a simple approach to share the PTE page. On allocation, we bump the PTE page refcount to 16 and share the PTE page with the next 16 pte alloc request. This should help in the node locality of the PTE page fragment, assuming that the immediate pte alloc request will mostly come from the same NUMA node. We don't try to reuse the freed PTE page fragment. Hence we could be waisting some space. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Acked-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
147 lines
3.2 KiB
C
147 lines
3.2 KiB
C
/*
|
|
* MMU context allocation for 64-bit kernels.
|
|
*
|
|
* Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org>
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*
|
|
*/
|
|
|
|
#include <linux/sched.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/string.h>
|
|
#include <linux/types.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/idr.h>
|
|
#include <linux/export.h>
|
|
#include <linux/gfp.h>
|
|
#include <linux/slab.h>
|
|
|
|
#include <asm/mmu_context.h>
|
|
#include <asm/pgalloc.h>
|
|
|
|
#include "icswx.h"
|
|
|
|
static DEFINE_SPINLOCK(mmu_context_lock);
|
|
static DEFINE_IDA(mmu_context_ida);
|
|
|
|
int __init_new_context(void)
|
|
{
|
|
int index;
|
|
int err;
|
|
|
|
again:
|
|
if (!ida_pre_get(&mmu_context_ida, GFP_KERNEL))
|
|
return -ENOMEM;
|
|
|
|
spin_lock(&mmu_context_lock);
|
|
err = ida_get_new_above(&mmu_context_ida, 1, &index);
|
|
spin_unlock(&mmu_context_lock);
|
|
|
|
if (err == -EAGAIN)
|
|
goto again;
|
|
else if (err)
|
|
return err;
|
|
|
|
if (index > MAX_USER_CONTEXT) {
|
|
spin_lock(&mmu_context_lock);
|
|
ida_remove(&mmu_context_ida, index);
|
|
spin_unlock(&mmu_context_lock);
|
|
return -ENOMEM;
|
|
}
|
|
|
|
return index;
|
|
}
|
|
EXPORT_SYMBOL_GPL(__init_new_context);
|
|
|
|
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
|
|
{
|
|
int index;
|
|
|
|
index = __init_new_context();
|
|
if (index < 0)
|
|
return index;
|
|
|
|
/* The old code would re-promote on fork, we don't do that
|
|
* when using slices as it could cause problem promoting slices
|
|
* that have been forced down to 4K
|
|
*/
|
|
if (slice_mm_new_context(mm))
|
|
slice_set_user_psize(mm, mmu_virtual_psize);
|
|
subpage_prot_init_new_context(mm);
|
|
mm->context.id = index;
|
|
#ifdef CONFIG_PPC_ICSWX
|
|
mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
|
|
if (!mm->context.cop_lockp) {
|
|
__destroy_context(index);
|
|
subpage_prot_free(mm);
|
|
mm->context.id = MMU_NO_CONTEXT;
|
|
return -ENOMEM;
|
|
}
|
|
spin_lock_init(mm->context.cop_lockp);
|
|
#endif /* CONFIG_PPC_ICSWX */
|
|
|
|
#ifdef CONFIG_PPC_64K_PAGES
|
|
mm->context.pte_frag = NULL;
|
|
#endif
|
|
return 0;
|
|
}
|
|
|
|
void __destroy_context(int context_id)
|
|
{
|
|
spin_lock(&mmu_context_lock);
|
|
ida_remove(&mmu_context_ida, context_id);
|
|
spin_unlock(&mmu_context_lock);
|
|
}
|
|
EXPORT_SYMBOL_GPL(__destroy_context);
|
|
|
|
#ifdef CONFIG_PPC_64K_PAGES
|
|
static void destroy_pagetable_page(struct mm_struct *mm)
|
|
{
|
|
int count;
|
|
void *pte_frag;
|
|
struct page *page;
|
|
|
|
pte_frag = mm->context.pte_frag;
|
|
if (!pte_frag)
|
|
return;
|
|
|
|
page = virt_to_page(pte_frag);
|
|
/* drop all the pending references */
|
|
count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
|
|
/* We allow PTE_FRAG_NR fragments from a PTE page */
|
|
count = atomic_sub_return(PTE_FRAG_NR - count, &page->_count);
|
|
if (!count) {
|
|
pgtable_page_dtor(page);
|
|
free_hot_cold_page(page, 0);
|
|
}
|
|
}
|
|
|
|
#else
|
|
static inline void destroy_pagetable_page(struct mm_struct *mm)
|
|
{
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
|
|
void destroy_context(struct mm_struct *mm)
|
|
{
|
|
|
|
#ifdef CONFIG_PPC_ICSWX
|
|
drop_cop(mm->context.acop, mm);
|
|
kfree(mm->context.cop_lockp);
|
|
mm->context.cop_lockp = NULL;
|
|
#endif /* CONFIG_PPC_ICSWX */
|
|
|
|
destroy_pagetable_page(mm);
|
|
__destroy_context(mm->context.id);
|
|
subpage_prot_free(mm);
|
|
mm->context.id = MMU_NO_CONTEXT;
|
|
}
|