kernel-ark/include/asm-i386/pgtable-2level.h
Jeremy Fitzhardinge 5311ab62cd [PATCH] i386: PARAVIRT: Allow paravirt backend to choose kernel PMD sharing
Normally when running in PAE mode, the 4th PMD maps the kernel address space,
which can be shared among all processes (since they all need the same kernel
mappings).

Xen, however, does not allow guests to have the kernel pmd shared between page
tables, so parameterize pgtable.c to allow both modes of operation.

There are several side-effects of this.  One is that vmalloc will update the
kernel address space mappings, and those updates need to be propagated into
all processes if the kernel mappings are not intrinsically shared.  In the
non-PAE case, this is done by maintaining a pgd_list of all processes; this
list is used when all process pagetables must be updated.  pgd_list is
threaded via otherwise unused entries in the page structure for the pgd, which
means that the pgd must be page-sized for this to work.

Normally the PAE pgd is only 4x64 byte entries large, but Xen requires the PAE
pgd to page aligned anyway, so this patch forces the pgd to be page
aligned+sized when the kernel pmd is unshared, to accomodate both these
requirements.

Also, since there may be several distinct kernel pmds (if the user/kernel
split is below 3G), there's no point in allocating them from a slab cache;
they're just allocated with get_free_page and initialized appropriately.  (Of
course the could be cached if there is just a single kernel pmd - which is the
default with a 3G user/kernel split - but it doesn't seem worthwhile to add
yet another case into this code).

[ Many thanks to wli for review comments. ]

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: William Lee Irwin III <wli@holomorphy.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Zachary Amsden <zach@vmware.com>
Cc: Christoph Lameter <clameter@sgi.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2007-05-02 19:27:13 +02:00

86 lines
2.5 KiB
C

#ifndef _I386_PGTABLE_2LEVEL_H
#define _I386_PGTABLE_2LEVEL_H
#define pte_ERROR(e) \
printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low)
#define pgd_ERROR(e) \
printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
/*
* Certain architectures need to do special things when PTEs
* within a page table are directly modified. Thus, the following
* hook is made available.
*/
static inline void native_set_pte(pte_t *ptep , pte_t pte)
{
*ptep = pte;
}
static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep , pte_t pte)
{
native_set_pte(ptep, pte);
}
static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
{
*pmdp = pmd;
}
#ifndef CONFIG_PARAVIRT
#define set_pte(pteptr, pteval) native_set_pte(pteptr, pteval)
#define set_pte_at(mm,addr,ptep,pteval) native_set_pte_at(mm, addr, ptep, pteval)
#define set_pmd(pmdptr, pmdval) native_set_pmd(pmdptr, pmdval)
#endif
#define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
#define set_pte_present(mm,addr,ptep,pteval) set_pte_at(mm,addr,ptep,pteval)
#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
static inline pte_t native_ptep_get_and_clear(pte_t *xp)
{
return __pte(xchg(&xp->pte_low, 0));
}
#define pte_page(x) pfn_to_page(pte_pfn(x))
#define pte_none(x) (!(x).pte_low)
#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
#define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
/*
* All present user pages are user-executable:
*/
static inline int pte_exec(pte_t pte)
{
return pte_user(pte);
}
/*
* All present pages are kernel-executable:
*/
static inline int pte_exec_kernel(pte_t pte)
{
return 1;
}
/*
* Bits 0, 6 and 7 are taken, split up the 29 bits of offset
* into this range:
*/
#define PTE_FILE_MAX_BITS 29
#define pte_to_pgoff(pte) \
((((pte).pte_low >> 1) & 0x1f ) + (((pte).pte_low >> 8) << 5 ))
#define pgoff_to_pte(off) \
((pte_t) { (((off) & 0x1f) << 1) + (((off) >> 5) << 8) + _PAGE_FILE })
/* Encode and de-code a swap entry */
#define __swp_type(x) (((x).val >> 1) & 0x1f)
#define __swp_offset(x) ((x).val >> 8)
#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
#endif /* _I386_PGTABLE_2LEVEL_H */