37d22a0d79
It's possible for pages to become visible prior to update_mmu_cache running if a thread within the same address space preempts the current thread or runs simultaneously on another CPU. That is, the following scenario is possible:

    CPU0                            CPU1

    write to page
    flush_dcache_page
    flush_icache_page
    set_pte_at

                                    map page

    update_mmu_cache

If CPU1 maps the page in between CPU0's set_pte_at, which marks it valid & visible, and update_mmu_cache where the dcache flush occurs then CPU1's icache will fill from stale data (unless it fills from the dcache, in which case all is good, but most MIPS CPUs don't have this property). Commit 4d46a67a3e ("MIPS: Fix race condition in lazy cache flushing.") attempted to fix that by performing the dcache flush in flush_icache_page such that it occurs before the set_pte_at call makes the page visible. However it has the problem that not all code that writes to pages exposed to userland calls flush_icache_page. There are many callers of set_pte_at under mm/ and only 2 of them do call flush_icache_page. Thus the race window between a page becoming visible & being coherent between the icache & dcache remains open in some cases.

To illustrate some of the cases, a WARN was added to __update_cache with this patch applied that triggered in cases where a page about to be flushed from the dcache was not the last page provided to flush_icache_page. That is, backtraces were obtained for cases in which the race window is left open without this patch. The 2 standout examples follow.

When forking a process:

[ 15.271842] [<80417630>] __update_cache+0xcc/0x188
[ 15.277274] [<80530394>] copy_page_range+0x56c/0x6ac
[ 15.282861] [<8042936c>] copy_process.part.54+0xd40/0x17ac
[ 15.289028] [<80429f80>] do_fork+0xe4/0x420
[ 15.293747] [<80413808>] handle_sys+0x128/0x14c

When exec'ing an ELF binary:

[ 14.445964] [<80417630>] __update_cache+0xcc/0x188
[ 14.451369] [<80538d88>] move_page_tables+0x414/0x498
[ 14.457075] [<8055d848>] setup_arg_pages+0x220/0x318
[ 14.462685] [<805b0f38>] load_elf_binary+0x530/0x12a0
[ 14.468374] [<8055ec3c>] search_binary_handler+0xbc/0x214
[ 14.474444] [<8055f6c0>] do_execveat_common+0x43c/0x67c
[ 14.480324] [<8055f938>] do_execve+0x38/0x44
[ 14.485137] [<80413808>] handle_sys+0x128/0x14c

These code paths write into a page, call flush_dcache_page then call set_pte_at without flush_icache_page in between. The end result is that the icache can become corrupted & userland processes may execute unexpected or invalid code, typically resulting in a reserved instruction exception, a trap or a segfault.
Fix this race condition fully by performing any cache maintenance required to keep the icache & dcache in sync in set_pte_at, before the page is made valid. This has the added bonus of ensuring the cache maintenance always happens in one location, rather than being duplicated in flush_icache_page & update_mmu_cache. It also matches the way other architectures solve the same problem (see arm, ia64 & powerpc).

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reported-by: Ionela Voinescu <ionela.voinescu@imgtec.com>
Cc: Lars Persson <lars.persson@axis.com>
Fixes: 4d46a67a3e ("MIPS: Fix race condition in lazy cache flushing.")
Cc: Steven J. Hill <sjhill@realitydiluted.com>
Cc: David Daney <david.daney@cavium.com>
Cc: Huacai Chen <chenhc@lemote.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Cc: stable <stable@vger.kernel.org> # v4.1+
Patchwork: https://patchwork.linux-mips.org/patch/12722/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
242 lines
7.0 KiB
C
242 lines
7.0 KiB
C
/*
|
|
* This file is subject to the terms and conditions of the GNU General Public
|
|
* License. See the file "COPYING" in the main directory of this archive
|
|
* for more details.
|
|
*
|
|
* Copyright (C) 1994 - 2003, 06, 07 by Ralf Baechle (ralf@linux-mips.org)
|
|
* Copyright (C) 2007 MIPS Technologies, Inc.
|
|
*/
|
|
#include <linux/fs.h>
|
|
#include <linux/fcntl.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/linkage.h>
|
|
#include <linux/module.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/mm.h>
|
|
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/highmem.h>
|
|
#include <asm/processor.h>
|
|
#include <asm/cpu.h>
|
|
#include <asm/cpu-features.h>
|
|
|
|
/*
 * Cache operations.
 *
 * Every entry below is a function pointer, not a function, so the actual
 * implementation is selected at run time.
 * NOTE(review): presumably these are filled in by the CPU-specific
 * *_cache_init() routines called from cpu_cache_init(); the assignments
 * are not visible in this file -- confirm.
 */

/* Operations taking no address: whole cache, or a whole mm. */
void (*flush_cache_all)(void);
void (*__flush_cache_all)(void);
void (*flush_cache_mm)(struct mm_struct *mm);

/* Operations on a range, or on a single page, of a VMA. */
void (*flush_cache_range)(struct vm_area_struct *vma, unsigned long start,
	unsigned long end);
void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page,
	unsigned long pfn);

/* Instruction cache range operations; both are exported GPL-only. */
void (*flush_icache_range)(unsigned long start, unsigned long end);
EXPORT_SYMBOL_GPL(flush_icache_range);
void (*local_flush_icache_range)(unsigned long start, unsigned long end);
EXPORT_SYMBOL_GPL(local_flush_icache_range);

/* Hooks named after vmap/vunmap. */
void (*__flush_cache_vmap)(void);
void (*__flush_cache_vunmap)(void);

/* Kernel vmap range operations; only the flush variant is exported. */
void (*__flush_kernel_vmap_range)(unsigned long vaddr, int size);
EXPORT_SYMBOL_GPL(__flush_kernel_vmap_range);
void (*__invalidate_kernel_vmap_range)(unsigned long vaddr, int size);
|
|
|
|
/*
 * MIPS specific cache operations.
 *
 * Function pointers like the generic ones; each export is kept directly
 * next to the pointer it exports.
 */
void (*flush_cache_sigtramp)(unsigned long addr);

void (*local_flush_data_cache_page)(void * addr);
EXPORT_SYMBOL_GPL(local_flush_data_cache_page);

void (*flush_data_cache_page)(unsigned long addr);
EXPORT_SYMBOL(flush_data_cache_page);

void (*flush_icache_all)(void);
EXPORT_SYMBOL(flush_icache_all);
|
|
|
|
#if defined(CONFIG_DMA_NONCOHERENT) || defined(CONFIG_DMA_MAYBE_COHERENT)

/*
 * DMA cache operations.
 *
 * Only compiled when one of the two config options above is set.  Of the
 * three pointers, only _dma_cache_wback_inv is exported to modules.
 */
void (*_dma_cache_wback_inv)(unsigned long start, unsigned long size);
EXPORT_SYMBOL(_dma_cache_wback_inv);

void (*_dma_cache_wback)(unsigned long start, unsigned long size);
void (*_dma_cache_inv)(unsigned long start, unsigned long size);

#endif /* CONFIG_DMA_NONCOHERENT || CONFIG_DMA_MAYBE_COHERENT */
|
|
|
|
/*
|
|
* We could optimize the case where the cache argument is not BCACHE but
|
|
* that seems very atypical use ...
|
|
*/
|
|
SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes,
|
|
unsigned int, cache)
|
|
{
|
|
if (bytes == 0)
|
|
return 0;
|
|
if (!access_ok(VERIFY_WRITE, (void __user *) addr, bytes))
|
|
return -EFAULT;
|
|
|
|
flush_icache_range(addr, addr + bytes);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Flush the data cache for @page, or postpone doing so.
 *
 * When the page has a mapping for which mapping_mapped() is false, the
 * flush is delayed: the page is only tagged via SetPageDcacheDirty() and
 * nothing is flushed now (__update_cache() tests that tag later).  In
 * every other case the page is flushed immediately through a kernel
 * address.
 */
void __flush_dcache_page(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	if (mapping && !mapping_mapped(mapping)) {
		SetPageDcacheDirty(page);
		return;
	}

	/*
	 * The flush could be delayed for the !page_mapping case as well.
	 * But that case is for exec env/arg pages, and those are almost
	 * certainly going to get faulted into the tlb (and thus flushed)
	 * anyway.
	 */
	if (PageHighMem(page)) {
		/* Highmem page: flush through a temporary atomic kmap. */
		void *kaddr = kmap_atomic(page);

		flush_data_cache_page((unsigned long)kaddr);
		__kunmap_atomic(kaddr);
	} else {
		flush_data_cache_page((unsigned long)page_address(page));
	}
}

EXPORT_SYMBOL(__flush_dcache_page);
|
|
|
|
/*
 * Flush the data cache for @page with respect to virtual address @vmaddr.
 *
 * @page:   page to flush
 * @vmaddr: virtual address handed to pages_do_alias() and kmap_coherent()
 *          (presumably the user-space address of the page -- confirm
 *          against callers)
 *
 * Nothing happens unless the page's kernel address and @vmaddr alias
 * according to pages_do_alias().  If they do, the page is flushed through
 * a kmap_coherent() mapping when it has a non-zero mapcount and is not
 * marked dcache dirty, and through its kernel address otherwise.
 */
void __flush_anon_page(struct page *page, unsigned long vmaddr)
{
	unsigned long kernel_addr = (unsigned long) page_address(page);
	void *coherent;

	if (!pages_do_alias(kernel_addr, vmaddr))
		return;

	if (!page_mapcount(page) || Page_dcache_dirty(page)) {
		flush_data_cache_page(kernel_addr);
		return;
	}

	coherent = kmap_coherent(page, vmaddr);
	flush_data_cache_page((unsigned long)coherent);
	kunmap_coherent();
}

EXPORT_SYMBOL(__flush_anon_page);
|
|
|
|
/*
 * Perform a data cache flush that was postponed for the page behind @pte.
 *
 * @address: virtual address the page is being mapped at
 * @pte:     page table entry describing the mapping
 *
 * Does nothing if the pfn in @pte is not valid or the page is not marked
 * dcache dirty.  Otherwise the page is flushed when either
 *  - the mapping is executable (pte_no_exec() is false) and
 *    cpu_has_ic_fills_f_dc is false, or
 *  - the page's kernel address aliases with @address.
 * The dcache dirty mark is cleared in both cases, whether or not a flush
 * was issued.
 */
void __update_cache(unsigned long address, pte_t pte)
{
	int exec = !(pte_no_exec(pte) || cpu_has_ic_fills_f_dc);
	unsigned long pfn = pte_pfn(pte);
	unsigned long kaddr;
	struct page *page;
	int highmem;

	if (unlikely(!pfn_valid(pfn)))
		return;

	page = pfn_to_page(pfn);
	if (!Page_dcache_dirty(page))
		return;

	/* Highmem pages need a temporary atomic kmap to get an address. */
	highmem = PageHighMem(page);
	kaddr = (unsigned long)(highmem ? kmap_atomic(page)
					: page_address(page));

	if (exec || pages_do_alias(kaddr, address & PAGE_MASK))
		flush_data_cache_page(kaddr);

	if (highmem)
		__kunmap_atomic((void *)kaddr);

	ClearPageDcacheDirty(page);
}
|
|
|
|
unsigned long _page_cachable_default;
|
|
EXPORT_SYMBOL(_page_cachable_default);
|
|
|
|
static inline void setup_protection_map(void)
|
|
{
|
|
if (cpu_has_rixi) {
|
|
protection_map[0] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
|
|
protection_map[1] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC);
|
|
protection_map[2] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
|
|
protection_map[3] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC);
|
|
protection_map[4] = __pgprot(_page_cachable_default | _PAGE_PRESENT);
|
|
protection_map[5] = __pgprot(_page_cachable_default | _PAGE_PRESENT);
|
|
protection_map[6] = __pgprot(_page_cachable_default | _PAGE_PRESENT);
|
|
protection_map[7] = __pgprot(_page_cachable_default | _PAGE_PRESENT);
|
|
|
|
protection_map[8] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
|
|
protection_map[9] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC);
|
|
protection_map[10] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE | _PAGE_NO_READ);
|
|
protection_map[11] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE);
|
|
protection_map[12] = __pgprot(_page_cachable_default | _PAGE_PRESENT);
|
|
protection_map[13] = __pgprot(_page_cachable_default | _PAGE_PRESENT);
|
|
protection_map[14] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_WRITE);
|
|
protection_map[15] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_WRITE);
|
|
|
|
} else {
|
|
protection_map[0] = PAGE_NONE;
|
|
protection_map[1] = PAGE_READONLY;
|
|
protection_map[2] = PAGE_COPY;
|
|
protection_map[3] = PAGE_COPY;
|
|
protection_map[4] = PAGE_READONLY;
|
|
protection_map[5] = PAGE_READONLY;
|
|
protection_map[6] = PAGE_COPY;
|
|
protection_map[7] = PAGE_COPY;
|
|
protection_map[8] = PAGE_NONE;
|
|
protection_map[9] = PAGE_READONLY;
|
|
protection_map[10] = PAGE_SHARED;
|
|
protection_map[11] = PAGE_SHARED;
|
|
protection_map[12] = PAGE_READONLY;
|
|
protection_map[13] = PAGE_READONLY;
|
|
protection_map[14] = PAGE_SHARED;
|
|
protection_map[15] = PAGE_SHARED;
|
|
}
|
|
}
|
|
|
|
void cpu_cache_init(void)
|
|
{
|
|
if (cpu_has_3k_cache) {
|
|
extern void __weak r3k_cache_init(void);
|
|
|
|
r3k_cache_init();
|
|
}
|
|
if (cpu_has_6k_cache) {
|
|
extern void __weak r6k_cache_init(void);
|
|
|
|
r6k_cache_init();
|
|
}
|
|
if (cpu_has_4k_cache) {
|
|
extern void __weak r4k_cache_init(void);
|
|
|
|
r4k_cache_init();
|
|
}
|
|
if (cpu_has_8k_cache) {
|
|
extern void __weak r8k_cache_init(void);
|
|
|
|
r8k_cache_init();
|
|
}
|
|
if (cpu_has_tx39_cache) {
|
|
extern void __weak tx39_cache_init(void);
|
|
|
|
tx39_cache_init();
|
|
}
|
|
|
|
if (cpu_has_octeon_cache) {
|
|
extern void __weak octeon_cache_init(void);
|
|
|
|
octeon_cache_init();
|
|
}
|
|
|
|
setup_protection_map();
|
|
}
|
|
|
|
/*
 * Default (__weak, so it may be overridden) test on a file and address.
 *
 * @file: file whose f_flags are examined
 * @addr: address compared against __pa(high_memory)
 *
 * Returns 1 when the file has O_DSYNC set or when @addr is at or above
 * __pa(high_memory), and 0 otherwise.
 * NOTE(review): judging by the name, a non-zero result presumably selects
 * an uncached mapping in the caller -- confirm against the call site.
 */
int __weak __uncached_access(struct file *file, unsigned long addr)
{
	return (file->f_flags & O_DSYNC) || addr >= __pa(high_memory);
}
|