diff --git a/arch/Kconfig b/arch/Kconfig index c35668fbf4d4..98de654b79b3 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -393,8 +393,12 @@ config HAVE_ARCH_JUMP_LABEL config HAVE_ARCH_JUMP_LABEL_RELATIVE bool +config MMU_GATHER_TABLE_FREE + bool + config MMU_GATHER_RCU_TABLE_FREE bool + select MMU_GATHER_TABLE_FREE config MMU_GATHER_PAGE_SIZE bool @@ -404,6 +408,7 @@ config MMU_GATHER_NO_RANGE config MMU_GATHER_NO_GATHER bool + depends on MMU_GATHER_TABLE_FREE config ARCH_HAVE_NMI_SAFE_CMPXCHG bool diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index 46a21cee3442..4d4e7b6aabff 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -37,10 +37,6 @@ static inline void __tlb_remove_table(void *_table) #include -#ifndef CONFIG_MMU_GATHER_RCU_TABLE_FREE -#define tlb_remove_table(tlb, entry) tlb_remove_page(tlb, entry) -#endif - static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) { diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index ca0fe75b5355..f391f6b500b4 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -56,6 +56,15 @@ * Defaults to flushing at tlb_end_vma() to reset the range; helps when * there's large holes between the VMAs. * + * - tlb_remove_table() + * + * tlb_remove_table() is the basic primitive to free page-table directories + * (__p*_free_tlb()). In it's most primitive form it is an alias for + * tlb_remove_page() below, for when page directories are pages and have no + * additional constraints. + * + * See also MMU_GATHER_TABLE_FREE and MMU_GATHER_RCU_TABLE_FREE. + * * - tlb_remove_page() / __tlb_remove_page() * - tlb_remove_page_size() / __tlb_remove_page_size() * @@ -129,17 +138,24 @@ * This might be useful if your architecture has size specific TLB * invalidation instructions. * - * MMU_GATHER_RCU_TABLE_FREE + * MMU_GATHER_TABLE_FREE * * This provides tlb_remove_table(), to be used instead of tlb_remove_page() - * for page directores (__p*_free_tlb()). This provides separate freeing of - * the page-table pages themselves in a semi-RCU fashion (see comment below). - * Useful if your architecture doesn't use IPIs for remote TLB invalidates - * and therefore doesn't naturally serialize with software page-table walkers. + * for page directores (__p*_free_tlb()). + * + * Useful if your architecture has non-page page directories. * * When used, an architecture is expected to provide __tlb_remove_table() * which does the actual freeing of these pages. * + * MMU_GATHER_RCU_TABLE_FREE + * + * Like MMU_GATHER_TABLE_FREE, and adds semi-RCU semantics to the free (see + * comment below). + * + * Useful if your architecture doesn't use IPIs for remote TLB invalidates + * and therefore doesn't naturally serialize with software page-table walkers. + * * MMU_GATHER_NO_RANGE * * Use this if your architecture lacks an efficient flush_tlb_range(). @@ -155,37 +171,12 @@ * various ptep_get_and_clear() functions. */ -#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE -/* - * Semi RCU freeing of the page directories. - * - * This is needed by some architectures to implement software pagetable walkers. - * - * gup_fast() and other software pagetable walkers do a lockless page-table - * walk and therefore needs some synchronization with the freeing of the page - * directories. The chosen means to accomplish that is by disabling IRQs over - * the walk. - * - * Architectures that use IPIs to flush TLBs will then automagically DTRT, - * since we unlink the page, flush TLBs, free the page. Since the disabling of - * IRQs delays the completion of the TLB flush we can never observe an already - * freed page. - * - * Architectures that do not have this (PPC) need to delay the freeing by some - * other means, this is that means. - * - * What we do is batch the freed directory pages (tables) and RCU free them. - * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling - * holds off grace periods. - * - * However, in order to batch these pages we need to allocate storage, this - * allocation is deep inside the MM code and can thus easily fail on memory - * pressure. To guarantee progress we fall back to single table freeing, see - * the implementation of tlb_remove_table_one(). - * - */ +#ifdef CONFIG_MMU_GATHER_TABLE_FREE + struct mmu_table_batch { +#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE struct rcu_head rcu; +#endif unsigned int nr; void *tables[0]; }; @@ -195,6 +186,17 @@ struct mmu_table_batch { extern void tlb_remove_table(struct mmu_gather *tlb, void *table); +#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */ + +/* + * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have page based + * page directories and we can use the normal page batching to free them. + */ +#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page)) + +#endif /* CONFIG_MMU_GATHER_TABLE_FREE */ + +#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE /* * This allows an architecture that does not use the linux page-tables for * hardware to skip the TLBI when freeing page tables. @@ -248,7 +250,7 @@ extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, struct mmu_gather { struct mm_struct *mm; -#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE +#ifdef CONFIG_MMU_GATHER_TABLE_FREE struct mmu_table_batch *batch; #endif diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index a28c74328085..a3538cb2bcbe 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -91,12 +91,87 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_ #endif /* MMU_GATHER_NO_GATHER */ +#ifdef CONFIG_MMU_GATHER_TABLE_FREE + +static void __tlb_remove_table_free(struct mmu_table_batch *batch) +{ + int i; + + for (i = 0; i < batch->nr; i++) + __tlb_remove_table(batch->tables[i]); + + free_page((unsigned long)batch); +} + #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE /* - * See the comment near struct mmu_table_batch. + * Semi RCU freeing of the page directories. + * + * This is needed by some architectures to implement software pagetable walkers. + * + * gup_fast() and other software pagetable walkers do a lockless page-table + * walk and therefore needs some synchronization with the freeing of the page + * directories. The chosen means to accomplish that is by disabling IRQs over + * the walk. + * + * Architectures that use IPIs to flush TLBs will then automagically DTRT, + * since we unlink the page, flush TLBs, free the page. Since the disabling of + * IRQs delays the completion of the TLB flush we can never observe an already + * freed page. + * + * Architectures that do not have this (PPC) need to delay the freeing by some + * other means, this is that means. + * + * What we do is batch the freed directory pages (tables) and RCU free them. + * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling + * holds off grace periods. + * + * However, in order to batch these pages we need to allocate storage, this + * allocation is deep inside the MM code and can thus easily fail on memory + * pressure. To guarantee progress we fall back to single table freeing, see + * the implementation of tlb_remove_table_one(). + * */ +static void tlb_remove_table_smp_sync(void *arg) +{ + /* Simply deliver the interrupt */ +} + +static void tlb_remove_table_sync_one(void) +{ + /* + * This isn't an RCU grace period and hence the page-tables cannot be + * assumed to be actually RCU-freed. + * + * It is however sufficient for software page-table walkers that rely on + * IRQ disabling. + */ + smp_call_function(tlb_remove_table_smp_sync, NULL, 1); +} + +static void tlb_remove_table_rcu(struct rcu_head *head) +{ + __tlb_remove_table_free(container_of(head, struct mmu_table_batch, rcu)); +} + +static void tlb_remove_table_free(struct mmu_table_batch *batch) +{ + call_rcu(&batch->rcu, tlb_remove_table_rcu); +} + +#else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */ + +static void tlb_remove_table_sync_one(void) { } + +static void tlb_remove_table_free(struct mmu_table_batch *batch) +{ + __tlb_remove_table_free(batch); +} + +#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */ + /* * If we want tlb_remove_table() to imply TLB invalidates. */ @@ -112,44 +187,19 @@ static inline void tlb_table_invalidate(struct mmu_gather *tlb) } } -static void tlb_remove_table_smp_sync(void *arg) -{ - /* Simply deliver the interrupt */ -} - static void tlb_remove_table_one(void *table) { - /* - * This isn't an RCU grace period and hence the page-tables cannot be - * assumed to be actually RCU-freed. - * - * It is however sufficient for software page-table walkers that rely on - * IRQ disabling. See the comment near struct mmu_table_batch. - */ - smp_call_function(tlb_remove_table_smp_sync, NULL, 1); + tlb_remove_table_sync_one(); __tlb_remove_table(table); } -static void tlb_remove_table_rcu(struct rcu_head *head) -{ - struct mmu_table_batch *batch; - int i; - - batch = container_of(head, struct mmu_table_batch, rcu); - - for (i = 0; i < batch->nr; i++) - __tlb_remove_table(batch->tables[i]); - - free_page((unsigned long)batch); -} - static void tlb_table_flush(struct mmu_gather *tlb) { struct mmu_table_batch **batch = &tlb->batch; if (*batch) { tlb_table_invalidate(tlb); - call_rcu(&(*batch)->rcu, tlb_remove_table_rcu); + tlb_remove_table_free(*batch); *batch = NULL; } } @@ -173,13 +223,21 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) tlb_table_flush(tlb); } -#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */ +static inline void tlb_table_init(struct mmu_gather *tlb) +{ + tlb->batch = NULL; +} + +#else /* !CONFIG_MMU_GATHER_TABLE_FREE */ + +static inline void tlb_table_flush(struct mmu_gather *tlb) { } +static inline void tlb_table_init(struct mmu_gather *tlb) { } + +#endif /* CONFIG_MMU_GATHER_TABLE_FREE */ static void tlb_flush_mmu_free(struct mmu_gather *tlb) { -#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE tlb_table_flush(tlb); -#endif #ifndef CONFIG_MMU_GATHER_NO_GATHER tlb_batch_pages_flush(tlb); #endif @@ -220,9 +278,7 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, tlb->batch_count = 0; #endif -#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE - tlb->batch = NULL; -#endif + tlb_table_init(tlb); #ifdef CONFIG_MMU_GATHER_PAGE_SIZE tlb->page_size = 0; #endif