8f2af155b5
Patch series "exec: Pin stack limit during exec".

Attempts to solve problems with the stack limit changing during exec continue to be frustrated[1][2]. In addition to the specific issues around the Stack Clash family of flaws, Andy Lutomirski pointed out[3] other places during exec where the stack limit is used and is assumed to be unchanging. Given the many places it gets used and the fact that it can be manipulated/raced via setrlimit() and prlimit(), I think the only way to handle this is to move away from the "current" view of the stack limit and instead attach it to the bprm, and plumb this down into the functions that need to know the stack limits. This series implements the approach.

[1] 04e35f4495 ("exec: avoid RLIMIT_STACK races with prlimit()")
[2] 779f4e1c6c ("Revert "exec: avoid RLIMIT_STACK races with prlimit()"")
[3] to security@kernel.org, "Subject: existing rlimit races?"

This patch (of 3):

Since it is possible that the stack rlimit can change externally during exec (either via another thread calling setrlimit() or another process calling prlimit()), provide a way to pass the rlimit down into the per-architecture mm layout functions so that the rlimit can stay in the bprm structure instead of sitting in the signal structure until exec is finalized.

Link: http://lkml.kernel.org/r/1518638796-20819-2-git-send-email-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Ben Hutchings <ben@decadent.org.uk>
Cc: Willy Tarreau <w@1wt.eu>
Cc: Hugh Dickins <hughd@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Jason A. Donenfeld" <Jason@zx2c4.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Greg KH <greg@kroah.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ben Hutchings <ben.hutchings@codethink.co.uk>
Cc: Brad Spengler <spender@grsecurity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
209 lines
5.0 KiB
C
209 lines
5.0 KiB
C
/*
|
|
* This file is subject to the terms and conditions of the GNU General Public
|
|
* License. See the file "COPYING" in the main directory of this archive
|
|
* for more details.
|
|
*
|
|
* Copyright (C) 2011 Wind River Systems,
|
|
* written by Ralf Baechle <ralf@linux-mips.org>
|
|
*/
|
|
#include <linux/compiler.h>
|
|
#include <linux/elf-randomize.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/mman.h>
|
|
#include <linux/export.h>
|
|
#include <linux/personality.h>
|
|
#include <linux/random.h>
|
|
#include <linux/sched/signal.h>
|
|
#include <linux/sched/mm.h>
|
|
|
|
/*
 * Alignment mask consulted by COLOUR_ALIGN() and by the MAP_FIXED/MAP_SHARED
 * check in arch_get_unmapped_area_common() to respect cache aliasing
 * constraints.  Initialised to page granularity; exported for use outside
 * this file.
 */
unsigned long shm_align_mask = PAGE_SIZE - 1;	/* Sane caches */
EXPORT_SYMBOL(shm_align_mask);
|
|
|
|
/*
 * Gap between mmap and stack.
 *
 * mmap_base() clamps the stack rlimit to [MIN_GAP, MAX_GAP] before
 * subtracting it from TASK_SIZE: at least 128MB, at most 5/6 of TASK_SIZE.
 */
#define MIN_GAP (128*1024*1024UL)
#define MAX_GAP ((TASK_SIZE)/6*5)
|
|
|
|
static int mmap_is_legacy(struct rlimit *rlim_stack)
|
|
{
|
|
if (current->personality & ADDR_COMPAT_LAYOUT)
|
|
return 1;
|
|
|
|
if (rlim_stack->rlim_cur == RLIM_INFINITY)
|
|
return 1;
|
|
|
|
return sysctl_legacy_va_layout;
|
|
}
|
|
|
|
static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
|
|
{
|
|
unsigned long gap = rlim_stack->rlim_cur;
|
|
|
|
if (gap < MIN_GAP)
|
|
gap = MIN_GAP;
|
|
else if (gap > MAX_GAP)
|
|
gap = MAX_GAP;
|
|
|
|
return PAGE_ALIGN(TASK_SIZE - gap - rnd);
|
|
}
|
|
|
|
/*
 * Round @addr up to the next (shm_align_mask + 1) boundary, then add the
 * offset of @pgoff (converted to bytes) within that alignment window, so
 * that the resulting address and the file offset agree in the bits covered
 * by shm_align_mask.
 */
#define COLOUR_ALIGN(addr, pgoff)				\
	((((addr) + shm_align_mask) & ~shm_align_mask) +	\
	 (((pgoff) << PAGE_SHIFT) & shm_align_mask))
|
|
|
|
/* Search direction requested from arch_get_unmapped_area_common(). */
enum mmap_allocation_direction {
	UP,	/* search upwards from mm->mmap_base to TASK_SIZE */
	DOWN,	/* search below mm->mmap_base first, then fall back to UP */
};
|
|
|
|
static unsigned long arch_get_unmapped_area_common(struct file *filp,
|
|
unsigned long addr0, unsigned long len, unsigned long pgoff,
|
|
unsigned long flags, enum mmap_allocation_direction dir)
|
|
{
|
|
struct mm_struct *mm = current->mm;
|
|
struct vm_area_struct *vma;
|
|
unsigned long addr = addr0;
|
|
int do_color_align;
|
|
struct vm_unmapped_area_info info;
|
|
|
|
if (unlikely(len > TASK_SIZE))
|
|
return -ENOMEM;
|
|
|
|
if (flags & MAP_FIXED) {
|
|
/* Even MAP_FIXED mappings must reside within TASK_SIZE */
|
|
if (TASK_SIZE - len < addr)
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* We do not accept a shared mapping if it would violate
|
|
* cache aliasing constraints.
|
|
*/
|
|
if ((flags & MAP_SHARED) &&
|
|
((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
|
|
return -EINVAL;
|
|
return addr;
|
|
}
|
|
|
|
do_color_align = 0;
|
|
if (filp || (flags & MAP_SHARED))
|
|
do_color_align = 1;
|
|
|
|
/* requesting a specific address */
|
|
if (addr) {
|
|
if (do_color_align)
|
|
addr = COLOUR_ALIGN(addr, pgoff);
|
|
else
|
|
addr = PAGE_ALIGN(addr);
|
|
|
|
vma = find_vma(mm, addr);
|
|
if (TASK_SIZE - len >= addr &&
|
|
(!vma || addr + len <= vm_start_gap(vma)))
|
|
return addr;
|
|
}
|
|
|
|
info.length = len;
|
|
info.align_mask = do_color_align ? (PAGE_MASK & shm_align_mask) : 0;
|
|
info.align_offset = pgoff << PAGE_SHIFT;
|
|
|
|
if (dir == DOWN) {
|
|
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
|
|
info.low_limit = PAGE_SIZE;
|
|
info.high_limit = mm->mmap_base;
|
|
addr = vm_unmapped_area(&info);
|
|
|
|
if (!(addr & ~PAGE_MASK))
|
|
return addr;
|
|
|
|
/*
|
|
* A failed mmap() very likely causes application failure,
|
|
* so fall back to the bottom-up function here. This scenario
|
|
* can happen with large stack limits and large mmap()
|
|
* allocations.
|
|
*/
|
|
}
|
|
|
|
info.flags = 0;
|
|
info.low_limit = mm->mmap_base;
|
|
info.high_limit = TASK_SIZE;
|
|
return vm_unmapped_area(&info);
|
|
}
|
|
|
|
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr0,
|
|
unsigned long len, unsigned long pgoff, unsigned long flags)
|
|
{
|
|
return arch_get_unmapped_area_common(filp,
|
|
addr0, len, pgoff, flags, UP);
|
|
}
|
|
|
|
/*
|
|
* There is no need to export this but sched.h declares the function as
|
|
* extern so making it static here results in an error.
|
|
*/
|
|
unsigned long arch_get_unmapped_area_topdown(struct file *filp,
|
|
unsigned long addr0, unsigned long len, unsigned long pgoff,
|
|
unsigned long flags)
|
|
{
|
|
return arch_get_unmapped_area_common(filp,
|
|
addr0, len, pgoff, flags, DOWN);
|
|
}
|
|
|
|
unsigned long arch_mmap_rnd(void)
|
|
{
|
|
unsigned long rnd;
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
if (TASK_IS_32BIT_ADDR)
|
|
rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
|
|
else
|
|
#endif /* CONFIG_COMPAT */
|
|
rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
|
|
|
|
return rnd << PAGE_SHIFT;
|
|
}
|
|
|
|
void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
|
|
{
|
|
unsigned long random_factor = 0UL;
|
|
|
|
if (current->flags & PF_RANDOMIZE)
|
|
random_factor = arch_mmap_rnd();
|
|
|
|
if (mmap_is_legacy(rlim_stack)) {
|
|
mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
|
|
mm->get_unmapped_area = arch_get_unmapped_area;
|
|
} else {
|
|
mm->mmap_base = mmap_base(random_factor, rlim_stack);
|
|
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
|
|
}
|
|
}
|
|
|
|
static inline unsigned long brk_rnd(void)
|
|
{
|
|
unsigned long rnd = get_random_long();
|
|
|
|
rnd = rnd << PAGE_SHIFT;
|
|
/* 8MB for 32bit, 256MB for 64bit */
|
|
if (TASK_IS_32BIT_ADDR)
|
|
rnd = rnd & 0x7ffffful;
|
|
else
|
|
rnd = rnd & 0xffffffful;
|
|
|
|
return rnd;
|
|
}
|
|
|
|
unsigned long arch_randomize_brk(struct mm_struct *mm)
|
|
{
|
|
unsigned long base = mm->brk;
|
|
unsigned long ret;
|
|
|
|
ret = PAGE_ALIGN(base + brk_rnd());
|
|
|
|
if (ret < mm->brk)
|
|
return mm->brk;
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Report whether @kaddr maps to a valid page frame.
 *
 * The address is converted with virt_to_phys(), reduced to a page frame
 * number with PFN_DOWN() and checked with pfn_valid(), whose result is
 * returned.
 *
 * NOTE(review): @kaddr is not range-checked before virt_to_phys(); confirm
 * that callers only pass addresses for which that conversion is meaningful.
 */
int __virt_addr_valid(const volatile void *kaddr)
{
	return pfn_valid(PFN_DOWN(virt_to_phys(kaddr)));
}
EXPORT_SYMBOL_GPL(__virt_addr_valid);
|