7467be68cd
Drop merged patches: matroxfb-fix-font-corruption.patch
995 lines
30 KiB
Diff
995 lines
30 KiB
Diff
From 5006dd0fae6126c149868102c100cd90a20ef2e3 Mon Sep 17 00:00:00 2001
|
|
From: Kyle McMartin <kyle@phobos.i.jkkm.org>
|
|
Date: Mon, 29 Mar 2010 23:20:18 -0400
|
|
Subject: execshield
|
|
|
|
cebbert@redhat.com: added fix for bz#220892
|
|
|
|
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
|
|
index 617bd56..526248d 100644
|
|
--- a/arch/x86/include/asm/desc.h
|
|
+++ b/arch/x86/include/asm/desc.h
|
|
@@ -5,6 +5,7 @@
|
|
#include <asm/ldt.h>
|
|
#include <asm/mmu.h>
|
|
#include <linux/smp.h>
|
|
+#include <linux/mm_types.h>
|
|
|
|
static inline void fill_ldt(struct desc_struct *desc,
|
|
const struct user_desc *info)
|
|
@@ -93,6 +94,9 @@ static inline int desc_empty(const void *ptr)
|
|
|
|
#define load_TLS(t, cpu) native_load_tls(t, cpu)
|
|
#define set_ldt native_set_ldt
|
|
+#ifdef CONFIG_X86_32
|
|
+#define load_user_cs_desc native_load_user_cs_desc
|
|
+#endif /*CONFIG_X86_32*/
|
|
|
|
#define write_ldt_entry(dt, entry, desc) \
|
|
native_write_ldt_entry(dt, entry, desc)
|
|
@@ -392,4 +396,25 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
|
|
_set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
|
|
}
|
|
|
|
+#ifdef CONFIG_X86_32
|
|
+static inline void set_user_cs(struct desc_struct *desc, unsigned long limit)
|
|
+{
|
|
+ limit = (limit - 1) / PAGE_SIZE;
|
|
+ desc->a = limit & 0xffff;
|
|
+ desc->b = (limit & 0xf0000) | 0x00c0fb00;
|
|
+}
|
|
+
|
|
+static inline void native_load_user_cs_desc(int cpu, struct mm_struct *mm)
|
|
+{
|
|
+ get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS] = (mm)->context.user_cs;
|
|
+}
|
|
+
|
|
+#define arch_add_exec_range arch_add_exec_range
|
|
+#define arch_remove_exec_range arch_remove_exec_range
|
|
+#define arch_flush_exec_range arch_flush_exec_range
|
|
+extern void arch_add_exec_range(struct mm_struct *mm, unsigned long limit);
|
|
+extern void arch_remove_exec_range(struct mm_struct *mm, unsigned long limit);
|
|
+extern void arch_flush_exec_range(struct mm_struct *mm);
|
|
+#endif /* CONFIG_X86_32 */
|
|
+
|
|
#endif /* _ASM_X86_DESC_H */
|
|
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
|
|
index 80a1dee..8314c66 100644
|
|
--- a/arch/x86/include/asm/mmu.h
|
|
+++ b/arch/x86/include/asm/mmu.h
|
|
@@ -7,12 +7,19 @@
|
|
/*
|
|
* The x86 doesn't have a mmu context, but
|
|
* we put the segment information here.
|
|
+ *
|
|
+ * exec_limit is used to track the range PROT_EXEC
|
|
+ * mappings span.
|
|
*/
|
|
typedef struct {
|
|
void *ldt;
|
|
int size;
|
|
struct mutex lock;
|
|
void *vdso;
|
|
+#ifdef CONFIG_X86_32
|
|
+ struct desc_struct user_cs;
|
|
+ unsigned long exec_limit;
|
|
+#endif
|
|
} mm_context_t;
|
|
|
|
#ifdef CONFIG_SMP
|
|
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
|
|
index 5653f43..55dadb2 100644
|
|
--- a/arch/x86/include/asm/paravirt.h
|
|
+++ b/arch/x86/include/asm/paravirt.h
|
|
@@ -289,6 +289,12 @@ static inline void set_ldt(const void *addr, unsigned entries)
|
|
{
|
|
PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
|
|
}
|
|
+#ifdef CONFIG_X86_32
|
|
+static inline void load_user_cs_desc(unsigned int cpu, struct mm_struct *mm)
|
|
+{
|
|
+ PVOP_VCALL2(pv_cpu_ops.load_user_cs_desc, cpu, mm);
|
|
+}
|
|
+#endif /*CONFIG_X86_32*/
|
|
static inline void store_gdt(struct desc_ptr *dtr)
|
|
{
|
|
PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr);
|
|
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
|
|
index db9ef55..19c2793 100644
|
|
--- a/arch/x86/include/asm/paravirt_types.h
|
|
+++ b/arch/x86/include/asm/paravirt_types.h
|
|
@@ -118,6 +118,9 @@ struct pv_cpu_ops {
|
|
void (*store_gdt)(struct desc_ptr *);
|
|
void (*store_idt)(struct desc_ptr *);
|
|
void (*set_ldt)(const void *desc, unsigned entries);
|
|
+#ifdef CONFIG_X86_32
|
|
+ void (*load_user_cs_desc)(int cpu, struct mm_struct *mm);
|
|
+#endif
|
|
unsigned long (*store_tr)(void);
|
|
void (*load_tls)(struct thread_struct *t, unsigned int cpu);
|
|
#ifdef CONFIG_X86_64
|
|
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
|
|
index b753ea5..4893156 100644
|
|
--- a/arch/x86/include/asm/processor.h
|
|
+++ b/arch/x86/include/asm/processor.h
|
|
@@ -162,6 +162,9 @@ static inline int hlt_works(int cpu)
|
|
|
|
#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
|
|
|
|
+#define __HAVE_ARCH_ALIGN_STACK
|
|
+extern unsigned long arch_align_stack(unsigned long sp);
|
|
+
|
|
extern void cpu_detect(struct cpuinfo_x86 *c);
|
|
|
|
extern struct pt_regs *idle_regs(struct pt_regs *);
|
|
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
|
|
index 4868e4a..6c8d2ca 100644
|
|
--- a/arch/x86/kernel/cpu/common.c
|
|
+++ b/arch/x86/kernel/cpu/common.c
|
|
@@ -802,6 +802,20 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
|
|
/* Filter out anything that depends on CPUID levels we don't have */
|
|
filter_cpuid_features(c, true);
|
|
|
|
+ /*
|
|
+ * emulation of NX with segment limits unfortunately means
|
|
+ * we have to disable the fast system calls, due to the way that
|
|
+ * sysexit clears the segment limits on return.
|
|
+ * If we have either disabled exec-shield on the boot command line,
|
|
+ * or we have NX, then we don't need to do this.
|
|
+ */
|
|
+ if (exec_shield != 0) {
|
|
+#ifdef CONFIG_X86_PAE
|
|
+ if (!test_cpu_cap(c, X86_FEATURE_NX))
|
|
+#endif
|
|
+ clear_cpu_cap(c, X86_FEATURE_SEP);
|
|
+ }
|
|
+
|
|
/* If the model name is still unset, do table lookup. */
|
|
if (!c->x86_model_id[0]) {
|
|
const char *p;
|
|
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
|
|
index 1db183e..238b97d 100644
|
|
--- a/arch/x86/kernel/paravirt.c
|
|
+++ b/arch/x86/kernel/paravirt.c
|
|
@@ -345,6 +345,9 @@ struct pv_cpu_ops pv_cpu_ops = {
|
|
.read_tscp = native_read_tscp,
|
|
.load_tr_desc = native_load_tr_desc,
|
|
.set_ldt = native_set_ldt,
|
|
+#ifdef CONFIG_X86_32
|
|
+ .load_user_cs_desc = native_load_user_cs_desc,
|
|
+#endif /*CONFIG_X86_32*/
|
|
.load_gdt = native_load_gdt,
|
|
.load_idt = native_load_idt,
|
|
.store_gdt = native_store_gdt,
|
|
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
|
|
index f6c6266..8ac2589 100644
|
|
--- a/arch/x86/kernel/process_32.c
|
|
+++ b/arch/x86/kernel/process_32.c
|
|
@@ -251,7 +251,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
|
|
void
|
|
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
|
|
{
|
|
+ int cpu;
|
|
+
|
|
set_user_gs(regs, 0);
|
|
+
|
|
regs->fs = 0;
|
|
set_fs(USER_DS);
|
|
regs->ds = __USER_DS;
|
|
@@ -260,6 +263,11 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
|
|
regs->cs = __USER_CS;
|
|
regs->ip = new_ip;
|
|
regs->sp = new_sp;
|
|
+
|
|
+ cpu = get_cpu();
|
|
+ load_user_cs_desc(cpu, current->mm);
|
|
+ put_cpu();
|
|
+
|
|
/*
|
|
* Free the old FP and other extended state
|
|
*/
|
|
@@ -319,6 +327,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
|
if (preload_fpu)
|
|
prefetch(next->xstate);
|
|
|
|
+ if (next_p->mm)
|
|
+ load_user_cs_desc(cpu, next_p->mm);
|
|
+
|
|
/*
|
|
* Reload esp0.
|
|
*/
|
|
@@ -412,3 +423,40 @@ unsigned long get_wchan(struct task_struct *p)
|
|
return 0;
|
|
}
|
|
|
|
+static void modify_cs(struct mm_struct *mm, unsigned long limit)
|
|
+{
|
|
+ mm->context.exec_limit = limit;
|
|
+ set_user_cs(&mm->context.user_cs, limit);
|
|
+ if (mm == current->mm) {
|
|
+ int cpu;
|
|
+
|
|
+ cpu = get_cpu();
|
|
+ load_user_cs_desc(cpu, mm);
|
|
+ put_cpu();
|
|
+ }
|
|
+}
|
|
+
|
|
+void arch_add_exec_range(struct mm_struct *mm, unsigned long limit)
|
|
+{
|
|
+ if (limit > mm->context.exec_limit)
|
|
+ modify_cs(mm, limit);
|
|
+}
|
|
+
|
|
+void arch_remove_exec_range(struct mm_struct *mm, unsigned long old_end)
|
|
+{
|
|
+ struct vm_area_struct *vma;
|
|
+ unsigned long limit = PAGE_SIZE;
|
|
+
|
|
+ if (old_end == mm->context.exec_limit) {
|
|
+ for (vma = mm->mmap; vma; vma = vma->vm_next)
|
|
+ if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
|
|
+ limit = vma->vm_end;
|
|
+ modify_cs(mm, limit);
|
|
+ }
|
|
+}
|
|
+
|
|
+void arch_flush_exec_range(struct mm_struct *mm)
|
|
+{
|
|
+ mm->context.exec_limit = 0;
|
|
+ set_user_cs(&mm->context.user_cs, 0);
|
|
+}
|
|
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
|
|
index 1168e44..c452918 100644
|
|
--- a/arch/x86/kernel/traps.c
|
|
+++ b/arch/x86/kernel/traps.c
|
|
@@ -115,6 +115,76 @@ die_if_kernel(const char *str, struct pt_regs *regs, long err)
|
|
if (!user_mode_vm(regs))
|
|
die(str, regs, err);
|
|
}
|
|
+
|
|
+static inline int
|
|
+__compare_user_cs_desc(const struct desc_struct *desc1,
|
|
+ const struct desc_struct *desc2)
|
|
+{
|
|
+ return ((desc1->limit0 != desc2->limit0) ||
|
|
+ (desc1->limit != desc2->limit) ||
|
|
+ (desc1->base0 != desc2->base0) ||
|
|
+ (desc1->base1 != desc2->base1) ||
|
|
+ (desc1->base2 != desc2->base2));
|
|
+}
|
|
+
|
|
+/*
|
|
+ * lazy-check for CS validity on exec-shield binaries:
|
|
+ *
|
|
+ * the original non-exec stack patch was written by
|
|
+ * Solar Designer <solar at openwall.com>. Thanks!
|
|
+ */
|
|
+static int
|
|
+check_lazy_exec_limit(int cpu, struct pt_regs *regs, long error_code)
|
|
+{
|
|
+ struct desc_struct *desc1, *desc2;
|
|
+ struct vm_area_struct *vma;
|
|
+ unsigned long limit;
|
|
+
|
|
+ if (current->mm == NULL)
|
|
+ return 0;
|
|
+
|
|
+ limit = -1UL;
|
|
+ if (current->mm->context.exec_limit != -1UL) {
|
|
+ limit = PAGE_SIZE;
|
|
+ spin_lock(&current->mm->page_table_lock);
|
|
+ for (vma = current->mm->mmap; vma; vma = vma->vm_next)
|
|
+ if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
|
|
+ limit = vma->vm_end;
|
|
+ vma = get_gate_vma(current);
|
|
+ if (vma && (vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
|
|
+ limit = vma->vm_end;
|
|
+ spin_unlock(&current->mm->page_table_lock);
|
|
+ if (limit >= TASK_SIZE)
|
|
+ limit = -1UL;
|
|
+ current->mm->context.exec_limit = limit;
|
|
+ }
|
|
+ set_user_cs(&current->mm->context.user_cs, limit);
|
|
+
|
|
+ desc1 = &current->mm->context.user_cs;
|
|
+ desc2 = get_cpu_gdt_table(cpu) + GDT_ENTRY_DEFAULT_USER_CS;
|
|
+
|
|
+ if (__compare_user_cs_desc(desc1, desc2)) {
|
|
+ /*
|
|
+ * The CS was not in sync - reload it and retry the
|
|
+ * instruction. If the instruction still faults then
|
|
+ * we won't hit this branch next time around.
|
|
+ */
|
|
+ if (print_fatal_signals >= 2) {
|
|
+ printk(KERN_ERR "#GPF fixup (%ld[seg:%lx]) at %08lx, CPU#%d.\n",
|
|
+ error_code, error_code/8, regs->ip,
|
|
+ smp_processor_id());
|
|
+ printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x, CPU_cs: %08x/%08x.\n",
|
|
+ current->mm->context.exec_limit,
|
|
+ desc1->a, desc1->b, desc2->a, desc2->b);
|
|
+ }
|
|
+
|
|
+ load_user_cs_desc(cpu, current->mm);
|
|
+
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
#endif
|
|
|
|
static void __kprobes
|
|
@@ -273,6 +343,29 @@ do_general_protection(struct pt_regs *regs, long error_code)
|
|
if (!user_mode(regs))
|
|
goto gp_in_kernel;
|
|
|
|
+#ifdef CONFIG_X86_32
|
|
+{
|
|
+ int cpu;
|
|
+ int ok;
|
|
+
|
|
+ cpu = get_cpu();
|
|
+ ok = check_lazy_exec_limit(cpu, regs, error_code);
|
|
+ put_cpu();
|
|
+
|
|
+ if (ok)
|
|
+ return;
|
|
+
|
|
+ if (print_fatal_signals) {
|
|
+ printk(KERN_ERR "#GPF(%ld[seg:%lx]) at %08lx, CPU#%d.\n",
|
|
+ error_code, error_code/8, regs->ip, smp_processor_id());
|
|
+ printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x.\n",
|
|
+ current->mm->context.exec_limit,
|
|
+ current->mm->context.user_cs.a,
|
|
+ current->mm->context.user_cs.b);
|
|
+ }
|
|
+}
|
|
+#endif /*CONFIG_X86_32*/
|
|
+
|
|
tsk->thread.error_code = error_code;
|
|
tsk->thread.trap_no = 13;
|
|
|
|
@@ -863,19 +956,37 @@ do_device_not_available(struct pt_regs *regs, long error_code)
|
|
}
|
|
|
|
#ifdef CONFIG_X86_32
|
|
+/*
|
|
+ * The fixup code for errors in iret jumps to here (iret_exc). It loses
|
|
+ * the original trap number and error code. The bogus trap 32 and error
|
|
+ * code 0 are what the vanilla kernel delivers via:
|
|
+ * DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
|
|
+ *
|
|
+ * NOTE: Because of the final "1" in the macro we need to enable interrupts.
|
|
+ *
|
|
+ * In case of a general protection fault in the iret instruction, we
|
|
+ * need to check for a lazy CS update for exec-shield.
|
|
+ */
|
|
dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
|
|
{
|
|
- siginfo_t info;
|
|
+ int ok;
|
|
+ int cpu;
|
|
+
|
|
local_irq_enable();
|
|
|
|
- info.si_signo = SIGILL;
|
|
- info.si_errno = 0;
|
|
- info.si_code = ILL_BADSTK;
|
|
- info.si_addr = NULL;
|
|
- if (notify_die(DIE_TRAP, "iret exception",
|
|
- regs, error_code, 32, SIGILL) == NOTIFY_STOP)
|
|
- return;
|
|
- do_trap(32, SIGILL, "iret exception", regs, error_code, &info);
|
|
+ cpu = get_cpu();
|
|
+ ok = check_lazy_exec_limit(cpu, regs, error_code);
|
|
+ put_cpu();
|
|
+
|
|
+ if (!ok && notify_die(DIE_TRAP, "iret exception", regs,
|
|
+ error_code, 32, SIGSEGV) != NOTIFY_STOP) {
|
|
+ siginfo_t info;
|
|
+ info.si_signo = SIGSEGV;
|
|
+ info.si_errno = 0;
|
|
+ info.si_code = ILL_BADSTK;
|
|
+ info.si_addr = 0;
|
|
+ do_trap(32, SIGSEGV, "iret exception", regs, error_code, &info);
|
|
+ }
|
|
}
|
|
#endif
|
|
|
|
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
|
|
index 1dab519..360f39d 100644
|
|
--- a/arch/x86/mm/mmap.c
|
|
+++ b/arch/x86/mm/mmap.c
|
|
@@ -124,13 +124,16 @@ static unsigned long mmap_legacy_base(void)
|
|
*/
|
|
void arch_pick_mmap_layout(struct mm_struct *mm)
|
|
{
|
|
- if (mmap_is_legacy()) {
|
|
+ if (!(2 & exec_shield) && mmap_is_legacy()) {
|
|
mm->mmap_base = mmap_legacy_base();
|
|
mm->get_unmapped_area = arch_get_unmapped_area;
|
|
mm->unmap_area = arch_unmap_area;
|
|
} else {
|
|
mm->mmap_base = mmap_base();
|
|
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
|
|
+ if (!(current->personality & READ_IMPLIES_EXEC)
|
|
+ && mmap_is_ia32())
|
|
+ mm->get_unmapped_exec_area = arch_get_unmapped_exec_area;
|
|
mm->unmap_area = arch_unmap_area_topdown;
|
|
}
|
|
}
|
|
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
|
|
index a3250aa..e0d9cce 100644
|
|
--- a/arch/x86/mm/setup_nx.c
|
|
+++ b/arch/x86/mm/setup_nx.c
|
|
@@ -1,3 +1,4 @@
|
|
+#include <linux/sched.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/init.h>
|
|
@@ -23,6 +24,7 @@ static int __init noexec_setup(char *str)
|
|
disable_nx = 0;
|
|
} else if (!strncmp(str, "off", 3)) {
|
|
disable_nx = 1;
|
|
+ exec_shield = 0;
|
|
}
|
|
x86_configure_nx();
|
|
return 0;
|
|
@@ -40,6 +42,10 @@ void __cpuinit x86_configure_nx(void)
|
|
void __init x86_report_nx(void)
|
|
{
|
|
if (!cpu_has_nx) {
|
|
+ if (exec_shield)
|
|
+ printk(KERN_INFO "Using x86 segment limits to approximate NX protection\n");
|
|
+ else
|
|
+
|
|
printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
|
|
"missing in CPU or disabled in BIOS!\n");
|
|
} else {
|
|
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
|
|
index 426f3a1..e0286b1 100644
|
|
--- a/arch/x86/mm/tlb.c
|
|
+++ b/arch/x86/mm/tlb.c
|
|
@@ -6,6 +6,7 @@
|
|
#include <linux/interrupt.h>
|
|
#include <linux/module.h>
|
|
|
|
+#include <asm/desc.h>
|
|
#include <asm/tlbflush.h>
|
|
#include <asm/mmu_context.h>
|
|
#include <asm/cache.h>
|
|
@@ -131,6 +132,12 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
|
|
union smp_flush_state *f;
|
|
|
|
cpu = smp_processor_id();
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ if (current->active_mm)
|
|
+ load_user_cs_desc(cpu, current->active_mm);
|
|
+#endif
|
|
+
|
|
/*
|
|
* orig_rax contains the negated interrupt vector.
|
|
* Use that to determine where the sender put the data.
|
|
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
|
|
index 02b442e..957bb67 100644
|
|
--- a/arch/x86/vdso/vdso32-setup.c
|
|
+++ b/arch/x86/vdso/vdso32-setup.c
|
|
@@ -331,7 +331,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
|
|
if (compat)
|
|
addr = VDSO_HIGH_BASE;
|
|
else {
|
|
- addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
|
|
+ addr = get_unmapped_area_prot(NULL, 0, PAGE_SIZE, 0, 0, 1);
|
|
if (IS_ERR_VALUE(addr)) {
|
|
ret = addr;
|
|
goto up_fail;
|
|
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
|
|
index b607239..e426a3f 100644
|
|
--- a/arch/x86/xen/enlighten.c
|
|
+++ b/arch/x86/xen/enlighten.c
|
|
@@ -334,6 +334,24 @@ static void xen_set_ldt(const void *addr, unsigned entries)
|
|
xen_mc_issue(PARAVIRT_LAZY_CPU);
|
|
}
|
|
|
|
+#ifdef CONFIG_X86_32
|
|
+static void xen_load_user_cs_desc(int cpu, struct mm_struct *mm)
|
|
+{
|
|
+ void *gdt;
|
|
+ xmaddr_t mgdt;
|
|
+ u64 descriptor;
|
|
+ struct desc_struct user_cs;
|
|
+
|
|
+ gdt = &get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS];
|
|
+ mgdt = virt_to_machine(gdt);
|
|
+
|
|
+ user_cs = mm->context.user_cs;
|
|
+ descriptor = (u64) user_cs.a | ((u64) user_cs.b) << 32;
|
|
+
|
|
+ HYPERVISOR_update_descriptor(mgdt.maddr, descriptor);
|
|
+}
|
|
+#endif /*CONFIG_X86_32*/
|
|
+
|
|
static void xen_load_gdt(const struct desc_ptr *dtr)
|
|
{
|
|
unsigned long va = dtr->address;
|
|
@@ -960,6 +978,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
|
|
|
|
.load_tr_desc = paravirt_nop,
|
|
.set_ldt = xen_set_ldt,
|
|
+#ifdef CONFIG_X86_32
|
|
+ .load_user_cs_desc = xen_load_user_cs_desc,
|
|
+#endif /*CONFIG_X86_32*/
|
|
.load_gdt = xen_load_gdt,
|
|
.load_idt = xen_load_idt,
|
|
.load_tls = xen_load_tls,
|
|
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
|
|
index 535e763..d114af6 100644
|
|
--- a/fs/binfmt_elf.c
|
|
+++ b/fs/binfmt_elf.c
|
|
@@ -74,7 +74,7 @@ static struct linux_binfmt elf_format = {
|
|
.hasvdso = 1
|
|
};
|
|
|
|
-#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
|
|
+#define BAD_ADDR(x) IS_ERR_VALUE(x)
|
|
|
|
static int set_brk(unsigned long start, unsigned long end)
|
|
{
|
|
@@ -701,6 +701,11 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
|
|
break;
|
|
}
|
|
|
|
+ if (current->personality == PER_LINUX && (exec_shield & 2)) {
|
|
+ executable_stack = EXSTACK_DISABLE_X;
|
|
+ current->flags |= PF_RANDOMIZE;
|
|
+ }
|
|
+
|
|
/* Some simple consistency checks for the interpreter */
|
|
if (elf_interpreter) {
|
|
retval = -ELIBBAD;
|
|
@@ -717,6 +722,15 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
|
|
if (retval)
|
|
goto out_free_dentry;
|
|
|
|
+#ifdef CONFIG_X86_32
|
|
+ /*
|
|
+ * Turn off the CS limit completely if exec-shield disabled or
|
|
+ * NX active:
|
|
+ */
|
|
+ if (!exec_shield || executable_stack != EXSTACK_DISABLE_X || (__supported_pte_mask & _PAGE_NX))
|
|
+ arch_add_exec_range(current->mm, -1);
|
|
+#endif
|
|
+
|
|
/* OK, This is the point of no return */
|
|
current->flags &= ~PF_FORKNOEXEC;
|
|
current->mm->def_flags = def_flags;
|
|
@@ -724,7 +738,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
|
|
/* Do this immediately, since STACK_TOP as used in setup_arg_pages
|
|
may depend on the personality. */
|
|
SET_PERSONALITY(loc->elf_ex);
|
|
- if (elf_read_implies_exec(loc->elf_ex, executable_stack))
|
|
+ if (!(exec_shield & 2) &&
|
|
+ elf_read_implies_exec(loc->elf_ex, executable_stack))
|
|
current->personality |= READ_IMPLIES_EXEC;
|
|
|
|
if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
|
|
@@ -890,7 +905,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
|
|
interpreter,
|
|
&interp_map_addr,
|
|
load_bias);
|
|
- if (!IS_ERR((void *)elf_entry)) {
|
|
+ if (!BAD_ADDR(elf_entry)) {
|
|
/*
|
|
* load_elf_interp() returns relocation
|
|
* adjustment
|
|
diff --git a/include/linux/mm.h b/include/linux/mm.h
|
|
index e70f21b..44e6d63 100644
|
|
--- a/include/linux/mm.h
|
|
+++ b/include/linux/mm.h
|
|
@@ -1259,7 +1259,13 @@ extern int install_special_mapping(struct mm_struct *mm,
|
|
unsigned long addr, unsigned long len,
|
|
unsigned long flags, struct page **pages);
|
|
|
|
-extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
|
|
+extern unsigned long get_unmapped_area_prot(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, int);
|
|
+
|
|
+static inline unsigned long get_unmapped_area(struct file *file, unsigned long addr,
|
|
+ unsigned long len, unsigned long pgoff, unsigned long flags)
|
|
+{
|
|
+ return get_unmapped_area_prot(file, addr, len, pgoff, flags, 0);
|
|
+}
|
|
|
|
extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
|
|
unsigned long len, unsigned long prot,
|
|
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
|
|
index b8bb9a6..f478e39 100644
|
|
--- a/include/linux/mm_types.h
|
|
+++ b/include/linux/mm_types.h
|
|
@@ -227,6 +227,9 @@ struct mm_struct {
|
|
unsigned long (*get_unmapped_area) (struct file *filp,
|
|
unsigned long addr, unsigned long len,
|
|
unsigned long pgoff, unsigned long flags);
|
|
+ unsigned long (*get_unmapped_exec_area) (struct file *filp,
|
|
+ unsigned long addr, unsigned long len,
|
|
+ unsigned long pgoff, unsigned long flags);
|
|
void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
|
|
#endif
|
|
unsigned long mmap_base; /* base of mmap area */
|
|
diff --git a/include/linux/resource.h b/include/linux/resource.h
|
|
index f1e914e..d2aef9a 100644
|
|
--- a/include/linux/resource.h
|
|
+++ b/include/linux/resource.h
|
|
@@ -53,8 +53,11 @@ struct rlimit {
|
|
/*
|
|
* Limit the stack by to some sane default: root can always
|
|
* increase this limit if needed.. 8MB seems reasonable.
|
|
+ *
|
|
+ * (2MB more to cover randomization effects.)
|
|
*/
|
|
-#define _STK_LIM (8*1024*1024)
|
|
+#define _STK_LIM (10*1024*1024)
|
|
+#define EXEC_STACK_BIAS (2*1024*1024)
|
|
|
|
/*
|
|
* GPG2 wants 64kB of mlocked memory, to make sure pass phrases
|
|
diff --git a/include/linux/sched.h b/include/linux/sched.h
|
|
index dad7f66..c5a3948 100644
|
|
--- a/include/linux/sched.h
|
|
+++ b/include/linux/sched.h
|
|
@@ -102,6 +102,9 @@ struct fs_struct;
|
|
struct bts_context;
|
|
struct perf_event_context;
|
|
|
|
+extern int exec_shield;
|
|
+extern int print_fatal_signals;
|
|
+
|
|
/*
|
|
* List of flags we want to share for kernel threads,
|
|
* if only because they are not used by them anyway.
|
|
@@ -390,6 +393,10 @@ extern void arch_pick_mmap_layout(struct mm_struct *mm);
|
|
extern unsigned long
|
|
arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
|
|
unsigned long, unsigned long);
|
|
+
|
|
+extern unsigned long
|
|
+arch_get_unmapped_exec_area(struct file *, unsigned long, unsigned long,
|
|
+ unsigned long, unsigned long);
|
|
extern unsigned long
|
|
arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
|
|
unsigned long len, unsigned long pgoff,
|
|
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
|
|
index 8686b0f..a4fad81 100644
|
|
--- a/kernel/sysctl.c
|
|
+++ b/kernel/sysctl.c
|
|
@@ -99,6 +99,26 @@ extern int sysctl_nr_open_min, sysctl_nr_open_max;
|
|
#ifndef CONFIG_MMU
|
|
extern int sysctl_nr_trim_pages;
|
|
#endif
|
|
+
|
|
+int exec_shield = (1<<0);
|
|
+/* exec_shield is a bitmask:
|
|
+ * 0: off; vdso at STACK_TOP, 1 page below TASK_SIZE
|
|
+ * (1<<0) 1: on [also on if !=0]
|
|
+ * (1<<1) 2: force noexecstack regardless of PT_GNU_STACK
|
|
+ * The old settings
|
|
+ * (1<<2) 4: vdso just below .text of main (unless too low)
|
|
+ * (1<<3) 8: vdso just below .text of PT_INTERP (unless too low)
|
|
+ * are ignored because the vdso is placed completely randomly
|
|
+ */
|
|
+
|
|
+static int __init setup_exec_shield(char *str)
|
|
+{
|
|
+ get_option(&str, &exec_shield);
|
|
+
|
|
+ return 1;
|
|
+}
|
|
+__setup("exec-shield=", setup_exec_shield);
|
|
+
|
|
#ifdef CONFIG_BLOCK
|
|
extern int blk_iopoll_enabled;
|
|
#endif
|
|
@@ -400,6 +420,14 @@ static struct ctl_table kern_table[] = {
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec,
|
|
},
|
|
+ {
|
|
+ .procname = "exec-shield",
|
|
+ .data = &exec_shield,
|
|
+ .maxlen = sizeof(int),
|
|
+ .mode = 0644,
|
|
+ .proc_handler = &proc_dointvec,
|
|
+ },
|
|
+
|
|
#ifdef CONFIG_PROC_SYSCTL
|
|
{
|
|
.procname = "tainted",
|
|
diff --git a/mm/mmap.c b/mm/mmap.c
|
|
index 75557c6..8173284 100644
|
|
--- a/mm/mmap.c
|
|
+++ b/mm/mmap.c
|
|
@@ -28,6 +28,7 @@
|
|
#include <linux/rmap.h>
|
|
#include <linux/mmu_notifier.h>
|
|
#include <linux/perf_event.h>
|
|
+#include <linux/random.h>
|
|
|
|
#include <asm/uaccess.h>
|
|
#include <asm/cacheflush.h>
|
|
@@ -44,6 +45,18 @@
|
|
#define arch_rebalance_pgtables(addr, len) (addr)
|
|
#endif
|
|
|
|
+/* No sane architecture will #define these to anything else */
|
|
+#ifndef arch_add_exec_range
|
|
+#define arch_add_exec_range(mm, limit) do { ; } while (0)
|
|
+#endif
|
|
+#ifndef arch_flush_exec_range
|
|
+#define arch_flush_exec_range(mm) do { ; } while (0)
|
|
+#endif
|
|
+#ifndef arch_remove_exec_range
|
|
+#define arch_remove_exec_range(mm, limit) do { ; } while (0)
|
|
+#endif
|
|
+
|
|
+
|
|
static void unmap_region(struct mm_struct *mm,
|
|
struct vm_area_struct *vma, struct vm_area_struct *prev,
|
|
unsigned long start, unsigned long end);
|
|
@@ -388,6 +401,9 @@ static inline void
|
|
{
|
|
struct vm_area_struct *next;
|
|
|
|
+ if (vma->vm_flags & VM_EXEC)
|
|
+ arch_add_exec_range(mm, vma->vm_end);
|
|
+
|
|
vma->vm_prev = prev;
|
|
if (prev) {
|
|
next = prev->vm_next;
|
|
@@ -489,6 +504,8 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
|
|
rb_erase(&vma->vm_rb, &mm->mm_rb);
|
|
if (mm->mmap_cache == vma)
|
|
mm->mmap_cache = prev;
|
|
+ if (vma->vm_flags & VM_EXEC)
|
|
+ arch_remove_exec_range(mm, vma->vm_end);
|
|
}
|
|
|
|
/*
|
|
@@ -798,6 +815,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
|
|
} else /* cases 2, 5, 7 */
|
|
err = vma_adjust(prev, prev->vm_start,
|
|
end, prev->vm_pgoff, NULL);
|
|
+ if (prev->vm_flags & VM_EXEC)
|
|
+ arch_add_exec_range(mm, prev->vm_end);
|
|
if (err)
|
|
return NULL;
|
|
return prev;
|
|
@@ -952,7 +971,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
|
|
/* Obtain the address to map to. we verify (or select) it and ensure
|
|
* that it represents a valid section of the address space.
|
|
*/
|
|
- addr = get_unmapped_area(file, addr, len, pgoff, flags);
|
|
+ addr = get_unmapped_area_prot(file, addr, len, pgoff, flags,
|
|
+ prot & PROT_EXEC);
|
|
if (addr & ~PAGE_MASK)
|
|
return addr;
|
|
|
|
@@ -1504,8 +1524,8 @@ void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
|
|
}
|
|
|
|
unsigned long
|
|
-get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
|
|
- unsigned long pgoff, unsigned long flags)
|
|
+get_unmapped_area_prot(struct file *file, unsigned long addr, unsigned long len,
|
|
+ unsigned long pgoff, unsigned long flags, int exec)
|
|
{
|
|
unsigned long (*get_area)(struct file *, unsigned long,
|
|
unsigned long, unsigned long, unsigned long);
|
|
@@ -1518,7 +1538,11 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
|
|
if (len > TASK_SIZE)
|
|
return -ENOMEM;
|
|
|
|
- get_area = current->mm->get_unmapped_area;
|
|
+ if (exec && current->mm->get_unmapped_exec_area)
|
|
+ get_area = current->mm->get_unmapped_exec_area;
|
|
+ else
|
|
+ get_area = current->mm->get_unmapped_area;
|
|
+
|
|
if (file && file->f_op && file->f_op->get_unmapped_area)
|
|
get_area = file->f_op->get_unmapped_area;
|
|
addr = get_area(file, addr, len, pgoff, flags);
|
|
@@ -1532,8 +1556,83 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
|
|
|
|
return arch_rebalance_pgtables(addr, len);
|
|
}
|
|
+EXPORT_SYMBOL(get_unmapped_area_prot);
|
|
+
|
|
+static bool should_randomize(void)
|
|
+{
|
|
+ return (current->flags & PF_RANDOMIZE) &&
|
|
+ !(current->personality & ADDR_NO_RANDOMIZE);
|
|
+}
|
|
+
|
|
+#define SHLIB_BASE 0x00110000
|
|
+
|
|
+unsigned long
|
|
+arch_get_unmapped_exec_area(struct file *filp, unsigned long addr0,
|
|
+ unsigned long len0, unsigned long pgoff, unsigned long flags)
|
|
+{
|
|
+ unsigned long addr = addr0, len = len0;
|
|
+ struct mm_struct *mm = current->mm;
|
|
+ struct vm_area_struct *vma;
|
|
+ unsigned long tmp;
|
|
+
|
|
+ if (len > TASK_SIZE)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ if (flags & MAP_FIXED)
|
|
+ return addr;
|
|
+
|
|
+ if (!addr)
|
|
+ addr = !should_randomize() ? SHLIB_BASE :
|
|
+ randomize_range(SHLIB_BASE, 0x01000000, len);
|
|
+
|
|
+ if (addr) {
|
|
+ addr = PAGE_ALIGN(addr);
|
|
+ vma = find_vma(mm, addr);
|
|
+ if (TASK_SIZE - len >= addr &&
|
|
+ (!vma || addr + len <= vma->vm_start))
|
|
+ return addr;
|
|
+ }
|
|
+
|
|
+ addr = SHLIB_BASE;
|
|
+ for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
|
|
+ /* At this point: (!vma || addr < vma->vm_end). */
|
|
+ if (TASK_SIZE - len < addr)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ if (!vma || addr + len <= vma->vm_start) {
|
|
+ /*
|
|
+ * Must not let a PROT_EXEC mapping get into the
|
|
+ * brk area:
|
|
+ */
|
|
+ if (addr + len > mm->brk)
|
|
+ goto failed;
|
|
+
|
|
+ /*
|
|
+ * Up until the brk area we randomize addresses
|
|
+ * as much as possible:
|
|
+ */
|
|
+ if (addr >= 0x01000000 && should_randomize()) {
|
|
+ tmp = randomize_range(0x01000000,
|
|
+ PAGE_ALIGN(max(mm->start_brk,
|
|
+ (unsigned long)0x08000000)), len);
|
|
+ vma = find_vma(mm, tmp);
|
|
+ if (TASK_SIZE - len >= tmp &&
|
|
+ (!vma || tmp + len <= vma->vm_start))
|
|
+ return tmp;
|
|
+ }
|
|
+ /*
|
|
+ * Ok, randomization didn't work out - return
|
|
+ * the result of the linear search:
|
|
+ */
|
|
+ return addr;
|
|
+ }
|
|
+ addr = vma->vm_end;
|
|
+ }
|
|
+
|
|
+failed:
|
|
+ return current->mm->get_unmapped_area(filp, addr0, len0, pgoff, flags);
|
|
+}
|
|
|
|
-EXPORT_SYMBOL(get_unmapped_area);
|
|
|
|
/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
|
|
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
|
|
@@ -1608,6 +1707,16 @@ out:
|
|
return prev ? prev->vm_next : vma;
|
|
}
|
|
|
|
+static int over_stack_limit(unsigned long sz)
|
|
+{
|
|
+ struct rlimit *rlim = current->signal->rlim;
|
|
+
|
|
+ if (sz < EXEC_STACK_BIAS)
|
|
+ return 0;
|
|
+ return (sz - EXEC_STACK_BIAS) >
|
|
+ ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur);
|
|
+}
|
|
+
|
|
/*
|
|
* Verify that the stack growth is acceptable and
|
|
* update accounting. This is shared with both the
|
|
@@ -1624,7 +1733,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
|
|
return -ENOMEM;
|
|
|
|
/* Stack limit test */
|
|
- if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
|
|
+ if (over_stack_limit(size))
|
|
return -ENOMEM;
|
|
|
|
/* mlock limit tests */
|
|
@@ -1936,10 +2045,14 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
|
|
if (new->vm_ops && new->vm_ops->open)
|
|
new->vm_ops->open(new);
|
|
|
|
- if (new_below)
|
|
+ if (new_below) {
|
|
+ unsigned long old_end = vma->vm_end;
|
|
+
|
|
err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
|
|
((addr - new->vm_start) >> PAGE_SHIFT), new);
|
|
- else
|
|
+ if (vma->vm_flags & VM_EXEC)
|
|
+ arch_remove_exec_range(mm, old_end);
|
|
+ } else
|
|
err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
|
|
|
|
/* Success. */
|
|
@@ -2223,6 +2336,7 @@ void exit_mmap(struct mm_struct *mm)
|
|
|
|
free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
|
|
tlb_finish_mmu(tlb, 0, end);
|
|
+ arch_flush_exec_range(mm);
|
|
|
|
/*
|
|
* Walk the list again, actually closing and freeing it,
|
|
diff --git a/mm/mprotect.c b/mm/mprotect.c
|
|
index 8bc969d..3c9b4fc 100644
|
|
--- a/mm/mprotect.c
|
|
+++ b/mm/mprotect.c
|
|
@@ -26,9 +26,14 @@
|
|
#include <linux/perf_event.h>
|
|
#include <asm/uaccess.h>
|
|
#include <asm/pgtable.h>
|
|
+#include <asm/pgalloc.h>
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/tlbflush.h>
|
|
|
|
+#ifndef arch_remove_exec_range
|
|
+#define arch_remove_exec_range(mm, limit) do { ; } while (0)
|
|
+#endif
|
|
+
|
|
#ifndef pgprot_modify
|
|
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
|
|
{
|
|
@@ -139,7 +144,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
|
|
struct mm_struct *mm = vma->vm_mm;
|
|
unsigned long oldflags = vma->vm_flags;
|
|
long nrpages = (end - start) >> PAGE_SHIFT;
|
|
- unsigned long charged = 0;
|
|
+ unsigned long charged = 0, old_end = vma->vm_end;
|
|
pgoff_t pgoff;
|
|
int error;
|
|
int dirty_accountable = 0;
|
|
@@ -204,6 +209,9 @@ success:
|
|
dirty_accountable = 1;
|
|
}
|
|
|
|
+ if (oldflags & VM_EXEC)
|
|
+ arch_remove_exec_range(current->mm, old_end);
|
|
+
|
|
mmu_notifier_invalidate_range_start(mm, start, end);
|
|
if (is_vm_hugetlb_page(vma))
|
|
hugetlb_change_protection(vma, start, end, vma->vm_page_prot);
|
|
diff --git a/mm/mremap.c b/mm/mremap.c
|
|
index e9c75ef..0a5379f 100644
|
|
--- a/mm/mremap.c
|
|
+++ b/mm/mremap.c
|
|
@@ -488,10 +488,10 @@ unsigned long do_mremap(unsigned long addr,
|
|
if (vma->vm_flags & VM_MAYSHARE)
|
|
map_flags |= MAP_SHARED;
|
|
|
|
- new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
|
|
+ new_addr = get_unmapped_area_prot(vma->vm_file, 0, new_len,
|
|
vma->vm_pgoff +
|
|
((addr - vma->vm_start) >> PAGE_SHIFT),
|
|
- map_flags);
|
|
+ map_flags, vma->vm_flags & VM_EXEC);
|
|
if (new_addr & ~PAGE_MASK) {
|
|
ret = new_addr;
|
|
goto out;
|
|
--
|
|
1.7.0.1
|
|
|