kernel/linux-2.6-i386-nx-emulation.patch

--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -5,6 +5,7 @@
 #include <asm/ldt.h>
 #include <asm/mmu.h>
 #include <linux/smp.h>
+#include <linux/mm_types.h>
 
 static inline void fill_ldt(struct desc_struct *desc,
 			    const struct user_desc *info)
@@ -93,6 +94,9 @@ static inline int desc_empty(const void *ptr)
 
 #define load_TLS(t, cpu) native_load_tls(t, cpu)
 #define set_ldt native_set_ldt
+#ifdef CONFIG_X86_32
+#define load_user_cs_desc native_load_user_cs_desc
+#endif /*CONFIG_X86_32*/
 
 #define write_ldt_entry(dt, entry, desc)	\
 	native_write_ldt_entry(dt, entry, desc)
@@ -392,4 +396,25 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
 	_set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
 }
 
+#ifdef CONFIG_X86_32
+static inline void set_user_cs(struct desc_struct *desc, unsigned long limit)
+{
+	limit = (limit - 1) / PAGE_SIZE;	/* byte limit -> index of the last page below it; 0 and -1UL both wrap to 0xfffff */
+	desc->a = limit & 0xffff;	/* low word of the descriptor: page-limit bits 15:0, remaining bits zero */
+	desc->b = (limit & 0xf0000) | 0x00c0fb00;	/* page-limit bits 19:16 plus fixed attribute bits; presumably G, D/B, present, DPL 3, code segment - confirm against the SDM descriptor layout */
+}
+
+static inline void native_load_user_cs_desc(int cpu, struct mm_struct *mm)
+{
+	get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS] = (mm)->context.user_cs;	/* copy mm's stored user CS descriptor into the given CPU's GDT slot */
+}
+
+#define arch_add_exec_range arch_add_exec_range
+#define arch_remove_exec_range arch_remove_exec_range
+#define arch_flush_exec_range arch_flush_exec_range
+extern void arch_add_exec_range(struct mm_struct *mm, unsigned long limit);
+extern void arch_remove_exec_range(struct mm_struct *mm, unsigned long limit);
+extern void arch_flush_exec_range(struct mm_struct *mm);
+#endif /* CONFIG_X86_32 */
+
 #endif /* _ASM_X86_DESC_H */
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -7,12 +7,19 @@
 /*
  * The x86 doesn't have a mmu context, but
  * we put the segment information here.
+ *
+ * exec_limit is used to track the range PROT_EXEC
+ * mappings span.
  */
 typedef struct {
 	void *ldt;
 	int size;
 	struct mutex lock;
 	void *vdso;
+#ifdef CONFIG_X86_32
+	struct desc_struct user_cs;
+	unsigned long exec_limit;
+#endif
 } mm_context_t;
 
 #ifdef CONFIG_SMP
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -289,6 +289,12 @@ static inline void set_ldt(const void *addr, unsigned entries)
 {
 	PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
 }
+#ifdef CONFIG_X86_32
+static inline void load_user_cs_desc(int cpu, struct mm_struct *mm)	/* cpu is int, matching the pv_cpu_ops.load_user_cs_desc slot */
+{
+	PVOP_VCALL2(pv_cpu_ops.load_user_cs_desc, cpu, mm);	/* dispatch to the native or hypervisor-specific implementation */
+}
+#endif /*CONFIG_X86_32*/
 static inline void store_gdt(struct desc_ptr *dtr)
 {
 	PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr);
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -118,6 +118,9 @@ struct pv_cpu_ops {
 	void (*store_gdt)(struct desc_ptr *);
 	void (*store_idt)(struct desc_ptr *);
 	void (*set_ldt)(const void *desc, unsigned entries);
+#ifdef CONFIG_X86_32
+	void (*load_user_cs_desc)(int cpu, struct mm_struct *mm);
+#endif
 	unsigned long (*store_tr)(void);
 	void (*load_tls)(struct thread_struct *t, unsigned int cpu);
 #ifdef CONFIG_X86_64
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -802,6 +802,22 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 	/* Filter out anything that depends on CPUID levels we don't have */
 	filter_cpuid_features(c, true);
 
+#ifdef CONFIG_X86_32
+	/*
+	 *  emulation of NX with segment limits unfortunately means
+	 *  we have to disable the fast system calls, due to the way that
+	 *  sysexit clears the segment limits on return.
+	 *  If NX handling has been disabled on the boot command line
+	 *  (noexec=off), or we have NX, then we don't need to do this.
+	 */
+	if (!disable_nx) {
+#ifdef CONFIG_X86_PAE
+		if (!test_cpu_cap(c, X86_FEATURE_NX))
+#endif
+			clear_cpu_cap(c, X86_FEATURE_SEP);
+	}
+#endif
+
 	/* If the model name is still unset, do table lookup. */
 	if (!c->x86_model_id[0]) {
 		const char *p;
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -345,6 +345,9 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.read_tscp = native_read_tscp,
 	.load_tr_desc = native_load_tr_desc,
 	.set_ldt = native_set_ldt,
+#ifdef CONFIG_X86_32
+	.load_user_cs_desc = native_load_user_cs_desc,
+#endif /*CONFIG_X86_32*/
 	.load_gdt = native_load_gdt,
 	.load_idt = native_load_idt,
 	.store_gdt = native_store_gdt,
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -243,7 +243,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 void
 start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 {
+	int cpu;
+
 	set_user_gs(regs, 0);
+
 	regs->fs		= 0;
 	set_fs(USER_DS);
 	regs->ds		= __USER_DS;
@@ -252,6 +255,11 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 	regs->cs		= __USER_CS;
 	regs->ip		= new_ip;
 	regs->sp		= new_sp;
+
+	cpu = get_cpu();
+	load_user_cs_desc(cpu, current->mm);
+	put_cpu();
+
 	/*
 	 * Free the old FP and other extended state
 	 */
@@ -311,6 +319,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	if (preload_fpu)
 		prefetch(next->fpu.state);
 
+	if (next_p->mm)
+		load_user_cs_desc(cpu, next_p->mm);
+
 	/*
 	 * Reload esp0.
 	 */
@@ -404,3 +415,40 @@ unsigned long get_wchan(struct task_struct *p)
 	return 0;
 }
 
+/* Store the new exec limit in mm's context, rebuild its user CS descriptor
+ * and, if mm is the running task's mm, load it on the current CPU. */
+static void modify_cs(struct mm_struct *mm, unsigned long limit)
+{
+	mm->context.exec_limit = limit;
+	set_user_cs(&mm->context.user_cs, limit);
+
+	if (mm != current->mm)
+		return;
+	load_user_cs_desc(get_cpu(), mm);
+	put_cpu();
+}
+
+void arch_add_exec_range(struct mm_struct *mm, unsigned long limit)
+{
+	if (limit > mm->context.exec_limit)	/* only ever raises the limit; a lower or equal value is ignored */
+		modify_cs(mm, limit);
+}
+
+void arch_remove_exec_range(struct mm_struct *mm, unsigned long old_end)
+{
+	unsigned long new_limit = PAGE_SIZE;
+	struct vm_area_struct *v;
+
+	if (old_end != mm->context.exec_limit)
+		return;		/* the range being removed did not define the limit */
+	for (v = mm->mmap; v; v = v->vm_next)
+		if ((v->vm_flags & VM_EXEC) && (v->vm_end > new_limit))
+			new_limit = v->vm_end;	/* highest end of any remaining VM_EXEC vma */
+	modify_cs(mm, new_limit);
+}
+
+void arch_flush_exec_range(struct mm_struct *mm)
+{
+	mm->context.exec_limit = 0;	/* forget the tracked limit; the GDT is not reloaded here */
+	set_user_cs(&mm->context.user_cs, 0);	/* NOTE(review): set_user_cs() computes (0 - 1) / PAGE_SIZE, so this stores a full-range descriptor - confirm that is intended */
+}
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -109,6 +109,78 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
 	dec_preempt_count();
 }
 
+#ifdef CONFIG_X86_32
+/* Returns non-zero when the limit or base fields of the two descriptors differ. */
+static inline int __compare_user_cs_desc(const struct desc_struct *desc1,
+					 const struct desc_struct *desc2)
+{
+	return !(desc1->limit0 == desc2->limit0 &&
+		 desc1->limit == desc2->limit &&
+		 desc1->base0 == desc2->base0 &&
+		 desc1->base1 == desc2->base1 &&
+		 desc1->base2 == desc2->base2);
+}
+
+/*
+ * lazy-check for CS validity on exec-shield binaries:
+ * returns 1 if this CPU's user CS was out of sync and got reloaded, else 0.
+ * the original non-exec stack patch was written by
+ * Solar Designer <solar at openwall.com>. Thanks!
+ */
+static int
+check_lazy_exec_limit(int cpu, struct pt_regs *regs, long error_code)
+{
+	struct desc_struct *desc1, *desc2;
+	struct vm_area_struct *vma;
+	unsigned long limit;
+
+	if (current->mm == NULL)
+		return 0;	/* no mm, so there is no user CS descriptor to check */
+
+	limit = -1UL;	/* -1UL stands for "no limit" */
+	if (current->mm->context.exec_limit != -1UL) {
+		limit = PAGE_SIZE;	/* floor used when no VM_EXEC vma is found */
+		spin_lock(&current->mm->page_table_lock);	/* NOTE(review): the vma list is walked under page_table_lock only - confirm this is sufficient */
+		for (vma = current->mm->mmap; vma; vma = vma->vm_next)
+			if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
+				limit = vma->vm_end;
+		vma = get_gate_vma(current);	/* the gate vma is considered in addition to the list walk */
+		if (vma && (vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
+			limit = vma->vm_end;
+		spin_unlock(&current->mm->page_table_lock);
+		if (limit >= TASK_SIZE)
+			limit = -1UL;	/* executable up to the top of user space: treat as unlimited */
+		current->mm->context.exec_limit = limit;
+	}
+	set_user_cs(&current->mm->context.user_cs, limit);	/* rebuild mm's descriptor from the limit just determined */
+
+	desc1 = &current->mm->context.user_cs;	/* what the descriptor should be */
+	desc2 = get_cpu_gdt_table(cpu) + GDT_ENTRY_DEFAULT_USER_CS;	/* what the given CPU's GDT currently holds */
+
+	if (__compare_user_cs_desc(desc1, desc2)) {
+		/*
+		 * The CS was not in sync - reload it and retry the
+		 * instruction. If the instruction still faults then
+		 * we won't hit this branch next time around.
+		 */
+		if (print_fatal_signals >= 2) {
+			printk(KERN_ERR "#GPF fixup (%ld[seg:%lx]) at %08lx, CPU#%d.\n",
+				error_code, error_code/8, regs->ip,
+				smp_processor_id());
+			printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x, CPU_cs: %08x/%08x.\n",
+				current->mm->context.exec_limit,
+				desc1->a, desc1->b, desc2->a, desc2->b);
+		}
+
+		load_user_cs_desc(cpu, current->mm);
+
+		return 1;	/* fixed up; the callers return without raising a signal */
+	}
+
+	return 0;	/* descriptor already matched: the fault has some other cause */
+}
+#endif
+
 static void __kprobes
 do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
 	long error_code, siginfo_t *info)
@@ -265,6 +337,29 @@ do_general_protection(struct pt_regs *regs, long error_code)
 	if (!user_mode(regs))
 		goto gp_in_kernel;
 
+#ifdef CONFIG_X86_32
+{
+	int cpu;
+	int ok;
+
+	cpu = get_cpu();
+	ok = check_lazy_exec_limit(cpu, regs, error_code);
+	put_cpu();
+
+	if (ok)
+		return;
+
+	if (print_fatal_signals) {
+		printk(KERN_ERR "#GPF(%ld[seg:%lx]) at %08lx, CPU#%d.\n",
+			error_code, error_code/8, regs->ip, smp_processor_id());
+		printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x.\n",
+			current->mm->context.exec_limit,
+			current->mm->context.user_cs.a,
+			current->mm->context.user_cs.b);
+	}
+}
+#endif /*CONFIG_X86_32*/
+
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_no = 13;
 
@@ -792,19 +887,37 @@ do_device_not_available(struct pt_regs *regs, long error_code)
 }
 
 #ifdef CONFIG_X86_32
+/*
+ * The fixup code for errors in iret jumps to here (iret_exc). It loses
+ * the original trap number and error code. The bogus trap 32 and error
+ * code 0 are what the vanilla kernel delivers via:
+ * DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
+ *
+ * NOTE: Because of the final "1" in the macro we need to enable interrupts.
+ *
+ * In case of a general protection fault in the iret instruction, we
+ * need to check for a lazy CS update for exec-shield.
+ */
 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 {
-	siginfo_t info;
+	int ok;
+	int cpu;
+
 	local_irq_enable();
 
-	info.si_signo = SIGILL;
-	info.si_errno = 0;
-	info.si_code = ILL_BADSTK;
-	info.si_addr = NULL;
-	if (notify_die(DIE_TRAP, "iret exception",
-			regs, error_code, 32, SIGILL) == NOTIFY_STOP)
-		return;
-	do_trap(32, SIGILL, "iret exception", regs, error_code, &info);
+	cpu = get_cpu();
+	ok = check_lazy_exec_limit(cpu, regs, error_code);
+	put_cpu();
+
+	if (!ok && notify_die(DIE_TRAP, "iret exception", regs,
+		error_code, 32, SIGSEGV) != NOTIFY_STOP) {
+			siginfo_t info;
+			info.si_signo = SIGSEGV;
+			info.si_errno = 0;
+			info.si_code = ILL_BADSTK;
+			info.si_addr = 0;
+			do_trap(32, SIGSEGV, "iret exception", regs, error_code, &info);
+	}
 }
 #endif
 
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -1,3 +1,4 @@
+#include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/errno.h>
 #include <linux/init.h>
@@ -6,7 +6,7 @@
 #include <asm/pgtable.h>
 #include <asm/proto.h>

-static int disable_nx __cpuinitdata;
+int disable_nx __cpuinitdata;

 /*
  * noexec = on|off
@@ -40,6 +42,10 @@ void __cpuinit x86_configure_nx(void)
 void __init x86_report_nx(void)
 {
 	if (!cpu_has_nx) {
+		/* segment-limit emulation is only in effect while NX handling is not disabled */
+		if (!disable_nx)
+			printk(KERN_INFO "Using x86 segment limits to approximate NX protection\n");
+		else
 		printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
 		       "missing in CPU or disabled in BIOS!\n");
 	} else {
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -6,6 +6,7 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 
+#include <asm/desc.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 #include <asm/cache.h>
@@ -131,6 +132,12 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
 	union smp_flush_state *f;
 
 	cpu = smp_processor_id();
+
+#ifdef CONFIG_X86_32
+	if (current->active_mm)
+		load_user_cs_desc(cpu, current->active_mm);
+#endif
+
 	/*
 	 * orig_rax contains the negated interrupt vector.
 	 * Use that to determine where the sender put the data.
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -335,6 +335,24 @@ static void xen_set_ldt(const void *addr, unsigned entries)
 	xen_mc_issue(PARAVIRT_LAZY_CPU);
 }
 
+#ifdef CONFIG_X86_32
+/* Xen flavour of load_user_cs_desc(): pass mm's user CS descriptor to the
+ * hypervisor, addressed by the machine address of the given CPU's GDT slot. */
+static void xen_load_user_cs_desc(int cpu, struct mm_struct *mm)
+{
+	const struct desc_struct *src = &mm->context.user_cs;
+	struct desc_struct *slot;
+	xmaddr_t slot_maddr;
+	u64 raw;
+
+	slot = &get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS];
+	slot_maddr = virt_to_machine(slot);
+	raw = (u64)src->a | ((u64)src->b << 32);
+
+	HYPERVISOR_update_descriptor(slot_maddr.maddr, raw);
+}
+#endif /*CONFIG_X86_32*/
+
 static void xen_load_gdt(const struct desc_ptr *dtr)
 {
 	unsigned long va = dtr->address;
@@ -961,6 +979,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 
 	.load_tr_desc = paravirt_nop,
 	.set_ldt = xen_set_ldt,
+#ifdef CONFIG_X86_32
+	.load_user_cs_desc = xen_load_user_cs_desc,
+#endif /*CONFIG_X86_32*/
 	.load_gdt = xen_load_gdt,
 	.load_idt = xen_load_idt,
 	.load_tls = xen_load_tls,
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -717,6 +722,15 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	if (retval)
 		goto out_free_dentry;
 
+#ifdef CONFIG_X86_32
+	/*
+	 * Turn off the CS limit completely if exec-shield disabled or
+	 * NX active:
+	 */
+	if (disable_nx || executable_stack != EXSTACK_DISABLE_X || (__supported_pte_mask & _PAGE_NX))
+		arch_add_exec_range(current->mm, -1);
+#endif
+
 	/* OK, This is the point of no return */
 	current->flags &= ~PF_FORKNOEXEC;
 	current->mm->def_flags = def_flags;
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -101,6 +101,9 @@ struct bio_list;
 struct fs_struct;
 struct perf_event_context;
 
+extern int disable_nx;
+extern int print_fatal_signals;
+
 /*
  * List of flags we want to share for kernel threads,
  * if only because they are not used by them anyway.
--- b/mm/mmap.c
+++ b/mm/mmap.c
@@ -44,6 +45,18 @@
 #define arch_rebalance_pgtables(addr, len)		(addr)
 #endif
 
+/* Fallback no-ops for architectures that do not define these hooks */
+#ifndef arch_add_exec_range
+#define arch_add_exec_range(mm, limit)	do { ; } while (0)
+#endif
+#ifndef arch_flush_exec_range
+#define arch_flush_exec_range(mm)	do { ; } while (0)
+#endif
+#ifndef arch_remove_exec_range
+#define arch_remove_exec_range(mm, limit)	do { ; } while (0)
+#endif
+
+
 static void unmap_region(struct mm_struct *mm,
 		struct vm_area_struct *vma, struct vm_area_struct *prev,
 		unsigned long start, unsigned long end);
@@ -388,6 +401,9 @@
 {
 	struct vm_area_struct *next;
 
+	if (vma->vm_flags & VM_EXEC)
+		arch_add_exec_range(mm, vma->vm_end);
+
 	vma->vm_prev = prev;
 	if (prev) {
 		next = prev->vm_next;
@@ -489,6 +504,8 @@
 	rb_erase(&vma->vm_rb, &mm->mm_rb);
 	if (mm->mmap_cache == vma)
 		mm->mmap_cache = prev;
+	if (vma->vm_flags & VM_EXEC)
+		arch_remove_exec_range(mm, vma->vm_end);
 }
 
 /*
@@ -790,6 +807,8 @@
 		} else					/* cases 2, 5, 7 */
 			err = vma_adjust(prev, prev->vm_start,
 				end, prev->vm_pgoff, NULL);
+		if (prev->vm_flags & VM_EXEC)
+			arch_add_exec_range(mm, prev->vm_end);
 		if (err)
 			return NULL;
 		return prev;
@@ -1966,10 +2075,14 @@
 	if (new->vm_ops && new->vm_ops->open)
 		new->vm_ops->open(new);
 
-	if (new_below)
+	if (new_below) {
+		unsigned long old_end = vma->vm_end;
+
 		err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
 			((addr - new->vm_start) >> PAGE_SHIFT), new);
-	else
+		if (vma->vm_flags & VM_EXEC)
+			arch_remove_exec_range(mm, old_end);
+	} else
 		err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
 
 	/* Success. */
@@ -2254,6 +2367,7 @@
 
 	free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
 	tlb_finish_mmu(tlb, 0, end);
+	arch_flush_exec_range(mm);
 
 	/*
 	 * Walk the list again, actually closing and freeing it,
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -25,9 +25,14 @@
 #include <linux/perf_event.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
+#include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#ifndef arch_remove_exec_range
+#define arch_remove_exec_range(mm, limit)      do { ; } while (0)
+#endif
+
 #ifndef pgprot_modify
 static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 {
@@ -138,7 +143,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long oldflags = vma->vm_flags;
 	long nrpages = (end - start) >> PAGE_SHIFT;
-	unsigned long charged = 0;
+	unsigned long charged = 0, old_end = vma->vm_end;
 	pgoff_t pgoff;
 	int error;
 	int dirty_accountable = 0;
@@ -203,6 +208,9 @@ success:
 		dirty_accountable = 1;
 	}
 
+	if (oldflags & VM_EXEC)		/* the range was executable before this change */
+		arch_remove_exec_range(mm, old_end);	/* mm is vma->vm_mm, as used by the rest of this function */
+
 	mmu_notifier_invalidate_range_start(mm, start, end);
 	if (is_vm_hugetlb_page(vma))
 		hugetlb_change_protection(vma, start, end, vma->vm_page_prot);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 57d1868..29c0c35 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -669,6 +669,16 @@ unsigned long arch_align_stack(unsigned long sp)
 unsigned long arch_randomize_brk(struct mm_struct *mm)
 {
 	unsigned long range_end = mm->brk + 0x02000000;
-	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+	unsigned long bump = 0;
+#ifdef CONFIG_X86_32
+	/* in the case of NX emulation, shove the brk segment way out of the
+	   way of the exec randomization area, since it can collide with
+	   future allocations if not. */
+	if ( (mm->get_unmapped_exec_area == arch_get_unmapped_exec_area) &&
+	     (mm->brk < 0x08000000) ) {
+		bump = (TASK_SIZE/6);
+	}
+#endif
+	return bump + (randomize_range(mm->brk, range_end, 0) ? : mm->brk);
 }
initial srpm import 2010-07-29 23:46:31 +00:00			`--- a/arch/x86/include/asm/desc.h`
			`+++ b/arch/x86/include/asm/desc.h`
			`@@ -5,6 +5,7 @@`
			`#include <asm/ldt.h>`
			`#include <asm/mmu.h>`
			`#include <linux/smp.h>`
			`+#include <linux/mm_types.h>`

			`static inline void fill_ldt(struct desc_struct *desc,`
			`const struct user_desc *info)`
			`@@ -93,6 +94,9 @@ static inline int desc_empty(const void *ptr)`

			`#define load_TLS(t, cpu) native_load_tls(t, cpu)`
			`#define set_ldt native_set_ldt`
			`+#ifdef CONFIG_X86_32`
			`+#define load_user_cs_desc native_load_user_cs_desc`
			`+#endif /*CONFIG_X86_32*/`

			`#define write_ldt_entry(dt, entry, desc) \`
			`native_write_ldt_entry(dt, entry, desc)`
			`@@ -392,4 +396,25 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)`
			`_set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);`
			`}`

			`+#ifdef CONFIG_X86_32`
			`+static inline void set_user_cs(struct desc_struct *desc, unsigned long limit)`
			`+{`
			`+ limit = (limit - 1) / PAGE_SIZE;`
			`+ desc->a = limit & 0xffff;`
			`+ desc->b = (limit & 0xf0000) \| 0x00c0fb00;`
			`+}`
			`+`
			`+static inline void native_load_user_cs_desc(int cpu, struct mm_struct *mm)`
			`+{`
			`+ get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS] = (mm)->context.user_cs;`
			`+}`
			`+`
			`+#define arch_add_exec_range arch_add_exec_range`
			`+#define arch_remove_exec_range arch_remove_exec_range`
			`+#define arch_flush_exec_range arch_flush_exec_range`
			`+extern void arch_add_exec_range(struct mm_struct *mm, unsigned long limit);`
			`+extern void arch_remove_exec_range(struct mm_struct *mm, unsigned long limit);`
			`+extern void arch_flush_exec_range(struct mm_struct *mm);`
			`+#endif /* CONFIG_X86_32 */`
			`+`
			`#endif /* _ASM_X86_DESC_H */`
			`--- a/arch/x86/include/asm/mmu.h`
			`+++ b/arch/x86/include/asm/mmu.h`
			`@@ -7,12 +7,19 @@`
			`/*`
			`* The x86 doesn't have a mmu context, but`
			`* we put the segment information here.`
			`+ *`
			`+ * exec_limit is used to track the range PROT_EXEC`
			`+ * mappings span.`
			`*/`
			`typedef struct {`
			`void *ldt;`
			`int size;`
			`struct mutex lock;`
			`void *vdso;`
			`+#ifdef CONFIG_X86_32`
			`+ struct desc_struct user_cs;`
			`+ unsigned long exec_limit;`
			`+#endif`
			`} mm_context_t;`

			`#ifdef CONFIG_SMP`
			`--- a/arch/x86/include/asm/paravirt.h`
			`+++ b/arch/x86/include/asm/paravirt.h`
			`@@ -289,6 +289,12 @@ static inline void set_ldt(const void *addr, unsigned entries)`
			`{`
			`PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);`
			`}`
			`+#ifdef CONFIG_X86_32`
			`+static inline void load_user_cs_desc(unsigned int cpu, struct mm_struct *mm)`
			`+{`
			`+ PVOP_VCALL2(pv_cpu_ops.load_user_cs_desc, cpu, mm);`
			`+}`
			`+#endif /*CONFIG_X86_32*/`
			`static inline void store_gdt(struct desc_ptr *dtr)`
			`{`
			`PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr);`
			`--- a/arch/x86/include/asm/paravirt_types.h`
			`+++ b/arch/x86/include/asm/paravirt_types.h`
			`@@ -118,6 +118,9 @@ struct pv_cpu_ops {`
			`void (*store_gdt)(struct desc_ptr *);`
			`void (*store_idt)(struct desc_ptr *);`
			`void (*set_ldt)(const void *desc, unsigned entries);`
			`+#ifdef CONFIG_X86_32`
			`+ void (*load_user_cs_desc)(int cpu, struct mm_struct *mm);`
			`+#endif`
			`unsigned long (*store_tr)(void);`
			`void (*load_tls)(struct thread_struct *t, unsigned int cpu);`
			`#ifdef CONFIG_X86_64`
			`--- a/arch/x86/kernel/cpu/common.c`
			`+++ b/arch/x86/kernel/cpu/common.c`
			`@@ -802,6 +802,22 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)`
			`/* Filter out anything that depends on CPUID levels we don't have */`
			`filter_cpuid_features(c, true);`

			`+#ifdef CONFIG_X86_32`
			`+ /*`
			`+ * emulation of NX with segment limits unfortunately means`
			`+ * we have to disable the fast system calls, due to the way that`
			`+ * sysexit clears the segment limits on return.`
			`+ * If we have either disabled exec-shield on the boot command line,`
			`+ * or we have NX, then we don't need to do this.`
			`+ */`
Remove the execshield boot parameter. Based on a patch from Kees Cook 2010-09-03 15:22:06 +00:00			`+ if (!disable_nx) {`
initial srpm import 2010-07-29 23:46:31 +00:00			`+#ifdef CONFIG_X86_PAE`
			`+ if (!test_cpu_cap(c, X86_FEATURE_NX))`
			`+#endif`
			`+ clear_cpu_cap(c, X86_FEATURE_SEP);`
			`+ }`
			`+#endif`
			`+`
			`/* If the model name is still unset, do table lookup. */`
			`if (!c->x86_model_id[0]) {`
			`const char *p;`
			`--- a/arch/x86/kernel/paravirt.c`
			`+++ b/arch/x86/kernel/paravirt.c`
			`@@ -345,6 +345,9 @@ struct pv_cpu_ops pv_cpu_ops = {`
			`.read_tscp = native_read_tscp,`
			`.load_tr_desc = native_load_tr_desc,`
			`.set_ldt = native_set_ldt,`
			`+#ifdef CONFIG_X86_32`
			`+ .load_user_cs_desc = native_load_user_cs_desc,`
			`+#endif /*CONFIG_X86_32*/`
			`.load_gdt = native_load_gdt,`
			`.load_idt = native_load_idt,`
			`.store_gdt = native_store_gdt,`
			`--- a/arch/x86/kernel/process_32.c`
			`+++ b/arch/x86/kernel/process_32.c`
			`@@ -243,7 +243,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,`
			`void`
			`start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)`
			`{`
			`+ int cpu;`
			`+`
			`set_user_gs(regs, 0);`
			`+`
			`regs->fs = 0;`
			`set_fs(USER_DS);`
			`regs->ds = __USER_DS;`
			`@@ -252,6 +255,11 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)`
			`regs->cs = __USER_CS;`
			`regs->ip = new_ip;`
			`regs->sp = new_sp;`
			`+`
			`+ cpu = get_cpu();`
			`+ load_user_cs_desc(cpu, current->mm);`
			`+ put_cpu();`
			`+`
			`/*`
			`* Free the old FP and other extended state`
			`*/`
			`@@ -311,6 +319,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)`
			`if (preload_fpu)`
			`prefetch(next->fpu.state);`

			`+ if (next_p->mm)`
			`+ load_user_cs_desc(cpu, next_p->mm);`
			`+`
			`/*`
			`* Reload esp0.`
			`*/`
			`@@ -404,3 +415,40 @@ unsigned long get_wchan(struct task_struct *p)`
			`return 0;`
			`}`

			`+static void modify_cs(struct mm_struct *mm, unsigned long limit)`
			`+{`
			`+ mm->context.exec_limit = limit;`
			`+ set_user_cs(&mm->context.user_cs, limit);`
			`+ if (mm == current->mm) {`
			`+ int cpu;`
			`+`
			`+ cpu = get_cpu();`
			`+ load_user_cs_desc(cpu, mm);`
			`+ put_cpu();`
			`+ }`
			`+}`
			`+`
			`+void arch_add_exec_range(struct mm_struct *mm, unsigned long limit)`
			`+{`
			`+ if (limit > mm->context.exec_limit)`
			`+ modify_cs(mm, limit);`
			`+}`
			`+`
			`+void arch_remove_exec_range(struct mm_struct *mm, unsigned long old_end)`
			`+{`
			`+ struct vm_area_struct *vma;`
			`+ unsigned long limit = PAGE_SIZE;`
			`+`
			`+ if (old_end == mm->context.exec_limit) {`
			`+ for (vma = mm->mmap; vma; vma = vma->vm_next)`
			`+ if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))`
			`+ limit = vma->vm_end;`
			`+ modify_cs(mm, limit);`
			`+ }`
			`+}`
			`+`
			`+void arch_flush_exec_range(struct mm_struct *mm)`
			`+{`
			`+ mm->context.exec_limit = 0;`
			`+ set_user_cs(&mm->context.user_cs, 0);`
			`+}`
			`--- a/arch/x86/kernel/traps.c`
			`+++ b/arch/x86/kernel/traps.c`
			`@@ -109,6 +109,78 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)`
			`dec_preempt_count();`
			`}`

			`+#ifdef CONFIG_X86_32`
			`+static inline int`
			`+__compare_user_cs_desc(const struct desc_struct *desc1,`
			`+ const struct desc_struct *desc2)`
			`+{`
			`+ return ((desc1->limit0 != desc2->limit0) \|\|`
			`+ (desc1->limit != desc2->limit) \|\|`
			`+ (desc1->base0 != desc2->base0) \|\|`
			`+ (desc1->base1 != desc2->base1) \|\|`
			`+ (desc1->base2 != desc2->base2));`
			`+}`
			`+`
			`+/*`
			`+ * lazy-check for CS validity on exec-shield binaries:`
			`+ *`
			`+ * the original non-exec stack patch was written by`
			`+ * Solar Designer <solar at openwall.com>. Thanks!`
			`+ */`
			`+static int`
			`+check_lazy_exec_limit(int cpu, struct pt_regs *regs, long error_code)`
			`+{`
			`+ struct desc_struct *desc1, *desc2;`
			`+ struct vm_area_struct *vma;`
			`+ unsigned long limit;`
			`+`
			`+ if (current->mm == NULL)`
			`+ return 0;`
			`+`
			`+ limit = -1UL;`
			`+ if (current->mm->context.exec_limit != -1UL) {`
			`+ limit = PAGE_SIZE;`
			`+ spin_lock(&current->mm->page_table_lock);`
			`+ for (vma = current->mm->mmap; vma; vma = vma->vm_next)`
			`+ if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))`
			`+ limit = vma->vm_end;`
			`+ vma = get_gate_vma(current);`
			`+ if (vma && (vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))`
			`+ limit = vma->vm_end;`
			`+ spin_unlock(&current->mm->page_table_lock);`
			`+ if (limit >= TASK_SIZE)`
			`+ limit = -1UL;`
			`+ current->mm->context.exec_limit = limit;`
			`+ }`
			`+ set_user_cs(&current->mm->context.user_cs, limit);`
			`+`
			`+ desc1 = &current->mm->context.user_cs;`
			`+ desc2 = get_cpu_gdt_table(cpu) + GDT_ENTRY_DEFAULT_USER_CS;`
			`+`
			`+ if (__compare_user_cs_desc(desc1, desc2)) {`
			`+ /*`
			`+ * The CS was not in sync - reload it and retry the`
			`+ * instruction. If the instruction still faults then`
			`+ * we won't hit this branch next time around.`
			`+ */`
			`+ if (print_fatal_signals >= 2) {`
			`+ printk(KERN_ERR "#GPF fixup (%ld[seg:%lx]) at %08lx, CPU#%d.\n",`
			`+ error_code, error_code/8, regs->ip,`
			`+ smp_processor_id());`
			`+ printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x, CPU_cs: %08x/%08x.\n",`
			`+ current->mm->context.exec_limit,`
			`+ desc1->a, desc1->b, desc2->a, desc2->b);`
			`+ }`
			`+`
			`+ load_user_cs_desc(cpu, current->mm);`
			`+`
			`+ return 1;`
			`+ }`
			`+`
			`+ return 0;`
			`+}`
			`+#endif`
			`+`
			`static void __kprobes`
			`do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,`
			`long error_code, siginfo_t *info)`
			`@@ -265,6 +337,29 @@ do_general_protection(struct pt_regs *regs, long error_code)`
			`if (!user_mode(regs))`
			`goto gp_in_kernel;`

			`+#ifdef CONFIG_X86_32`
			`+{`
			`+ int cpu;`
			`+ int ok;`
			`+`
			`+ cpu = get_cpu();`
			`+ ok = check_lazy_exec_limit(cpu, regs, error_code);`
			`+ put_cpu();`
			`+`
			`+ if (ok)`
			`+ return;`
			`+`
			`+ if (print_fatal_signals) {`
			`+ printk(KERN_ERR "#GPF(%ld[seg:%lx]) at %08lx, CPU#%d.\n",`
			`+ error_code, error_code/8, regs->ip, smp_processor_id());`
			`+ printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x.\n",`
			`+ current->mm->context.exec_limit,`
			`+ current->mm->context.user_cs.a,`
			`+ current->mm->context.user_cs.b);`
			`+ }`
			`+}`
			`+#endif /*CONFIG_X86_32*/`
			`+`
			`tsk->thread.error_code = error_code;`
			`tsk->thread.trap_no = 13;`

			`@@ -792,19 +887,37 @@ do_device_not_available(struct pt_regs *regs, long error_code)`
			`}`

			`#ifdef CONFIG_X86_32`
			`+/*`
			`+ * The fixup code for errors in iret jumps to here (iret_exc). It loses`
			`+ * the original trap number and erorr code. The bogus trap 32 and error`
			`+ * code 0 are what the vanilla kernel delivers via:`
			`+ * DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)`
			`+ *`
			`+ * NOTE: Because of the final "1" in the macro we need to enable interrupts.`
			`+ *`
			`+ * In case of a general protection fault in the iret instruction, we`
			`+ * need to check for a lazy CS update for exec-shield.`
			`+ */`
			`dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)`
			`{`
			`- siginfo_t info;`
			`+ int ok;`
			`+ int cpu;`
			`+`
			`local_irq_enable();`

			`- info.si_signo = SIGILL;`
			`- info.si_errno = 0;`
			`- info.si_code = ILL_BADSTK;`
			`- info.si_addr = NULL;`
			`- if (notify_die(DIE_TRAP, "iret exception",`
			`- regs, error_code, 32, SIGILL) == NOTIFY_STOP)`
			`- return;`
			`- do_trap(32, SIGILL, "iret exception", regs, error_code, &info);`
			`+ cpu = get_cpu();`
			`+ ok = check_lazy_exec_limit(cpu, regs, error_code);`
			`+ put_cpu();`
			`+`
			`+ if (!ok && notify_die(DIE_TRAP, "iret exception", regs,`
			`+ error_code, 32, SIGSEGV) != NOTIFY_STOP) {`
			`+ siginfo_t info;`
			`+ info.si_signo = SIGSEGV;`
			`+ info.si_errno = 0;`
			`+ info.si_code = ILL_BADSTK;`
			`+ info.si_addr = 0;`
			`+ do_trap(32, SIGSEGV, "iret exception", regs, error_code, &info);`
			`+ }`
			`}`
			`#endif`

			`--- a/arch/x86/mm/setup_nx.c`
			`+++ b/arch/x86/mm/setup_nx.c`
			`@@ -1,3 +1,4 @@`
			`+#include <linux/sched.h>`
			`#include <linux/spinlock.h>`
			`#include <linux/errno.h>`
			`#include <linux/init.h>`
Remove the execshield boot parameter. Based on a patch from Kees Cook 2010-09-03 15:22:06 +00:00			`@@ -6,7 +6,7 @@`
			`#include <asm/pgtable.h>`
			`#include <asm/proto.h>`

			`-static int disable_nx __cpuinitdata;`
			`+int disable_nx __cpuinitdata;`

			`/*`
			`* noexec = on\|off`
initial srpm import 2010-07-29 23:46:31 +00:00			`@@ -40,6 +42,10 @@ void __cpuinit x86_configure_nx(void)`
			`void __init x86_report_nx(void)`
			`{`
			`if (!cpu_has_nx) {`
Remove the execshield boot parameter. Based on a patch from Kees Cook 2010-09-03 15:22:06 +00:00			`+ if (disable_nx)`
initial srpm import 2010-07-29 23:46:31 +00:00			`+ printk(KERN_INFO "Using x86 segment limits to approximate NX protection\n");`
			`+ else`
			`+`
			`printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "`
			`"missing in CPU or disabled in BIOS!\n");`
			`} else {`
			`--- a/arch/x86/mm/tlb.c`
			`+++ b/arch/x86/mm/tlb.c`
			`@@ -6,6 +6,7 @@`
			`#include <linux/interrupt.h>`
			`#include <linux/module.h>`

			`+#include <asm/desc.h>`
			`#include <asm/tlbflush.h>`
			`#include <asm/mmu_context.h>`
			`#include <asm/cache.h>`
			`@@ -131,6 +132,12 @@ void smp_invalidate_interrupt(struct pt_regs *regs)`
			`union smp_flush_state *f;`

			`cpu = smp_processor_id();`
			`+`
			`+#ifdef CONFIG_X86_32`
			`+ if (current->active_mm)`
			`+ load_user_cs_desc(cpu, current->active_mm);`
			`+#endif`
			`+`
			`/*`
			`* orig_rax contains the negated interrupt vector.`
			`* Use that to determine where the sender put the data.`
			`--- a/arch/x86/xen/enlighten.c`
			`+++ b/arch/x86/xen/enlighten.c`
			`@@ -335,6 +335,24 @@ static void xen_set_ldt(const void *addr, unsigned entries)`
			`xen_mc_issue(PARAVIRT_LAZY_CPU);`
			`}`

			`+#ifdef CONFIG_X86_32`
			`+static void xen_load_user_cs_desc(int cpu, struct mm_struct *mm)`
			`+{`
			`+ void *gdt;`
			`+ xmaddr_t mgdt;`
			`+ u64 descriptor;`
			`+ struct desc_struct user_cs;`
			`+`
			`+ gdt = &get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS];`
			`+ mgdt = virt_to_machine(gdt);`
			`+`
			`+ user_cs = mm->context.user_cs;`
			`+ descriptor = (u64) user_cs.a \| ((u64) user_cs.b) << 32;`
			`+`
			`+ HYPERVISOR_update_descriptor(mgdt.maddr, descriptor);`
			`+}`
			`+#endif /*CONFIG_X86_32*/`
			`+`
			`static void xen_load_gdt(const struct desc_ptr *dtr)`
			`{`
			`unsigned long va = dtr->address;`
			`@@ -961,6 +979,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {`

			`.load_tr_desc = paravirt_nop,`
			`.set_ldt = xen_set_ldt,`
			`+#ifdef CONFIG_X86_32`
			`+ .load_user_cs_desc = xen_load_user_cs_desc,`
			`+#endif /*CONFIG_X86_32*/`
			`.load_gdt = xen_load_gdt,`
			`.load_idt = xen_load_idt,`
			`.load_tls = xen_load_tls,`
			`--- a/fs/binfmt_elf.c`
			`+++ b/fs/binfmt_elf.c`
			`@@ -717,6 +722,15 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)`
			`if (retval)`
			`goto out_free_dentry;`

			`+#ifdef CONFIG_X86_32`
			`+ /*`
			`+ * Turn off the CS limit completely if exec-shield disabled or`
			`+ * NX active:`
			`+ */`
Remove the execshield boot parameter. Based on a patch from Kees Cook 2010-09-03 15:22:06 +00:00			`+ if (disable_nx \|\| executable_stack != EXSTACK_DISABLE_X \|\| (__supported_pte_mask & _PAGE_NX))`
initial srpm import 2010-07-29 23:46:31 +00:00			`+ arch_add_exec_range(current->mm, -1);`
			`+#endif`
			`+`
			`/* OK, This is the point of no return */`
			`current->flags &= ~PF_FORKNOEXEC;`
			`current->mm->def_flags = def_flags;`
			`--- a/include/linux/sched.h`
			`+++ b/include/linux/sched.h`
			`@@ -101,6 +101,9 @@ struct bio_list;`
			`struct fs_struct;`
			`struct perf_event_context;`

Remove the execshield boot parameter. Based on a patch from Kees Cook 2010-09-03 15:22:06 +00:00			`+extern int disable_nx;`
initial srpm import 2010-07-29 23:46:31 +00:00			`+extern int print_fatal_signals;`
			`+`
			`/*`
			`* List of flags we want to share for kernel threads,`
			`* if only because they are not used by them anyway.`
			`--- b/mm/mmap.c`
			`+++ b/mm/mmap.c`
			`@@ -44,6 +45,18 @@`
			`#define arch_rebalance_pgtables(addr, len) (addr)`
			`#endif`

			`+/* No sane architecture will #define these to anything else */`
			`+#ifndef arch_add_exec_range`
			`+#define arch_add_exec_range(mm, limit) do { ; } while (0)`
			`+#endif`
			`+#ifndef arch_flush_exec_range`
			`+#define arch_flush_exec_range(mm) do { ; } while (0)`
			`+#endif`
			`+#ifndef arch_remove_exec_range`
			`+#define arch_remove_exec_range(mm, limit) do { ; } while (0)`
			`+#endif`
			`+`
			`+`
			`static void unmap_region(struct mm_struct *mm,`
			`struct vm_area_struct *vma, struct vm_area_struct *prev,`
			`unsigned long start, unsigned long end);`
Linux 2.6.36-rc1-git4 2010-08-22 12:22:19 +00:00			`@@ -388,6 +401,9 @@`
initial srpm import 2010-07-29 23:46:31 +00:00			`{`
Linux 2.6.36-rc1-git4 2010-08-22 12:22:19 +00:00			`struct vm_area_struct *next;`

initial srpm import 2010-07-29 23:46:31 +00:00			`+ if (vma->vm_flags & VM_EXEC)`
			`+ arch_add_exec_range(mm, vma->vm_end);`
Linux 2.6.36-rc1-git4 2010-08-22 12:22:19 +00:00			`+`
			`vma->vm_prev = prev;`
initial srpm import 2010-07-29 23:46:31 +00:00			`if (prev) {`
Linux 2.6.36-rc1-git4 2010-08-22 12:22:19 +00:00			`next = prev->vm_next;`
initial srpm import 2010-07-29 23:46:31 +00:00			`@@ -489,6 +504,8 @@`
			`rb_erase(&vma->vm_rb, &mm->mm_rb);`
			`if (mm->mmap_cache == vma)`
			`mm->mmap_cache = prev;`
			`+ if (vma->vm_flags & VM_EXEC)`
			`+ arch_remove_exec_range(mm, vma->vm_end);`
			`}`

			`/*`
			`@@ -790,6 +807,8 @@`
			`} else /* cases 2, 5, 7 */`
			`err = vma_adjust(prev, prev->vm_start,`
			`end, prev->vm_pgoff, NULL);`
			`+ if (prev->vm_flags & VM_EXEC)`
			`+ arch_add_exec_range(mm, prev->vm_end);`
			`if (err)`
			`return NULL;`
			`return prev;`
			`@@ -1966,10 +2075,14 @@`
			`if (new->vm_ops && new->vm_ops->open)`
			`new->vm_ops->open(new);`

			`- if (new_below)`
			`+ if (new_below) {`
			`+ unsigned long old_end = vma->vm_end;`
			`+`
			`err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +`
			`((addr - new->vm_start) >> PAGE_SHIFT), new);`
			`- else`
			`+ if (vma->vm_flags & VM_EXEC)`
			`+ arch_remove_exec_range(mm, old_end);`
			`+ } else`
			`err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);`

			`/* Success. */`
			`@@ -2254,6 +2367,7 @@`

			`free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);`
			`tlb_finish_mmu(tlb, 0, end);`
			`+ arch_flush_exec_range(mm);`

			`/*`
			`* Walk the list again, actually closing and freeing it,`
			`--- a/mm/mprotect.c`
			`+++ b/mm/mprotect.c`
			`@@ -25,9 +25,14 @@`
			`#include <linux/perf_event.h>`
			`#include <asm/uaccess.h>`
			`#include <asm/pgtable.h>`
			`+#include <asm/pgalloc.h>`
			`#include <asm/cacheflush.h>`
			`#include <asm/tlbflush.h>`

			`+#ifndef arch_remove_exec_range`
			`+#define arch_remove_exec_range(mm, limit) do { ; } while (0)`
			`+#endif`
			`+`
			`#ifndef pgprot_modify`
			`static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)`
			`{`
			`@@ -138,7 +143,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,`
			`struct mm_struct *mm = vma->vm_mm;`
			`unsigned long oldflags = vma->vm_flags;`
			`long nrpages = (end - start) >> PAGE_SHIFT;`
			`- unsigned long charged = 0;`
			`+ unsigned long charged = 0, old_end = vma->vm_end;`
			`pgoff_t pgoff;`
			`int error;`
			`int dirty_accountable = 0;`
			`@@ -203,6 +208,9 @@ success:`
			`dirty_accountable = 1;`
			`}`

			`+ if (oldflags & VM_EXEC)`
			`+ arch_remove_exec_range(current->mm, old_end);`
			`+`
			`mmu_notifier_invalidate_range_start(mm, start, end);`
			`if (is_vm_hugetlb_page(vma))`
			`hugetlb_change_protection(vma, start, end, vma->vm_page_prot);`
exec-randomization: brk away from exec rand area This is a fix for the NX emulation patch to force the brk area well outside of the exec randomization area to avoid future allocation or brk growth collisions. Normally this isn't a problem, except when the text region has been loaded from a PIE binary and the CS limit can't be put just above bss. A test-case that will show failures without this patch can be found here: http://bazaar.launchpad.net/~ubuntu-bugcontrol/qa-regression-testing/master/annotate/head%3A/scripts/kernel-aslr-collisions/explode-brk.c Signed-off-by: Kees Cook <kees.cook@canonical.com> 2010-09-03 15:48:57 +00:00			`diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c`
			`index 57d1868..29c0c35 100644`
			`--- a/arch/x86/kernel/process.c`
			`+++ b/arch/x86/kernel/process.c`
			`@@ -669,6 +669,16 @@ unsigned long arch_align_stack(unsigned long sp)`
			`unsigned long arch_randomize_brk(struct mm_struct *mm)`
			`{`
			`unsigned long range_end = mm->brk + 0x02000000;`
			`- return randomize_range(mm->brk, range_end, 0) ? : mm->brk;`
			`+ unsigned long bump = 0;`
			`+#ifdef CONFIG_X86_32`
			`+ /* in the case of NX emulation, shove the brk segment way out of the`
			`+ way of the exec randomization area, since it can collide with`
			`+ future allocations if not. */`
			`+ if ( (mm->get_unmapped_exec_area == arch_get_unmapped_exec_area) &&`
			`+ (mm->brk < 0x08000000) ) {`
			`+ bump = (TASK_SIZE/6);`
			`+ }`
			`+#endif`
			`+ return bump + (randomize_range(mm->brk, range_end, 0) ? : mm->brk);`
			`}`