Remove 32bit NX emulation. Its time has passed. (and it seems to be broken anyway)

This commit is contained in:
Dave Jones 2012-06-04 12:32:49 -04:00
parent 035f913c7c
commit 48d35dba7a
5 changed files with 3 additions and 1002 deletions

View File

@ -650,9 +650,6 @@ Patch09: linux-2.6-upstream-reverts.patch
# Standalone patches
Patch100: taint-vbox.patch
Patch160: linux-2.6-32bit-mmap-exec-randomization.patch
Patch161: linux-2.6-i386-nx-emulation.patch
Patch162: nx-emu-remove-cpuinitdata-for-disable_nx-on-x86_32.patch
Patch390: linux-2.6-defaults-acpi-video.patch
Patch391: linux-2.6-acpi-video-dos.patch
@ -737,9 +734,6 @@ Patch21098: hfsplus-Fix-bless-ioctl-when-used-with-hardlinks.patch
#rhbz 754518
Patch21235: scsi-sd_revalidate_disk-prevent-NULL-ptr-deref.patch
#rhbz 804957 CVE-2012-1568
Patch21306: shlib_base_randomize.patch
Patch21400: unhandled-irqs-switch-to-polling.patch
Patch22000: weird-root-dentry-name-debug.patch
@ -1300,9 +1294,6 @@ ApplyPatch taint-vbox.patch
# Architecture patches
# x86(-64)
ApplyPatch linux-2.6-32bit-mmap-exec-randomization.patch
ApplyPatch linux-2.6-i386-nx-emulation.patch
ApplyPatch nx-emu-remove-cpuinitdata-for-disable_nx-on-x86_32.patch
#
# ARM
@ -1427,9 +1418,6 @@ ApplyPatch hfsplus-Fix-bless-ioctl-when-used-with-hardlinks.patch
#rhbz 754518
ApplyPatch scsi-sd_revalidate_disk-prevent-NULL-ptr-deref.patch
#rhbz 804957 CVE-2012-1568
ApplyPatch shlib_base_randomize.patch
ApplyPatch unhandled-irqs-switch-to-polling.patch
ApplyPatch weird-root-dentry-name-debug.patch
@ -2290,6 +2278,9 @@ fi
# ||----w |
# || ||
%changelog
* Mon Jun 04 2012 Dave Jones <davej@redhat.com>
- Remove 32bit NX emulation.
* Mon Jun 04 2012 Josh Boyer <jwboyer@redhat.com>
- Remove modules.{devname,softdep} to prevent RPM verify errors (rhbz 650807)

View File

@ -1,248 +0,0 @@
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 735279e..0f9f005 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -756,6 +756,16 @@ unsigned long arch_align_stack(unsigned long sp)
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
unsigned long range_end = mm->brk + 0x02000000;
- return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+ unsigned long bump = 0;
+#ifdef CONFIG_X86_32
+ /* in the case of NX emulation, shove the brk segment way out of the
+ way of the exec randomization area, since it can collide with
+ future allocations if not. */
+ if ( (mm->get_unmapped_exec_area == arch_get_unmapped_exec_area) &&
+ (mm->brk < 0x08000000) ) {
+ bump = (TASK_SIZE/6);
+ }
+#endif
+ return bump + (randomize_range(mm->brk, range_end, 0) ? : mm->brk);
}
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 845df68..d437466 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -119,6 +119,12 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
} else {
mm->mmap_base = mmap_base();
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+#ifdef CONFIG_X86_32
+ if (!(current->personality & READ_IMPLIES_EXEC)
+ && !(__supported_pte_mask & _PAGE_NX)
+ && mmap_is_ia32())
+ mm->get_unmapped_exec_area = arch_get_unmapped_exec_area;
+#endif
mm->unmap_area = arch_unmap_area_topdown;
}
}
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 66e6d93..b80cde7 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -330,7 +330,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
if (compat)
addr = VDSO_HIGH_BASE;
else {
- addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
+ addr = get_unmapped_area_prot(NULL, 0, PAGE_SIZE, 0, 0, 1);
if (IS_ERR_VALUE(addr)) {
ret = addr;
goto up_fail;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b36d08c..91e573d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1387,7 +1387,13 @@ extern int install_special_mapping(struct mm_struct *mm,
unsigned long addr, unsigned long len,
unsigned long flags, struct page **pages);
-extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+extern unsigned long get_unmapped_area_prot(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, int);
+
+static inline unsigned long get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+ return get_unmapped_area_prot(file, addr, len, pgoff, flags, 0);
+}
extern unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long len, unsigned long flags,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index dad95bd..01a150c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -294,6 +294,9 @@ struct mm_struct {
unsigned long (*get_unmapped_area) (struct file *filp,
unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags);
+ unsigned long (*get_unmapped_exec_area) (struct file *filp,
+ unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags);
void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
#endif
unsigned long mmap_base; /* base of mmap area */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f34437e..12fe177 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -391,6 +391,10 @@ extern void arch_pick_mmap_layout(struct mm_struct *mm);
extern unsigned long
arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
unsigned long, unsigned long);
+
+extern unsigned long
+arch_get_unmapped_exec_area(struct file *, unsigned long, unsigned long,
+ unsigned long, unsigned long);
extern unsigned long
arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff,
diff --git a/mm/mmap.c b/mm/mmap.c
index 3edfcdf..076ec09 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -31,6 +31,7 @@
#include <linux/audit.h>
#include <linux/khugepaged.h>
#include <linux/uprobes.h>
+#include <linux/random.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
@@ -1011,7 +1012,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
/* Obtain the address to map to. we verify (or select) it and ensure
* that it represents a valid section of the address space.
*/
- addr = get_unmapped_area(file, addr, len, pgoff, flags);
+ addr = get_unmapped_area_prot(file, addr, len, pgoff, flags,
+ prot & PROT_EXEC);
if (addr & ~PAGE_MASK)
return addr;
@@ -1568,8 +1570,8 @@ void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
}
unsigned long
-get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
- unsigned long pgoff, unsigned long flags)
+get_unmapped_area_prot(struct file *file, unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags, int exec)
{
unsigned long (*get_area)(struct file *, unsigned long,
unsigned long, unsigned long, unsigned long);
@@ -1582,7 +1584,11 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
if (len > TASK_SIZE)
return -ENOMEM;
- get_area = current->mm->get_unmapped_area;
+ if (exec && current->mm->get_unmapped_exec_area)
+ get_area = current->mm->get_unmapped_exec_area;
+ else
+ get_area = current->mm->get_unmapped_area;
+
if (file && file->f_op && file->f_op->get_unmapped_area)
get_area = file->f_op->get_unmapped_area;
addr = get_area(file, addr, len, pgoff, flags);
@@ -1598,8 +1604,83 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
error = security_mmap_addr(addr);
return error ? error : addr;
}
+EXPORT_SYMBOL(get_unmapped_area_prot);
+
+static bool should_randomize(void)
+{
+ return (current->flags & PF_RANDOMIZE) &&
+ !(current->personality & ADDR_NO_RANDOMIZE);
+}
+
+#define SHLIB_BASE 0x00110000
+
+unsigned long
+arch_get_unmapped_exec_area(struct file *filp, unsigned long addr0,
+ unsigned long len0, unsigned long pgoff, unsigned long flags)
+{
+ unsigned long addr = addr0, len = len0;
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ unsigned long tmp;
+
+ if (len > TASK_SIZE)
+ return -ENOMEM;
+
+ if (flags & MAP_FIXED)
+ return addr;
+
+ if (!addr)
+ addr = !should_randomize() ? SHLIB_BASE :
+ randomize_range(SHLIB_BASE, 0x01000000, len);
+
+ if (addr) {
+ addr = PAGE_ALIGN(addr);
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr &&
+ (!vma || addr + len <= vma->vm_start))
+ return addr;
+ }
+
+ addr = SHLIB_BASE;
+ for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+ /* At this point: (!vma || addr < vma->vm_end). */
+ if (TASK_SIZE - len < addr)
+ return -ENOMEM;
+
+ if (!vma || addr + len <= vma->vm_start) {
+ /*
+ * Must not let a PROT_EXEC mapping get into the
+ * brk area:
+ */
+ if (addr + len > mm->brk)
+ goto failed;
+
+ /*
+ * Up until the brk area we randomize addresses
+ * as much as possible:
+ */
+ if (addr >= 0x01000000 && should_randomize()) {
+ tmp = randomize_range(0x01000000,
+ PAGE_ALIGN(max(mm->start_brk,
+ (unsigned long)0x08000000)), len);
+ vma = find_vma(mm, tmp);
+ if (TASK_SIZE - len >= tmp &&
+ (!vma || tmp + len <= vma->vm_start))
+ return tmp;
+ }
+ /*
+ * Ok, randomization didn't work out - return
+ * the result of the linear search:
+ */
+ return addr;
+ }
+ addr = vma->vm_end;
+ }
+
+failed:
+ return current->mm->get_unmapped_area(filp, addr0, len0, pgoff, flags);
+}
-EXPORT_SYMBOL(get_unmapped_area);
/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
diff --git a/mm/mremap.c b/mm/mremap.c
index 21fed20..a3de1ee 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -519,10 +519,10 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
if (vma->vm_flags & VM_MAYSHARE)
map_flags |= MAP_SHARED;
- new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
+ new_addr = get_unmapped_area_prot(vma->vm_file, 0, new_len,
vma->vm_pgoff +
((addr - vma->vm_start) >> PAGE_SHIFT),
- map_flags);
+ map_flags, vma->vm_flags & VM_EXEC);
if (new_addr & ~PAGE_MASK) {
ret = new_addr;
goto out;

View File

@ -1,626 +0,0 @@
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 8bf1c06..49f8ab2 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -5,6 +5,7 @@
#include <asm/ldt.h>
#include <asm/mmu.h>
+#include <linux/mm_types.h>
#include <linux/smp.h>
#include <linux/percpu.h>
@@ -100,6 +101,9 @@ static inline int desc_empty(const void *ptr)
#define load_TLS(t, cpu) native_load_tls(t, cpu)
#define set_ldt native_set_ldt
+#ifdef CONFIG_X86_32
+#define load_user_cs_desc native_load_user_cs_desc
+#endif /*CONFIG_X86_32*/
#define write_ldt_entry(dt, entry, desc) native_write_ldt_entry(dt, entry, desc)
#define write_gdt_entry(dt, entry, desc, type) native_write_gdt_entry(dt, entry, desc, type)
@@ -405,4 +409,25 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
_set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
}
+#ifdef CONFIG_X86_32
+static inline void set_user_cs(struct desc_struct *desc, unsigned long limit)
+{
+ limit = (limit - 1) / PAGE_SIZE;
+ desc->a = limit & 0xffff;
+ desc->b = (limit & 0xf0000) | 0x00c0fb00;
+}
+
+static inline void native_load_user_cs_desc(int cpu, struct mm_struct *mm)
+{
+ get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS] = (mm)->context.user_cs;
+}
+
+#define arch_add_exec_range arch_add_exec_range
+#define arch_remove_exec_range arch_remove_exec_range
+#define arch_flush_exec_range arch_flush_exec_range
+extern void arch_add_exec_range(struct mm_struct *mm, unsigned long limit);
+extern void arch_remove_exec_range(struct mm_struct *mm, unsigned long limit);
+extern void arch_flush_exec_range(struct mm_struct *mm);
+#endif /* CONFIG_X86_32 */
+
#endif /* _ASM_X86_DESC_H */
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 5f55e69..aba94f0 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -7,6 +7,9 @@
/*
* The x86 doesn't have a mmu context, but
* we put the segment information here.
+ *
+ * exec_limit is used to track the range PROT_EXEC
+ * mappings span.
*/
typedef struct {
void *ldt;
@@ -19,6 +22,11 @@ typedef struct {
struct mutex lock;
void *vdso;
+
+#ifdef CONFIG_X86_32
+ struct desc_struct user_cs;
+ unsigned long exec_limit;
+#endif
} mm_context_t;
#ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index aa0f913..016fcf6 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -299,6 +299,12 @@ static inline void set_ldt(const void *addr, unsigned entries)
{
PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
}
+#ifdef CONFIG_X86_32
+static inline void load_user_cs_desc(unsigned int cpu, struct mm_struct *mm)
+{
+ PVOP_VCALL2(pv_cpu_ops.load_user_cs_desc, cpu, mm);
+}
+#endif /*CONFIG_X86_32*/
static inline void store_gdt(struct desc_ptr *dtr)
{
PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr);
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 8e8b9a4..cca421e 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -125,6 +125,9 @@ struct pv_cpu_ops {
void (*store_gdt)(struct desc_ptr *);
void (*store_idt)(struct desc_ptr *);
void (*set_ldt)(const void *desc, unsigned entries);
+#ifdef CONFIG_X86_32
+ void (*load_user_cs_desc)(int cpu, struct mm_struct *mm);
+#endif
unsigned long (*store_tr)(void);
void (*load_tls)(struct thread_struct *t, unsigned int cpu);
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 82f29e7..d8597f2 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -841,6 +841,22 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
/* Filter out anything that depends on CPUID levels we don't have */
filter_cpuid_features(c, true);
+#ifdef CONFIG_X86_32
+ /*
+ * emulation of NX with segment limits unfortunately means
+ * we have to disable the fast system calls, due to the way that
+ * sysexit clears the segment limits on return.
+ * If we have either disabled exec-shield on the boot command line,
+ * or we have NX, then we don't need to do this.
+ */
+ if (!disable_nx) {
+#ifdef CONFIG_X86_PAE
+ if (!test_cpu_cap(c, X86_FEATURE_NX))
+#endif
+ clear_cpu_cap(c, X86_FEATURE_SEP);
+ }
+#endif
+
/* If the model name is still unset, do table lookup. */
if (!c->x86_model_id[0]) {
const char *p;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 9ce8859..d19990c 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -360,6 +360,9 @@ struct pv_cpu_ops pv_cpu_ops = {
.read_tscp = native_read_tscp,
.load_tr_desc = native_load_tr_desc,
.set_ldt = native_set_ldt,
+#ifdef CONFIG_X86_32
+ .load_user_cs_desc = native_load_user_cs_desc,
+#endif /*CONFIG_X86_32*/
.load_gdt = native_load_gdt,
.load_idt = native_load_idt,
.store_gdt = native_store_gdt,
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 01d8d40..4c2ece1 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -191,7 +191,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
+ int cpu;
+
set_user_gs(regs, 0);
+
regs->fs = 0;
regs->ds = __USER_DS;
regs->es = __USER_DS;
@@ -199,6 +202,11 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
regs->cs = __USER_CS;
regs->ip = new_ip;
regs->sp = new_sp;
+
+ cpu = get_cpu();
+ load_user_cs_desc(cpu, current->mm);
+ put_cpu();
+
/*
* Free the old FP and other extended state
*/
@@ -264,6 +272,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
lazy_save_gs(prev->gs);
+ if (next_p->mm)
+ load_user_cs_desc(cpu, next_p->mm);
+
/*
* Load the per-thread Thread-Local Storage descriptor.
*/
@@ -334,3 +345,40 @@ unsigned long get_wchan(struct task_struct *p)
return 0;
}
+static void modify_cs(struct mm_struct *mm, unsigned long limit)
+{
+ mm->context.exec_limit = limit;
+ set_user_cs(&mm->context.user_cs, limit);
+ if (mm == current->mm) {
+ int cpu;
+
+ cpu = get_cpu();
+ load_user_cs_desc(cpu, mm);
+ put_cpu();
+ }
+}
+
+void arch_add_exec_range(struct mm_struct *mm, unsigned long limit)
+{
+ if (limit > mm->context.exec_limit)
+ modify_cs(mm, limit);
+}
+
+void arch_remove_exec_range(struct mm_struct *mm, unsigned long old_end)
+{
+ struct vm_area_struct *vma;
+ unsigned long limit = PAGE_SIZE;
+
+ if (old_end == mm->context.exec_limit) {
+ for (vma = mm->mmap; vma; vma = vma->vm_next)
+ if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
+ limit = vma->vm_end;
+ modify_cs(mm, limit);
+ }
+}
+
+void arch_flush_exec_range(struct mm_struct *mm)
+{
+ mm->context.exec_limit = 0;
+ set_user_cs(&mm->context.user_cs, 0);
+}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 92d5756..662c2f9 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -108,6 +108,78 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
dec_preempt_count();
}
+#ifdef CONFIG_X86_32
+static inline int
+__compare_user_cs_desc(const struct desc_struct *desc1,
+ const struct desc_struct *desc2)
+{
+ return ((desc1->limit0 != desc2->limit0) ||
+ (desc1->limit != desc2->limit) ||
+ (desc1->base0 != desc2->base0) ||
+ (desc1->base1 != desc2->base1) ||
+ (desc1->base2 != desc2->base2));
+}
+
+/*
+ * lazy-check for CS validity on exec-shield binaries:
+ *
+ * the original non-exec stack patch was written by
+ * Solar Designer <solar at openwall.com>. Thanks!
+ */
+static int
+check_lazy_exec_limit(int cpu, struct pt_regs *regs, long error_code)
+{
+ struct desc_struct *desc1, *desc2;
+ struct vm_area_struct *vma;
+ unsigned long limit;
+
+ if (current->mm == NULL)
+ return 0;
+
+ limit = -1UL;
+ if (current->mm->context.exec_limit != -1UL) {
+ limit = PAGE_SIZE;
+ spin_lock(&current->mm->page_table_lock);
+ for (vma = current->mm->mmap; vma; vma = vma->vm_next)
+ if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
+ limit = vma->vm_end;
+ vma = get_gate_vma(current->mm);
+ if (vma && (vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
+ limit = vma->vm_end;
+ spin_unlock(&current->mm->page_table_lock);
+ if (limit >= TASK_SIZE)
+ limit = -1UL;
+ current->mm->context.exec_limit = limit;
+ }
+ set_user_cs(&current->mm->context.user_cs, limit);
+
+ desc1 = &current->mm->context.user_cs;
+ desc2 = get_cpu_gdt_table(cpu) + GDT_ENTRY_DEFAULT_USER_CS;
+
+ if (__compare_user_cs_desc(desc1, desc2)) {
+ /*
+ * The CS was not in sync - reload it and retry the
+ * instruction. If the instruction still faults then
+ * we won't hit this branch next time around.
+ */
+ if (print_fatal_signals >= 2) {
+ printk(KERN_ERR "#GPF fixup (%ld[seg:%lx]) at %08lx, CPU#%d.\n",
+ error_code, error_code/8, regs->ip,
+ smp_processor_id());
+ printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x, CPU_cs: %08x/%08x.\n",
+ current->mm->context.exec_limit,
+ desc1->a, desc1->b, desc2->a, desc2->b);
+ }
+
+ load_user_cs_desc(cpu, current->mm);
+
+ return 1;
+ }
+
+ return 0;
+}
+#endif
+
static void __kprobes
do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
long error_code, siginfo_t *info)
@@ -268,6 +340,29 @@ do_general_protection(struct pt_regs *regs, long error_code)
if (!user_mode(regs))
goto gp_in_kernel;
+#ifdef CONFIG_X86_32
+{
+ int cpu;
+ int ok;
+
+ cpu = get_cpu();
+ ok = check_lazy_exec_limit(cpu, regs, error_code);
+ put_cpu();
+
+ if (ok)
+ return;
+
+ if (print_fatal_signals) {
+ printk(KERN_ERR "#GPF(%ld[seg:%lx]) at %08lx, CPU#%d.\n",
+ error_code, error_code/8, regs->ip, smp_processor_id());
+ printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x.\n",
+ current->mm->context.exec_limit,
+ current->mm->context.user_cs.a,
+ current->mm->context.user_cs.b);
+ }
+}
+#endif /*CONFIG_X86_32*/
+
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_GP;
@@ -646,20 +741,37 @@ do_device_not_available(struct pt_regs *regs, long error_code)
}
#ifdef CONFIG_X86_32
+/*
+ * The fixup code for errors in iret jumps to here (iret_exc). It loses
+ * the original trap number and error code. The bogus trap 32 and error
+ * code 0 are what the vanilla kernel delivers via:
+ * DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
+ *
+ * NOTE: Because of the final "1" in the macro we need to enable interrupts.
+ *
+ * In case of a general protection fault in the iret instruction, we
+ * need to check for a lazy CS update for exec-shield.
+ */
dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
{
- siginfo_t info;
+ int ok;
+ int cpu;
+
local_irq_enable();
- info.si_signo = SIGILL;
- info.si_errno = 0;
- info.si_code = ILL_BADSTK;
- info.si_addr = NULL;
- if (notify_die(DIE_TRAP, "iret exception", regs, error_code,
- X86_TRAP_IRET, SIGILL) == NOTIFY_STOP)
- return;
- do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
- &info);
+ cpu = get_cpu();
+ ok = check_lazy_exec_limit(cpu, regs, error_code);
+ put_cpu();
+
+ if (!ok && notify_die(DIE_TRAP, "iret exception", regs,
+ error_code, 32, SIGSEGV) != NOTIFY_STOP) {
+ siginfo_t info;
+ info.si_signo = SIGSEGV;
+ info.si_errno = 0;
+ info.si_code = ILL_BADSTK;
+ info.si_addr = 0;
+ do_trap(32, SIGSEGV, "iret exception", regs, error_code, &info);
+ }
}
#endif
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index 410531d..eb040ad 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -1,3 +1,4 @@
+#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/errno.h>
#include <linux/init.h>
@@ -5,7 +6,7 @@
#include <asm/pgtable.h>
#include <asm/proto.h>
-static int disable_nx __cpuinitdata;
+int disable_nx __cpuinitdata;
/*
* noexec = on|off
@@ -40,6 +41,10 @@ void __cpuinit x86_configure_nx(void)
void __init x86_report_nx(void)
{
if (!cpu_has_nx) {
+ if (!disable_nx)
+ printk(KERN_INFO "Using x86 segment limits to approximate NX protection\n");
+ else
+
printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
"missing in CPU!\n");
} else {
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 3804471..3c7805c 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -7,6 +7,7 @@
#include <linux/module.h>
#include <linux/cpu.h>
+#include <asm/desc.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cache.h>
@@ -134,6 +135,12 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
union smp_flush_state *f;
cpu = smp_processor_id();
+
+#ifdef CONFIG_X86_32
+ if (current->active_mm)
+ load_user_cs_desc(cpu, current->active_mm);
+#endif
+
/*
* orig_rax contains the negated interrupt vector.
* Use that to determine where the sender put the data.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c0f5fac..2040256 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -446,6 +446,24 @@ static void xen_set_ldt(const void *addr, unsigned entries)
xen_mc_issue(PARAVIRT_LAZY_CPU);
}
+#ifdef CONFIG_X86_32
+static void xen_load_user_cs_desc(int cpu, struct mm_struct *mm)
+{
+ void *gdt;
+ xmaddr_t mgdt;
+ u64 descriptor;
+ struct desc_struct user_cs;
+
+ gdt = &get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS];
+ mgdt = virt_to_machine(gdt);
+
+ user_cs = mm->context.user_cs;
+ descriptor = (u64) user_cs.a | ((u64) user_cs.b) << 32;
+
+ HYPERVISOR_update_descriptor(mgdt.maddr, descriptor);
+}
+#endif /*CONFIG_X86_32*/
+
static void xen_load_gdt(const struct desc_ptr *dtr)
{
unsigned long va = dtr->address;
@@ -1119,6 +1137,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.load_tr_desc = paravirt_nop,
.set_ldt = xen_set_ldt,
+#ifdef CONFIG_X86_32
+ .load_user_cs_desc = xen_load_user_cs_desc,
+#endif /*CONFIG_X86_32*/
.load_gdt = xen_load_gdt,
.load_idt = xen_load_idt,
.load_tls = xen_load_tls,
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 16f7354..611f1c6 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -708,6 +708,16 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
if (retval)
goto out_free_dentry;
+#ifdef CONFIG_X86_32
+ /*
+ * Turn off the CS limit completely if exec-shield disabled or
+ * NX active:
+ */
+ if (disable_nx || executable_stack != EXSTACK_DISABLE_X || (__supported_pte_mask & _PAGE_NX))
+ arch_add_exec_range(current->mm, -1);
+#endif
+
+
/* OK, This is the point of no return */
current->mm->def_flags = def_flags;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 28fa9d0..c961aa8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -101,6 +101,9 @@ struct fs_struct;
struct perf_event_context;
struct blk_plug;
+extern int disable_nx;
+extern int print_fatal_signals;
+
/*
* List of flags we want to share for kernel threads,
* if only because they are not used by them anyway.
diff --git a/mm/mmap.c b/mm/mmap.c
index 69a1889..5172f68 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -46,6 +46,18 @@
#define arch_rebalance_pgtables(addr, len) (addr)
#endif
+/* No sane architecture will #define these to anything else */
+#ifndef arch_add_exec_range
+#define arch_add_exec_range(mm, limit) do { ; } while (0)
+#endif
+#ifndef arch_flush_exec_range
+#define arch_flush_exec_range(mm) do { ; } while (0)
+#endif
+#ifndef arch_remove_exec_range
+#define arch_remove_exec_range(mm, limit) do { ; } while (0)
+#endif
+
+
static void unmap_region(struct mm_struct *mm,
struct vm_area_struct *vma, struct vm_area_struct *prev,
unsigned long start, unsigned long end);
@@ -426,6 +438,8 @@ __vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
struct vm_area_struct *prev, struct rb_node **rb_link,
struct rb_node *rb_parent)
{
+ if (vma->vm_flags & VM_EXEC)
+ arch_add_exec_range(mm, vma->vm_end);
__vma_link_list(mm, vma, prev, rb_parent);
__vma_link_rb(mm, vma, rb_link, rb_parent);
}
@@ -479,6 +493,8 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
rb_erase(&vma->vm_rb, &mm->mm_rb);
if (mm->mmap_cache == vma)
mm->mmap_cache = prev;
+ if (vma->vm_flags & VM_EXEC)
+ arch_remove_exec_range(mm, vma->vm_end);
}
/*
@@ -795,6 +811,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
} else /* cases 2, 5, 7 */
err = vma_adjust(prev, prev->vm_start,
end, prev->vm_pgoff, NULL);
+ if (prev->vm_flags & VM_EXEC)
+ arch_add_exec_range(mm, prev->vm_end);
if (err)
return NULL;
khugepaged_enter_vma_merge(prev);
@@ -2009,10 +2027,14 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
if (new->vm_ops && new->vm_ops->open)
new->vm_ops->open(new);
- if (new_below)
+ if (new_below) {
+ unsigned long old_end = vma->vm_end;
+
err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
((addr - new->vm_start) >> PAGE_SHIFT), new);
- else
+ if (vma->vm_flags & VM_EXEC)
+ arch_remove_exec_range(mm, old_end);
+ } else
err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
/* Success. */
@@ -2312,6 +2334,7 @@ void exit_mmap(struct mm_struct *mm)
free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
tlb_finish_mmu(&tlb, 0, -1);
+ arch_flush_exec_range(mm);
/*
* Walk the list again, actually closing and freeing it,
diff --git a/mm/mprotect.c b/mm/mprotect.c
index a409926..5e05c67 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -25,9 +25,14 @@
#include <linux/perf_event.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
+#ifndef arch_remove_exec_range
+#define arch_remove_exec_range(mm, limit) do { ; } while (0)
+#endif
+
#ifndef pgprot_modify
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
{
@@ -148,7 +153,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
struct mm_struct *mm = vma->vm_mm;
unsigned long oldflags = vma->vm_flags;
long nrpages = (end - start) >> PAGE_SHIFT;
- unsigned long charged = 0;
+ unsigned long charged = 0, old_end = vma->vm_end;
pgoff_t pgoff;
int error;
int dirty_accountable = 0;
@@ -213,6 +218,9 @@ success:
dirty_accountable = 1;
}
+ if (oldflags & VM_EXEC)
+ arch_remove_exec_range(current->mm, old_end);
+
mmu_notifier_invalidate_range_start(mm, start, end);
if (is_vm_hugetlb_page(vma))
hugetlb_change_protection(vma, start, end, vma->vm_page_prot);

View File

@ -1,47 +0,0 @@
>From e540f21852043a4d8e8cf5e505607909d0ab0f51 Mon Sep 17 00:00:00 2001
From: Tim Gardner <tim.gardner@canonical.com>
Date: Thu, 29 Mar 2012 06:21:01 -0600
Subject: [PATCH] UBUNTU: SAUCE: disable_nx should not be in __cpuinitdata
section for X86_32
I noticed a section mismatch warning while building 3.2.0-20.33 for X86_32.
AR arch/x86/lib/lib.a
LD vmlinux.o
MODPOST vmlinux.o
WARNING: vmlinux.o(.text+0x187833): Section mismatch in reference from the function load_elf_binary() to the variable .cpuinit.data:disable_nx
The function load_elf_binary() references
the variable __cpuinitdata disable_nx.
This is often because load_elf_binary lacks a __cpuinitdata
annotation or the annotation of disable_nx is wrong.
load_elf_binary() is definitely called after initialization.
This code was added by 'UBUNTU: ubuntu: nx-emu - i386: NX emulation', so
this is not an upstream problem.
Reported-by: Tetsuo Handa <from-ubuntu@I-love.SAKURA.ne.jp>
Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
---
arch/x86/mm/setup_nx.c | 4 ++++
1 files changed, 4 insertions(+), 0 deletions(-)
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index 90c9eff3..89fd946 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -6,7 +6,11 @@
#include <asm/pgtable.h>
#include <asm/proto.h>
+#ifdef CONFIG_X86_32
+int disable_nx; /* referenced by load_elf_binary() */
+#else
int disable_nx __cpuinitdata;
+#endif
/*
* noexec = on|off
--
1.7.9.1

View File

@ -1,69 +0,0 @@
diff -uNrp kernel-3.2.fc16.orig/arch/x86/mm/mmap.c kernel-3.2.fc16.new/arch/x86/mm/mmap.c
--- kernel-3.2.fc16.orig/arch/x86/mm/mmap.c 2012-03-19 16:47:03.495169091 -0400
+++ kernel-3.2.fc16.new/arch/x86/mm/mmap.c 2012-03-19 16:50:03.574168052 -0400
@@ -106,6 +106,10 @@ static unsigned long mmap_legacy_base(vo
return TASK_UNMAPPED_BASE + mmap_rnd();
}
+#ifdef CONFIG_X86_32
+ #define SHLIB_BASE 0x00111000
+#endif
+
/*
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
@@ -126,8 +126,10 @@ void arch_pick_mmap_layout(struct mm_str
#ifdef CONFIG_X86_32
if (!(current->personality & READ_IMPLIES_EXEC)
&& !(__supported_pte_mask & _PAGE_NX)
- && mmap_is_ia32())
+ && mmap_is_ia32()) {
+ mm->shlib_base = SHLIB_BASE + mmap_rnd();
mm->get_unmapped_exec_area = arch_get_unmapped_exec_area;
+ }
#endif
mm->unmap_area = arch_unmap_area_topdown;
}
diff -uNrp kernel-3.2.fc16.orig/include/linux/mm_types.h kernel-3.2.fc16.new/include/linux/mm_types.h
--- kernel-3.2.fc16.orig/include/linux/mm_types.h 2012-03-19 16:46:47.382169153 -0400
+++ kernel-3.2.fc16.new/include/linux/mm_types.h 2012-03-19 16:50:40.738168219 -0400
@@ -300,6 +300,7 @@ struct mm_struct {
void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
#endif
unsigned long mmap_base; /* base of mmap area */
+ unsigned long shlib_base; /* base of lib map area (ASCII armour)*/
unsigned long task_size; /* size of task vm space */
unsigned long cached_hole_size; /* if non-zero, the largest hole below free_area_cache */
unsigned long free_area_cache; /* first hole of size cached_hole_size or larger */
diff -uNrp kernel-3.2.fc16.orig/mm/mmap.c kernel-3.2.fc16.new/mm/mmap.c
--- kernel-3.2.fc16.orig/mm/mmap.c 2012-03-19 16:46:15.791169274 -0400
+++ kernel-3.2.fc16.new/mm/mmap.c 2012-03-19 16:51:37.351166875 -0400
@@ -1594,8 +1594,6 @@ static bool should_randomize(void)
!(current->personality & ADDR_NO_RANDOMIZE);
}
-#define SHLIB_BASE 0x00110000
-
unsigned long
arch_get_unmapped_exec_area(struct file *filp, unsigned long addr0,
unsigned long len0, unsigned long pgoff, unsigned long flags)
@@ -1612,8 +1610,8 @@ arch_get_unmapped_exec_area(struct file
return addr;
if (!addr)
- addr = !should_randomize() ? SHLIB_BASE :
- randomize_range(SHLIB_BASE, 0x01000000, len);
+ addr = !should_randomize() ? mm->shlib_base :
+ randomize_range(mm->shlib_base, 0x01000000, len);
if (addr) {
addr = PAGE_ALIGN(addr);
@@ -1623,7 +1621,7 @@ arch_get_unmapped_exec_area(struct file
return addr;
}
- addr = SHLIB_BASE;
+ addr = mm->shlib_base;
for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
/* At this point: (!vma || addr < vma->vm_end). */
if (TASK_SIZE - len < addr)