Linux v4.1.6

Laura Abbott 2015-08-17 12:33:56 -07:00
parent e4c9072a88
commit ce2ed4aeb9
13 changed files with 9 additions and 1295 deletions

kernel.spec View File

@@ -52,7 +52,7 @@ Summary: The Linux kernel
%if 0%{?released_kernel}
# Do we have a -stable update to apply?
%define stable_update 5
%define stable_update 6
# Set rpm version accordingly
%if 0%{?stable_update}
%define stablerev %{stable_update}
@@ -613,9 +613,6 @@ Patch26219: firmware-Drop-WARN-from-usermodehelper_read_trylock-.patch
#rhbz 1226743
Patch26221: drm-i915-turn-off-wc-mmaps.patch
# CVE-2015-XXXX rhbz 1230770 1230774
Patch26231: kvm-x86-fix-kvm_apic_has_events-to-check-for-NULL-po.patch
# rhbz 1227891
Patch26250: HID-rmi-Disable-populating-F30-when-the-touchpad-has.patch
@@ -625,20 +622,6 @@ Patch26251: ideapad_laptop-Lenovo-G50-30-fix-rfkill-reports-wire.patch
# rhbz 1180920 1206724
Patch26252: pcmcia-fix-a-boot-time-warning-in-pcmcia-cs-code.patch
# CVE-2015-3290 CVE-2015-3291 rhbz 1243465 1245927
Patch26254: x86-asm-entry-64-Remove-pointless-jump-to-irq_return.patch
Patch26255: x86-entry-Stop-using-PER_CPU_VAR-kernel_stack.patch
Patch26256: x86-entry-Define-cpu_current_top_of_stack-for-64-bit.patch
Patch26257: x86-nmi-Enable-nested-do_nmi-handling-for-64-bit-ker.patch
Patch26258: x86-nmi-64-Remove-asm-code-that-saves-cr2.patch
Patch26259: x86-nmi-64-Switch-stacks-on-userspace-NMI-entry.patch
Patch26260: x86-nmi-64-Improve-nested-NMI-comments.patch
Patch26261: x86-nmi-64-Reorder-nested-NMI-checks.patch
Patch26262: x86-nmi-64-Use-DF-to-avoid-userspace-RSP-confusing-n.patch
# CVE-2015-5697 (rhbz 1249011 1249013)
Patch26263: md-use-kzalloc-when-bitmap-is-disabled.patch
#rhbz 1244511
Patch507: HID-chicony-Add-support-for-Acer-Aspire-Switch-12.patch
@@ -1383,9 +1366,6 @@ ApplyPatch drm-i915-turn-off-wc-mmaps.patch
# pplyPatch Input-synaptics-allocate-3-slots-to-keep-stability-i.patch
# pplyPatch Input-synaptics-pin-3-touches-when-the-firmware-repo.patch
# CVE-2015-XXXX rhbz 1230770 1230774
ApplyPatch kvm-x86-fix-kvm_apic_has_events-to-check-for-NULL-po.patch
#rhbz 1227891
ApplyPatch HID-rmi-Disable-populating-F30-when-the-touchpad-has.patch
@@ -1395,20 +1375,6 @@ ApplyPatch ideapad_laptop-Lenovo-G50-30-fix-rfkill-reports-wire.patch
# rhbz 1180920 1206724
ApplyPatch pcmcia-fix-a-boot-time-warning-in-pcmcia-cs-code.patch
# CVE-2015-3290 CVE-2015-3291 rhbz 1243465 1245927
ApplyPatch x86-asm-entry-64-Remove-pointless-jump-to-irq_return.patch
ApplyPatch x86-entry-Stop-using-PER_CPU_VAR-kernel_stack.patch
ApplyPatch x86-entry-Define-cpu_current_top_of_stack-for-64-bit.patch
ApplyPatch x86-nmi-Enable-nested-do_nmi-handling-for-64-bit-ker.patch
ApplyPatch x86-nmi-64-Remove-asm-code-that-saves-cr2.patch
ApplyPatch x86-nmi-64-Switch-stacks-on-userspace-NMI-entry.patch
ApplyPatch x86-nmi-64-Improve-nested-NMI-comments.patch
ApplyPatch x86-nmi-64-Reorder-nested-NMI-checks.patch
ApplyPatch x86-nmi-64-Use-DF-to-avoid-userspace-RSP-confusing-n.patch
# CVE-2015-5697 (rhbz 1249011 1249013)
ApplyPatch md-use-kzalloc-when-bitmap-is-disabled.patch
#rhbz 1244511
ApplyPatch HID-chicony-Add-support-for-Acer-Aspire-Switch-12.patch
@@ -1418,6 +1384,9 @@ ApplyPatch HID-hid-input-Fix-accessing-freed-memory-during-devi.patch
#rhbz 1239050
ApplyPatch ideapad-laptop-Add-Lenovo-Yoga-3-14-to-no_hw_rfkill-.patch
#rhbz 1253789
ApplyPatch iSCSI-let-session-recovery_tmo-sysfs-writes-persist.patch
# END OF PATCH APPLICATIONS
%endif
@@ -2268,6 +2237,10 @@ fi
#
#
%changelog
* Mon Aug 17 2015 Laura Abbott <labbott@fedoraproject.org> - 4.1.6-200
- Linux v4.1.6
- Actually apply the fix for rhbz 1253789
* Mon Aug 17 2015 Josh Boyer <jwboyer@fedoraproject.org>
- Fix iscsi issue (rhbz 1253789)

kvm-x86-fix-kvm_apic_has_events-to-check-for-NULL-po.patch View File

@@ -1,25 +0,0 @@
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 4 Jun 2015 10:18:22 +0200
Subject: [PATCH] kvm: x86: fix kvm_apic_has_events to check for NULL pointer
Malicious (or egregiously buggy) userspace can trigger it, but it
should never happen in normal operation.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/kvm/lapic.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 0bc6c656625b..ca4f92d56e16 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -165,7 +165,7 @@ static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.apic->pending_events;
+ return kvm_vcpu_has_lapic(vcpu) && vcpu->arch.apic->pending_events;
}
bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);

md-use-kzalloc-when-bitmap-is-disabled.patch View File

@@ -1,73 +0,0 @@
From 28af19ef1eaa703bbbeff1022194a7e29c4d7ec3 Mon Sep 17 00:00:00 2001
From: Benjamin Randazzo <benjamin@randazzo.fr>
Date: Sat, 25 Jul 2015 16:36:50 +0200
Subject: [PATCH] md: use kzalloc() when bitmap is disabled
In drivers/md/md.c get_bitmap_file() uses kmalloc() for creating a
mdu_bitmap_file_t called "file".
5769 file = kmalloc(sizeof(*file), GFP_NOIO);
5770 if (!file)
5771 return -ENOMEM;
This structure is copied to user space at the end of the function.
5786 if (err == 0 &&
5787 copy_to_user(arg, file, sizeof(*file)))
5788 err = -EFAULT
But if bitmap is disabled only the first byte of "file" is initialized
with zero, so it's possible to read some bytes (up to 4095) of kernel
space memory from user space. This is an information leak.
5775 /* bitmap disabled, zero the first byte and copy out */
5776 if (!mddev->bitmap_info.file)
5777 file->pathname[0] = '\0';
Signed-off-by: Benjamin Randazzo <benjamin@randazzo.fr>
Signed-off-by: NeilBrown <neilb@suse.com>
---
drivers/md/md.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index b9200282fd77..90fa5fcb412f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5740,22 +5740,22 @@ static int get_bitmap_file(struct mddev *mddev, void __user * arg)
char *ptr;
int err;
- file = kmalloc(sizeof(*file), GFP_NOIO);
+ file = kzalloc(sizeof(*file), GFP_NOIO);
if (!file)
return -ENOMEM;
err = 0;
spin_lock(&mddev->lock);
- /* bitmap disabled, zero the first byte and copy out */
- if (!mddev->bitmap_info.file)
- file->pathname[0] = '\0';
- else if ((ptr = d_path(&mddev->bitmap_info.file->f_path,
- file->pathname, sizeof(file->pathname))),
- IS_ERR(ptr))
- err = PTR_ERR(ptr);
- else
- memmove(file->pathname, ptr,
- sizeof(file->pathname)-(ptr-file->pathname));
+ /* bitmap enabled */
+ if (mddev->bitmap_info.file) {
+ ptr = d_path(&mddev->bitmap_info.file->f_path,
+ file->pathname, sizeof(file->pathname));
+ if (IS_ERR(ptr))
+ err = PTR_ERR(ptr);
+ else
+ memmove(file->pathname, ptr,
+ sizeof(file->pathname)-(ptr-file->pathname));
+ }
spin_unlock(&mddev->lock);
if (err == 0 &&
--
2.4.3
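The leak fixed above comes down to copying a partially initialized heap allocation out to an untrusted reader. A minimal userspace sketch of the same pattern (assumed, illustrative names; malloc()/calloc() standing in for kmalloc()/kzalloc()) shows why the one-line switch to a zeroing allocator is enough:

/*
 * Userspace sketch, not kernel code: only the first byte of the buffer is
 * written before the whole struct is copied out, so the rest may still hold
 * stale heap contents. A zeroing allocator removes the leak.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct bitmap_file_info {              /* stand-in for mdu_bitmap_file_t */
	char pathname[4096];
};

int main(void)
{
	/* Leave recognizable garbage on the heap, then free it. */
	char *stale = malloc(sizeof(struct bitmap_file_info));
	memset(stale, 'S', sizeof(struct bitmap_file_info));
	free(stale);

	/* kmalloc() analogue: allocation is not zeroed. */
	struct bitmap_file_info *leaky = malloc(sizeof(*leaky));
	leaky->pathname[0] = '\0';                     /* only byte 0 cleared */
	printf("leaky byte 1: 0x%02x\n",
	       (unsigned char)leaky->pathname[1]);     /* may show stale 0x53 */
	free(leaky);

	/* kzalloc() analogue: zeroed allocation, nothing stale to copy out. */
	struct bitmap_file_info *safe = calloc(1, sizeof(*safe));
	printf("safe  byte 1: 0x%02x\n",
	       (unsigned char)safe->pathname[1]);      /* always 0x00 */
	free(safe);
	return 0;
}

With a zeroed buffer the disabled-bitmap branch also becomes unnecessary, which is why the hunk above drops the explicit pathname[0] = '\0' and only keeps the d_path() path for the bitmap-enabled case.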

sources View File

@@ -1,3 +1,3 @@
fe9dc0f6729f36400ea81aa41d614c37 linux-4.1.tar.xz
5b4d0e18c713a479a7b4c1aa53a7432b perf-man-4.1.tar.gz
79d9a168b73d91544c15b3b77221528a patch-4.1.5.xz
7dea69f02c906206f88df48085069eb6 patch-4.1.6.xz

x86-asm-entry-64-Remove-pointless-jump-to-irq_return.patch View File

@@ -1,50 +0,0 @@
From 480ea71e661b2a48a8d28ea4327cafa666c267ff Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 4 Jun 2015 13:24:29 -0700
Subject: [PATCH 1/7] x86/asm/entry/64: Remove pointless jump to irq_return
INTERRUPT_RETURN turns into a jmp instruction. There's no need
for extra indirection.
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: <linux-kernel@vger.kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/2f2318653dbad284a59311f13f08cea71298fd7c.1433449436.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
arch/x86/kernel/entry_64.S | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 02c2eff..b32f346 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -793,8 +793,6 @@ retint_kernel:
restore_c_regs_and_iret:
RESTORE_C_REGS
REMOVE_PT_GPREGS_FROM_STACK 8
-
-irq_return:
INTERRUPT_RETURN
ENTRY(native_iret)
@@ -1640,7 +1638,7 @@ nmi_restore:
/* Clear the NMI executing stack variable */
movq $0, 5*8(%rsp)
- jmp irq_return
+ INTERRUPT_RETURN
CFI_ENDPROC
END(nmi)
--
2.4.3

x86-entry-Define-cpu_current_top_of_stack-for-64-bit.patch View File

@@ -1,129 +0,0 @@
From e154db431700ef8e9a29c0e88fa8a11b8dc8fc1e Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <dvlasenk@redhat.com>
Date: Fri, 24 Apr 2015 17:31:35 +0200
Subject: [PATCH 3/9] x86/entry: Define 'cpu_current_top_of_stack' for 64-bit
code
32-bit code has PER_CPU_VAR(cpu_current_top_of_stack).
64-bit code uses somewhat more obscure: PER_CPU_VAR(cpu_tss + TSS_sp0).
Define the 'cpu_current_top_of_stack' macro on CONFIG_X86_64
as well so that the PER_CPU_VAR(cpu_current_top_of_stack)
expression can be used in both 32-bit and 64-bit code.
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Drewry <wad@chromium.org>
Link: http://lkml.kernel.org/r/1429889495-27850-3-git-send-email-dvlasenk@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
arch/x86/ia32/ia32entry.S | 4 ++--
arch/x86/include/asm/thread_info.h | 10 ++++------
arch/x86/kernel/entry_64.S | 2 +-
arch/x86/xen/xen-asm_64.S | 5 +++--
4 files changed, 10 insertions(+), 11 deletions(-)
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index e3c0f06..1c30fa9 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -119,7 +119,7 @@ ENTRY(ia32_sysenter_target)
* it is too small to ever cause noticeable irq latency.
*/
SWAPGS_UNSAFE_STACK
- movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
ENABLE_INTERRUPTS(CLBR_NONE)
/* Zero-extending 32-bit regs, do not remove */
@@ -356,7 +356,7 @@ ENTRY(ia32_cstar_target)
SWAPGS_UNSAFE_STACK
movl %esp,%r8d
CFI_REGISTER rsp,r8
- movq PER_CPU_VAR(cpu_tss + TSS_sp0),%rsp
+ movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp
ENABLE_INTERRUPTS(CLBR_NONE)
/* Zero-extending 32-bit regs, do not remove */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index d656a36..9ececd0 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -197,16 +197,14 @@ static inline unsigned long current_stack_pointer(void)
#else /* !__ASSEMBLY__ */
+#ifdef CONFIG_X86_64
+# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
+#endif
+
/* Load thread_info address into "reg" */
-#ifdef CONFIG_X86_32
#define GET_THREAD_INFO(reg) \
_ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \
_ASM_SUB $(THREAD_SIZE),reg ;
-#else
-#define GET_THREAD_INFO(reg) \
- _ASM_MOV PER_CPU_VAR(cpu_tss + TSS_sp0),reg ; \
- _ASM_SUB $(THREAD_SIZE),reg ;
-#endif
/*
* ASM operand which evaluates to a 'thread_info' address of
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 00cde3c..61e0f27 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -216,7 +216,7 @@ ENTRY(system_call)
GLOBAL(system_call_after_swapgs)
movq %rsp,PER_CPU_VAR(rsp_scratch)
- movq PER_CPU_VAR(cpu_tss + TSS_sp0),%rsp
+ movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp
/* Construct struct pt_regs on stack */
pushq_cfi $__USER_DS /* pt_regs->ss */
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index acc49e0..5e15e92 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -16,6 +16,7 @@
#include <asm/processor-flags.h>
#include <asm/segment.h>
#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
#include <xen/interface/xen.h>
@@ -70,7 +71,7 @@ ENTRY(xen_sysret64)
* still with the kernel gs, so we can easily switch back
*/
movq %rsp, PER_CPU_VAR(rsp_scratch)
- movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
pushq $__USER_DS
pushq PER_CPU_VAR(rsp_scratch)
@@ -89,7 +90,7 @@ ENTRY(xen_sysret32)
* still with the kernel gs, so we can easily switch back
*/
movq %rsp, PER_CPU_VAR(rsp_scratch)
- movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
pushq $__USER32_DS
pushq PER_CPU_VAR(rsp_scratch)
--
2.4.3

x86-entry-Stop-using-PER_CPU_VAR-kernel_stack.patch View File

@@ -1,115 +0,0 @@
From 252c6cff2b83c4cc969b64306956a5c8c130a50d Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <dvlasenk@redhat.com>
Date: Fri, 24 Apr 2015 17:31:33 +0200
Subject: [PATCH 2/9] x86/entry: Stop using PER_CPU_VAR(kernel_stack)
PER_CPU_VAR(kernel_stack) is redundant:
- On the 64-bit build, we can use PER_CPU_VAR(cpu_tss + TSS_sp0).
- On the 32-bit build, we can use PER_CPU_VAR(cpu_current_top_of_stack).
PER_CPU_VAR(kernel_stack) will be deleted by a separate change.
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Drewry <wad@chromium.org>
Link: http://lkml.kernel.org/r/1429889495-27850-1-git-send-email-dvlasenk@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
arch/x86/ia32/ia32entry.S | 2 +-
arch/x86/include/asm/thread_info.h | 8 +++++++-
arch/x86/kernel/entry_64.S | 2 +-
arch/x86/xen/xen-asm_64.S | 5 +++--
4 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 72bf268..e3c0f06 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -356,7 +356,7 @@ ENTRY(ia32_cstar_target)
SWAPGS_UNSAFE_STACK
movl %esp,%r8d
CFI_REGISTER rsp,r8
- movq PER_CPU_VAR(kernel_stack),%rsp
+ movq PER_CPU_VAR(cpu_tss + TSS_sp0),%rsp
ENABLE_INTERRUPTS(CLBR_NONE)
/* Zero-extending 32-bit regs, do not remove */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index b4bdec3..d656a36 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -198,9 +198,15 @@ static inline unsigned long current_stack_pointer(void)
#else /* !__ASSEMBLY__ */
/* Load thread_info address into "reg" */
+#ifdef CONFIG_X86_32
#define GET_THREAD_INFO(reg) \
- _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \
+ _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \
_ASM_SUB $(THREAD_SIZE),reg ;
+#else
+#define GET_THREAD_INFO(reg) \
+ _ASM_MOV PER_CPU_VAR(cpu_tss + TSS_sp0),reg ; \
+ _ASM_SUB $(THREAD_SIZE),reg ;
+#endif
/*
* ASM operand which evaluates to a 'thread_info' address of
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b32f346..00cde3c 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -216,7 +216,7 @@ ENTRY(system_call)
GLOBAL(system_call_after_swapgs)
movq %rsp,PER_CPU_VAR(rsp_scratch)
- movq PER_CPU_VAR(kernel_stack),%rsp
+ movq PER_CPU_VAR(cpu_tss + TSS_sp0),%rsp
/* Construct struct pt_regs on stack */
pushq_cfi $__USER_DS /* pt_regs->ss */
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 985fc3e..acc49e0 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -15,6 +15,7 @@
#include <asm/percpu.h>
#include <asm/processor-flags.h>
#include <asm/segment.h>
+#include <asm/asm-offsets.h>
#include <xen/interface/xen.h>
@@ -69,7 +70,7 @@ ENTRY(xen_sysret64)
* still with the kernel gs, so we can easily switch back
*/
movq %rsp, PER_CPU_VAR(rsp_scratch)
- movq PER_CPU_VAR(kernel_stack), %rsp
+ movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
pushq $__USER_DS
pushq PER_CPU_VAR(rsp_scratch)
@@ -88,7 +89,7 @@ ENTRY(xen_sysret32)
* still with the kernel gs, so we can easily switch back
*/
movq %rsp, PER_CPU_VAR(rsp_scratch)
- movq PER_CPU_VAR(kernel_stack), %rsp
+ movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
pushq $__USER32_DS
pushq PER_CPU_VAR(rsp_scratch)
--
2.4.3

x86-nmi-64-Improve-nested-NMI-comments.patch View File

@@ -1,290 +0,0 @@
From 7f340812f17f9c24519254fdaa88dd7b54fc3b59 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Fri, 10 Jul 2015 17:13:26 -0700
Subject: [PATCH 5/7] x86/nmi/64: Improve nested NMI comments
commit ed02eaa10579ffd480c3bda29701e658f17196e9 upstream.
I found the nested NMI documentation to be difficult to follow.
Improve the comments.
Cc: stable@vger.kernel.org
Signed-off-by: Andy Lutomirski <luto@kernel.org>
[bwh: Backported to 4.0: adjust filename, context]
[labbott: Context fixup around nmi_restore]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
arch/x86/kernel/entry_64.S | 160 ++++++++++++++++++++++++++-------------------
arch/x86/kernel/nmi.c | 4 +-
2 files changed, 94 insertions(+), 70 deletions(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 4fabbaa..152790c 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1411,11 +1411,12 @@ ENTRY(nmi)
* If the variable is not set and the stack is not the NMI
* stack then:
* o Set the special variable on the stack
- * o Copy the interrupt frame into a "saved" location on the stack
- * o Copy the interrupt frame into a "copy" location on the stack
+ * o Copy the interrupt frame into an "outermost" location on the
+ * stack
+ * o Copy the interrupt frame into an "iret" location on the stack
* o Continue processing the NMI
* If the variable is set or the previous stack is the NMI stack:
- * o Modify the "copy" location to jump to the repeate_nmi
+ * o Modify the "iret" location to jump to the repeat_nmi
* o return back to the first NMI
*
* Now on exit of the first NMI, we first clear the stack variable
@@ -1509,18 +1510,60 @@ ENTRY(nmi)
.Lnmi_from_kernel:
/*
- * Check the special variable on the stack to see if NMIs are
- * executing.
+ * Here's what our stack frame will look like:
+ * +---------------------------------------------------------+
+ * | original SS |
+ * | original Return RSP |
+ * | original RFLAGS |
+ * | original CS |
+ * | original RIP |
+ * +---------------------------------------------------------+
+ * | temp storage for rdx |
+ * +---------------------------------------------------------+
+ * | "NMI executing" variable |
+ * +---------------------------------------------------------+
+ * | iret SS } Copied from "outermost" frame |
+ * | iret Return RSP } on each loop iteration; overwritten |
+ * | iret RFLAGS } by a nested NMI to force another |
+ * | iret CS } iteration if needed. |
+ * | iret RIP } |
+ * +---------------------------------------------------------+
+ * | outermost SS } initialized in first_nmi; |
+ * | outermost Return RSP } will not be changed before |
+ * | outermost RFLAGS } NMI processing is done. |
+ * | outermost CS } Copied to "iret" frame on each |
+ * | outermost RIP } iteration. |
+ * +---------------------------------------------------------+
+ * | pt_regs |
+ * +---------------------------------------------------------+
+ *
+ * The "original" frame is used by hardware. Before re-enabling
+ * NMIs, we need to be done with it, and we need to leave enough
+ * space for the asm code here.
+ *
+ * We return by executing IRET while RSP points to the "iret" frame.
+ * That will either return for real or it will loop back into NMI
+ * processing.
+ *
+ * The "outermost" frame is copied to the "iret" frame on each
+ * iteration of the loop, so each iteration starts with the "iret"
+ * frame pointing to the final return target.
+ */
+
+ /*
+ * Determine whether we're a nested NMI.
+ *
+ * First check "NMI executing". If it's set, then we're nested.
+ * This will not detect if we interrupted an outer NMI just
+ * before IRET.
*/
cmpl $1, -8(%rsp)
je nested_nmi
/*
- * Now test if the previous stack was an NMI stack.
- * We need the double check. We check the NMI stack to satisfy the
- * race when the first NMI clears the variable before returning.
- * We check the variable because the first NMI could be in a
- * breakpoint routine using a breakpoint stack.
+ * Now test if the previous stack was an NMI stack. This covers
+ * the case where we interrupt an outer NMI after it clears
+ * "NMI executing" but before IRET.
*/
lea 6*8(%rsp), %rdx
/* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */
@@ -1537,9 +1580,11 @@ ENTRY(nmi)
nested_nmi:
/*
- * Do nothing if we interrupted the fixup in repeat_nmi.
- * It's about to repeat the NMI handler, so we are fine
- * with ignoring this one.
+ * If we interrupted an NMI that is between repeat_nmi and
+ * end_repeat_nmi, then we must not modify the "iret" frame
+ * because it's being written by the outer NMI. That's okay:
+ * the outer NMI handler is about to call do_nmi anyway,
+ * so we can just resume the outer NMI.
*/
movq $repeat_nmi, %rdx
cmpq 8(%rsp), %rdx
@@ -1549,7 +1594,10 @@ nested_nmi:
ja nested_nmi_out
1:
- /* Set up the interrupted NMIs stack to jump to repeat_nmi */
+ /*
+ * Modify the "iret" frame to point to repeat_nmi, forcing another
+ * iteration of NMI handling.
+ */
leaq -1*8(%rsp), %rdx
movq %rdx, %rsp
CFI_ADJUST_CFA_OFFSET 1*8
@@ -1568,60 +1616,23 @@ nested_nmi_out:
popq_cfi %rdx
CFI_RESTORE rdx
- /* No need to check faults here */
+ /* We are returning to kernel mode, so this cannot result in a fault. */
INTERRUPT_RETURN
CFI_RESTORE_STATE
first_nmi:
- /*
- * Because nested NMIs will use the pushed location that we
- * stored in rdx, we must keep that space available.
- * Here's what our stack frame will look like:
- * +-------------------------+
- * | original SS |
- * | original Return RSP |
- * | original RFLAGS |
- * | original CS |
- * | original RIP |
- * +-------------------------+
- * | temp storage for rdx |
- * +-------------------------+
- * | NMI executing variable |
- * +-------------------------+
- * | copied SS |
- * | copied Return RSP |
- * | copied RFLAGS |
- * | copied CS |
- * | copied RIP |
- * +-------------------------+
- * | Saved SS |
- * | Saved Return RSP |
- * | Saved RFLAGS |
- * | Saved CS |
- * | Saved RIP |
- * +-------------------------+
- * | pt_regs |
- * +-------------------------+
- *
- * The saved stack frame is used to fix up the copied stack frame
- * that a nested NMI may change to make the interrupted NMI iret jump
- * to the repeat_nmi. The original stack frame and the temp storage
- * is also used by nested NMIs and can not be trusted on exit.
- */
- /* Do not pop rdx, nested NMIs will corrupt that part of the stack */
+ /* Restore rdx. */
movq (%rsp), %rdx
CFI_RESTORE rdx
- /* Set the NMI executing variable on the stack. */
+ /* Set "NMI executing" on the stack. */
pushq_cfi $1
- /*
- * Leave room for the "copied" frame
- */
+ /* Leave room for the "iret" frame */
subq $(5*8), %rsp
CFI_ADJUST_CFA_OFFSET 5*8
- /* Copy the stack frame to the Saved frame */
+ /* Copy the "original" frame to the "outermost" frame */
.rept 5
pushq_cfi 11*8(%rsp)
.endr
@@ -1629,6 +1640,7 @@ first_nmi:
/* Everything up to here is safe from nested NMIs */
+repeat_nmi:
/*
* If there was a nested NMI, the first NMI's iret will return
* here. But NMIs are still enabled and we can take another
@@ -1637,16 +1649,21 @@ first_nmi:
* it will just return, as we are about to repeat an NMI anyway.
* This makes it safe to copy to the stack frame that a nested
* NMI will update.
- */
-repeat_nmi:
- /*
- * Update the stack variable to say we are still in NMI (the update
- * is benign for the non-repeat case, where 1 was pushed just above
- * to this very stack slot).
+ *
+ * RSP is pointing to "outermost RIP". gsbase is unknown, but, if
+ * we're repeating an NMI, gsbase has the same value that it had on
+ * the first iteration. paranoid_entry will load the kernel
+ * gsbase if needed before we call do_nmi.
+ *
+ * Set "NMI executing" in case we came back here via IRET.
*/
movq $1, 10*8(%rsp)
- /* Make another copy, this one may be modified by nested NMIs */
+ /*
+ * Copy the "outermost" frame to the "iret" frame. NMIs that nest
+ * here must not modify the "iret" frame while we're writing to
+ * it or it will end up containing garbage.
+ */
addq $(10*8), %rsp
CFI_ADJUST_CFA_OFFSET -10*8
.rept 5
@@ -1657,9 +1674,9 @@ repeat_nmi:
end_repeat_nmi:
/*
- * Everything below this point can be preempted by a nested
- * NMI if the first NMI took an exception and reset our iret stack
- * so that we repeat another NMI.
+ * Everything below this point can be preempted by a nested NMI.
+ * If this happens, then the inner NMI will change the "iret"
+ * frame to point back to repeat_nmi.
*/
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
ALLOC_PT_GPREGS_ON_STACK
@@ -1686,11 +1703,18 @@ nmi_swapgs:
nmi_restore:
RESTORE_EXTRA_REGS
RESTORE_C_REGS
- /* Pop the extra iret frame at once */
+
+ /* Point RSP at the "iret" frame. */
REMOVE_PT_GPREGS_FROM_STACK 6*8
- /* Clear the NMI executing stack variable */
+ /* Clear "NMI executing". */
movq $0, 5*8(%rsp)
+
+ /*
+ * INTERRUPT_RETURN reads the "iret" frame and exits the NMI
+ * stack in a single instruction. We are returning to kernel
+ * mode, so this cannot result in a fault.
+ */
INTERRUPT_RETURN
CFI_ENDPROC
END(nmi)
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index d8766b1..d05bd2e 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -408,8 +408,8 @@ static void default_do_nmi(struct pt_regs *regs)
NOKPROBE_SYMBOL(default_do_nmi);
/*
- * NMIs can hit breakpoints which will cause it to lose its NMI context
- * with the CPU when the breakpoint or page fault does an IRET.
+ * NMIs can page fault or hit breakpoints which will cause it to lose
+ * its NMI context with the CPU when the breakpoint or page fault does an IRET.
*
* As a result, NMIs can nest if NMIs get unmasked due an IRET during
* NMI processing. On x86_64, the asm glue protects us from nested NMIs
--
2.4.3

x86-nmi-64-Remove-asm-code-that-saves-cr2.patch View File

@@ -1,58 +0,0 @@
From 827b728c1e4aec28a115b6be56a0ac2407e9dd05 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Fri, 10 Jul 2015 12:03:34 -0700
Subject: [PATCH 3/7] x86/nmi/64: Remove asm code that saves cr2
commit e7c2c90651fd54c3ca499fbb065ea5cbac30047d upstream.
Now that do_nmi saves cr2, we don't need to save it in asm.
This is a prerequisite for the fix for CVE-2015-3290.
Cc: stable@vger.kernel.org
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
[bwh: Backported to 4.0: adjust filename, context]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
arch/x86/kernel/entry_64.S | 18 ------------------
1 file changed, 18 deletions(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b32f346..34f07d7 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1603,29 +1603,11 @@ end_repeat_nmi:
call paranoid_entry
DEFAULT_FRAME 0
- /*
- * Save off the CR2 register. If we take a page fault in the NMI then
- * it could corrupt the CR2 value. If the NMI preempts a page fault
- * handler before it was able to read the CR2 register, and then the
- * NMI itself takes a page fault, the page fault that was preempted
- * will read the information from the NMI page fault and not the
- * origin fault. Save it off and restore it if it changes.
- * Use the r12 callee-saved register.
- */
- movq %cr2, %r12
-
/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
movq %rsp,%rdi
movq $-1,%rsi
call do_nmi
- /* Did the NMI take a page fault? Restore cr2 if it did */
- movq %cr2, %rcx
- cmpq %rcx, %r12
- je 1f
- movq %r12, %cr2
-1:
-
testl %ebx,%ebx /* swapgs needed? */
jnz nmi_restore
nmi_swapgs:
--
2.4.3

x86-nmi-64-Reorder-nested-NMI-checks.patch View File

@@ -1,90 +0,0 @@
From dc46e3fb4b597196182d4b1f5d97d0dcca5018b6 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Sun, 12 Jul 2015 20:59:57 -0700
Subject: [PATCH 6/7] x86/nmi/64: Reorder nested NMI checks
commit b7dcb27674b28ca49b710e95da74c44d32154bed upstream.
Check the repeat_nmi .. end_repeat_nmi special case first. The next
patch will rework the RSP check and, as a side effect, the RSP check
will no longer detect repeat_nmi .. end_repeat_nmi, so we'll need
this ordering of the checks.
Note: this is more subtle than it appears. The check for repeat_nmi
.. end_repeat_nmi jumps straight out of the NMI code instead of
adjusting the "iret" frame to force a repeat. This is necessary,
because the code between repeat_nmi and end_repeat_nmi sets "NMI
executing" and then writes to the "iret" frame itself. If a nested
NMI comes in and modifies the "iret" frame while repeat_nmi is also
modifying it, we'll end up with garbage. The old code got this
right, as does the new code, but the new code is a bit more
explicit.
If we were to move the check right after the "NMI executing" check,
then we'd get it wrong and have random crashes.
This is a prerequisite for the fix for CVE-2015-3291.
Cc: stable@vger.kernel.org
Signed-off-by: Andy Lutomirski <luto@kernel.org>
[bwh: Backported to 4.0: adjust filename, spacing]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
arch/x86/kernel/entry_64.S | 34 ++++++++++++++++++----------------
1 file changed, 18 insertions(+), 16 deletions(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 152790c..be2c4ba 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1553,7 +1553,24 @@ ENTRY(nmi)
/*
* Determine whether we're a nested NMI.
*
- * First check "NMI executing". If it's set, then we're nested.
+ * If we interrupted kernel code between repeat_nmi and
+ * end_repeat_nmi, then we are a nested NMI. We must not
+ * modify the "iret" frame because it's being written by
+ * the outer NMI. That's okay: the outer NMI handler is
+ * about to call do_nmi anyway, so we can just
+ * resume the outer NMI.
+ */
+
+ movq $repeat_nmi, %rdx
+ cmpq 8(%rsp), %rdx
+ ja 1f
+ movq $end_repeat_nmi, %rdx
+ cmpq 8(%rsp), %rdx
+ ja nested_nmi_out
+1:
+
+ /*
+ * Now check "NMI executing". If it's set, then we're nested.
* This will not detect if we interrupted an outer NMI just
* before IRET.
*/
@@ -1580,21 +1597,6 @@ ENTRY(nmi)
nested_nmi:
/*
- * If we interrupted an NMI that is between repeat_nmi and
- * end_repeat_nmi, then we must not modify the "iret" frame
- * because it's being written by the outer NMI. That's okay:
- * the outer NMI handler is about to call do_nmi anyway,
- * so we can just resume the outer NMI.
- */
- movq $repeat_nmi, %rdx
- cmpq 8(%rsp), %rdx
- ja 1f
- movq $end_repeat_nmi, %rdx
- cmpq 8(%rsp), %rdx
- ja nested_nmi_out
-
-1:
- /*
* Modify the "iret" frame to point to repeat_nmi, forcing another
* iteration of NMI handling.
*/
--
2.4.3

x86-nmi-64-Switch-stacks-on-userspace-NMI-entry.patch View File

@@ -1,140 +0,0 @@
From f1a66b1c4d231e2e9d3a1f7affecb35df3ce0464 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Fri, 10 Jul 2015 11:35:31 -0700
Subject: [PATCH 6/9] x86/nmi/64: Switch stacks on userspace NMI entry
commit 4fb2a8d9cb0efcd7405f1ad105d7f3c764afe02f upstream.
Returning to userspace is tricky: IRET can fail, and ESPFIX can
rearrange the stack prior to IRET.
The NMI nesting fixup relies on a precise stack layout and atomic
IRET. Rather than trying to teach the NMI nesting fixup to handle
ESPFIX and failed IRET, punt: run NMIs that came from user mode on
the normal kernel stack.
This will make some nested NMIs visible to C code, but the C code is
okay with that.
As a side effect, this should speed up perf: it eliminates an RDMSR
when NMIs come from user mode.
Fixes CVE-2015-3290.
Cc: stable@vger.kernel.org
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
[bwh: Backported to 4.0:
- Adjust filename, context
- s/restore_c_regs_and_iret/restore_args/
- Use kernel_stack + KERNEL_STACK_OFFSET instead of cpu_current_top_of_stack]
[luto: Open-coded return path to avoid dependency on partial pt_regs details]
[labbott: just use cpu_current_top_of_stack instead]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
arch/x86/kernel/entry_64.S | 78 +++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 74 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 6916856..75988f4 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1424,19 +1424,89 @@ ENTRY(nmi)
* a nested NMI that updated the copy interrupt stack frame, a
* jump will be made to the repeat_nmi code that will handle the second
* NMI.
+ *
+ * However, espfix prevents us from directly returning to userspace
+ * with a single IRET instruction. Similarly, IRET to user mode
+ * can fault. We therefore handle NMIs from user space like
+ * other IST entries.
*/
/* Use %rdx as our temp variable throughout */
pushq_cfi %rdx
CFI_REL_OFFSET rdx, 0
+ testb $3, CS-RIP+8(%rsp)
+ jz .Lnmi_from_kernel
+
/*
- * If %cs was not the kernel segment, then the NMI triggered in user
- * space, which means it is definitely not nested.
+ * NMI from user mode. We need to run on the thread stack, but we
+ * can't go through the normal entry paths: NMIs are masked, and
+ * we don't want to enable interrupts, because then we'll end
+ * up in an awkward situation in which IRQs are on but NMIs
+ * are off.
*/
- cmpl $__KERNEL_CS, 16(%rsp)
- jne first_nmi
+ SWAPGS
+ cld
+ movq %rsp, %rdx
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ pushq 5*8(%rdx) /* pt_regs->ss */
+ pushq 4*8(%rdx) /* pt_regs->rsp */
+ pushq 3*8(%rdx) /* pt_regs->flags */
+ pushq 2*8(%rdx) /* pt_regs->cs */
+ pushq 1*8(%rdx) /* pt_regs->rip */
+ pushq $-1 /* pt_regs->orig_ax */
+ pushq %rdi /* pt_regs->di */
+ pushq %rsi /* pt_regs->si */
+ pushq (%rdx) /* pt_regs->dx */
+ pushq %rcx /* pt_regs->cx */
+ pushq %rax /* pt_regs->ax */
+ pushq %r8 /* pt_regs->r8 */
+ pushq %r9 /* pt_regs->r9 */
+ pushq %r10 /* pt_regs->r10 */
+ pushq %r11 /* pt_regs->r11 */
+ pushq %rbx /* pt_regs->rbx */
+ pushq %rbp /* pt_regs->rbp */
+ pushq %r12 /* pt_regs->r12 */
+ pushq %r13 /* pt_regs->r13 */
+ pushq %r14 /* pt_regs->r14 */
+ pushq %r15 /* pt_regs->r15 */
+
+ /*
+ * At this point we no longer need to worry about stack damage
+ * due to nesting -- we're on the normal thread stack and we're
+ * done with the NMI stack.
+ */
+
+ movq %rsp, %rdi
+ movq $-1, %rsi
+ call do_nmi
+
+ /*
+ * Return back to user mode. We must *not* do the normal exit
+ * work, because we don't want to enable interrupts. Fortunately,
+ * do_nmi doesn't modify pt_regs.
+ */
+ SWAPGS
+
+ /*
+ * Open-code the entire return process for compatibility with varying
+ * register layouts across different kernel versions.
+ */
+ addq $6*8, %rsp /* skip bx, bp, and r12-r15 */
+ popq %r11 /* pt_regs->r11 */
+ popq %r10 /* pt_regs->r10 */
+ popq %r9 /* pt_regs->r9 */
+ popq %r8 /* pt_regs->r8 */
+ popq %rax /* pt_regs->ax */
+ popq %rcx /* pt_regs->cx */
+ popq %rdx /* pt_regs->dx */
+ popq %rsi /* pt_regs->si */
+ popq %rdi /* pt_regs->di */
+ addq $8, %rsp /* skip orig_ax */
+ INTERRUPT_RETURN
+
+.Lnmi_from_kernel:
/*
* Check the special variable on the stack to see if NMIs are
* executing.
--
2.4.3

x86-nmi-64-Use-DF-to-avoid-userspace-RSP-confusing-n.patch View File

@@ -1,93 +0,0 @@
From 8f3e2dab9567ee8c4e104f484941a858f353ea02 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Fri, 10 Jul 2015 17:25:53 -0700
Subject: [PATCH 7/7] x86/nmi/64: Use DF to avoid userspace RSP confusing
nested NMI detection
commit dc68c0f2ec634b2cfecf879235564da58d422cee upstream.
We have a tricky bug in the nested NMI code: if we see RSP pointing
to the NMI stack on NMI entry from kernel mode, we assume that we
are executing a nested NMI.
This isn't quite true. A malicious userspace program can point RSP
at the NMI stack, issue SYSCALL, and arrange for an NMI to happen
while RSP is still pointing at the NMI stack.
Fix it with a sneaky trick. Set DF in the region of code that the RSP
check is intended to detect. IRET will clear DF atomically.
(Note: other than paravirt, there's little need for all this complexity.
We could check RIP instead of RSP.)
Fixes CVE-2015-3291.
Cc: stable@vger.kernel.org
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
[bwh: Backported to 4.0: adjust filename, context]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
arch/x86/kernel/entry_64.S | 29 +++++++++++++++++++++++++----
1 file changed, 25 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index be2c4ba..ce4431c 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1580,7 +1580,14 @@ ENTRY(nmi)
/*
* Now test if the previous stack was an NMI stack. This covers
* the case where we interrupt an outer NMI after it clears
- * "NMI executing" but before IRET.
+ * "NMI executing" but before IRET. We need to be careful, though:
+ * there is one case in which RSP could point to the NMI stack
+ * despite there being no NMI active: naughty userspace controls
+ * RSP at the very beginning of the SYSCALL targets. We can
+ * pull a fast one on naughty userspace, though: we program
+ * SYSCALL to mask DF, so userspace cannot cause DF to be set
+ * if it controls the kernel's RSP. We set DF before we clear
+ * "NMI executing".
*/
lea 6*8(%rsp), %rdx
/* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */
@@ -1591,10 +1598,16 @@ ENTRY(nmi)
cmpq %rdx, 4*8(%rsp)
/* If it is below the NMI stack, it is a normal NMI */
jb first_nmi
- /* Ah, it is within the NMI stack, treat it as nested */
+
+ /* Ah, it is within the NMI stack. */
+
+ testb $(X86_EFLAGS_DF >> 8), (3*8 + 1)(%rsp)
+ jz first_nmi /* RSP was user controlled. */
CFI_REMEMBER_STATE
+ /* This is a nested NMI. */
+
nested_nmi:
/*
* Modify the "iret" frame to point to repeat_nmi, forcing another
@@ -1709,8 +1722,16 @@ nmi_restore:
/* Point RSP at the "iret" frame. */
REMOVE_PT_GPREGS_FROM_STACK 6*8
- /* Clear "NMI executing". */
- movq $0, 5*8(%rsp)
+ /*
+ * Clear "NMI executing". Set DF first so that we can easily
+ * distinguish the remaining code between here and IRET from
+ * the SYSCALL entry and exit paths. On a native kernel, we
+ * could just inspect RIP, but, on paravirt kernels,
+ * INTERRUPT_RETURN can translate into a jump into a
+ * hypercall page.
+ */
+ std
+ movq $0, 5*8(%rsp) /* clear "NMI executing" */
/*
* INTERRUPT_RETURN reads the "iret" frame and exits the NMI
--
2.4.3

x86-nmi-Enable-nested-do_nmi-handling-for-64-bit-ker.patch View File

@@ -1,196 +0,0 @@
From 44c76628aaf7408eb93f2020d4aee0894f1d408e Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 15 Jul 2015 10:29:33 -0700
Subject: [PATCH 2/7] x86/nmi: Enable nested do_nmi() handling for 64-bit
kernels
32-bit kernels handle nested NMIs in C. Enable the exact same
handling on 64-bit kernels as well. This isn't currently
necessary, but it will become necessary once the asm code starts
allowing limited nesting.
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
arch/x86/kernel/nmi.c | 123 +++++++++++++++++++++-----------------------------
1 file changed, 52 insertions(+), 71 deletions(-)
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index c3e985d..d8766b1 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -408,15 +408,15 @@ static void default_do_nmi(struct pt_regs *regs)
NOKPROBE_SYMBOL(default_do_nmi);
/*
- * NMIs can hit breakpoints which will cause it to lose its
- * NMI context with the CPU when the breakpoint does an iret.
- */
-#ifdef CONFIG_X86_32
-/*
- * For i386, NMIs use the same stack as the kernel, and we can
- * add a workaround to the iret problem in C (preventing nested
- * NMIs if an NMI takes a trap). Simply have 3 states the NMI
- * can be in:
+ * NMIs can hit breakpoints which will cause it to lose its NMI context
+ * with the CPU when the breakpoint or page fault does an IRET.
+ *
+ * As a result, NMIs can nest if NMIs get unmasked due an IRET during
+ * NMI processing. On x86_64, the asm glue protects us from nested NMIs
+ * if the outer NMI came from kernel mode, but we can still nest if the
+ * outer NMI came from user mode.
+ *
+ * To handle these nested NMIs, we have three states:
*
* 1) not running
* 2) executing
@@ -430,15 +430,14 @@ NOKPROBE_SYMBOL(default_do_nmi);
* (Note, the latch is binary, thus multiple NMIs triggering,
* when one is running, are ignored. Only one NMI is restarted.)
*
- * If an NMI hits a breakpoint that executes an iret, another
- * NMI can preempt it. We do not want to allow this new NMI
- * to run, but we want to execute it when the first one finishes.
- * We set the state to "latched", and the exit of the first NMI will
- * perform a dec_return, if the result is zero (NOT_RUNNING), then
- * it will simply exit the NMI handler. If not, the dec_return
- * would have set the state to NMI_EXECUTING (what we want it to
- * be when we are running). In this case, we simply jump back
- * to rerun the NMI handler again, and restart the 'latched' NMI.
+ * If an NMI executes an iret, another NMI can preempt it. We do not
+ * want to allow this new NMI to run, but we want to execute it when the
+ * first one finishes. We set the state to "latched", and the exit of
+ * the first NMI will perform a dec_return, if the result is zero
+ * (NOT_RUNNING), then it will simply exit the NMI handler. If not, the
+ * dec_return would have set the state to NMI_EXECUTING (what we want it
+ * to be when we are running). In this case, we simply jump back to
+ * rerun the NMI handler again, and restart the 'latched' NMI.
*
* No trap (breakpoint or page fault) should be hit before nmi_restart,
* thus there is no race between the first check of state for NOT_RUNNING
@@ -461,49 +460,36 @@ enum nmi_states {
static DEFINE_PER_CPU(enum nmi_states, nmi_state);
static DEFINE_PER_CPU(unsigned long, nmi_cr2);
-#define nmi_nesting_preprocess(regs) \
- do { \
- if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { \
- this_cpu_write(nmi_state, NMI_LATCHED); \
- return; \
- } \
- this_cpu_write(nmi_state, NMI_EXECUTING); \
- this_cpu_write(nmi_cr2, read_cr2()); \
- } while (0); \
- nmi_restart:
-
-#define nmi_nesting_postprocess() \
- do { \
- if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) \
- write_cr2(this_cpu_read(nmi_cr2)); \
- if (this_cpu_dec_return(nmi_state)) \
- goto nmi_restart; \
- } while (0)
-#else /* x86_64 */
+#ifdef CONFIG_X86_64
/*
- * In x86_64 things are a bit more difficult. This has the same problem
- * where an NMI hitting a breakpoint that calls iret will remove the
- * NMI context, allowing a nested NMI to enter. What makes this more
- * difficult is that both NMIs and breakpoints have their own stack.
- * When a new NMI or breakpoint is executed, the stack is set to a fixed
- * point. If an NMI is nested, it will have its stack set at that same
- * fixed address that the first NMI had, and will start corrupting the
- * stack. This is handled in entry_64.S, but the same problem exists with
- * the breakpoint stack.
+ * In x86_64, we need to handle breakpoint -> NMI -> breakpoint. Without
+ * some care, the inner breakpoint will clobber the outer breakpoint's
+ * stack.
*
- * If a breakpoint is being processed, and the debug stack is being used,
- * if an NMI comes in and also hits a breakpoint, the stack pointer
- * will be set to the same fixed address as the breakpoint that was
- * interrupted, causing that stack to be corrupted. To handle this case,
- * check if the stack that was interrupted is the debug stack, and if
- * so, change the IDT so that new breakpoints will use the current stack
- * and not switch to the fixed address. On return of the NMI, switch back
- * to the original IDT.
+ * If a breakpoint is being processed, and the debug stack is being
+ * used, if an NMI comes in and also hits a breakpoint, the stack
+ * pointer will be set to the same fixed address as the breakpoint that
+ * was interrupted, causing that stack to be corrupted. To handle this
+ * case, check if the stack that was interrupted is the debug stack, and
+ * if so, change the IDT so that new breakpoints will use the current
+ * stack and not switch to the fixed address. On return of the NMI,
+ * switch back to the original IDT.
*/
static DEFINE_PER_CPU(int, update_debug_stack);
+#endif
-static inline void nmi_nesting_preprocess(struct pt_regs *regs)
+dotraplinkage notrace void
+do_nmi(struct pt_regs *regs, long error_code)
{
+ if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {
+ this_cpu_write(nmi_state, NMI_LATCHED);
+ return;
+ }
+ this_cpu_write(nmi_state, NMI_EXECUTING);
+ this_cpu_write(nmi_cr2, read_cr2());
+nmi_restart:
+
+#ifdef CONFIG_X86_64
/*
* If we interrupted a breakpoint, it is possible that
* the nmi handler will have breakpoints too. We need to
@@ -514,22 +500,8 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs)
debug_stack_set_zero();
this_cpu_write(update_debug_stack, 1);
}
-}
-
-static inline void nmi_nesting_postprocess(void)
-{
- if (unlikely(this_cpu_read(update_debug_stack))) {
- debug_stack_reset();
- this_cpu_write(update_debug_stack, 0);
- }
-}
#endif
-dotraplinkage notrace void
-do_nmi(struct pt_regs *regs, long error_code)
-{
- nmi_nesting_preprocess(regs);
-
nmi_enter();
inc_irq_stat(__nmi_count);
@@ -539,8 +511,17 @@ do_nmi(struct pt_regs *regs, long error_code)
nmi_exit();
- /* On i386, may loop back to preprocess */
- nmi_nesting_postprocess();
+#ifdef CONFIG_X86_64
+ if (unlikely(this_cpu_read(update_debug_stack))) {
+ debug_stack_reset();
+ this_cpu_write(update_debug_stack, 0);
+ }
+#endif
+
+ if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
+ write_cr2(this_cpu_read(nmi_cr2));
+ if (this_cpu_dec_return(nmi_state))
+ goto nmi_restart;
}
NOKPROBE_SYMBOL(do_nmi);
--
2.4.3
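For reference, the three-state latch that the commit message describes can be condensed into a small self-contained C sketch (illustrative names, not the kernel's per-cpu API): an NMI that arrives while another is executing is latched, and the exit path's decrement-and-test replays it instead of dropping it.

/* Userspace sketch of the NOT_RUNNING / EXECUTING / LATCHED latch above. */
#include <stdio.h>

enum { NMI_NOT_RUNNING = 0, NMI_EXECUTING = 1, NMI_LATCHED = 2 };

static int state = NMI_NOT_RUNNING;    /* per-CPU variable in the kernel */
static int inject_nested = 1;          /* simulate one nested NMI */

static void do_nmi(void);

static void handle_one_nmi(void)
{
	puts("handling NMI");
	if (inject_nested) {            /* an NMI arrives mid-handler */
		inject_nested = 0;
		do_nmi();
	}
}

static void do_nmi(void)
{
	if (state != NMI_NOT_RUNNING) { /* nested entry: latch it and return */
		state = NMI_LATCHED;
		return;
	}
	state = NMI_EXECUTING;
nmi_restart:
	handle_one_nmi();
	if (--state)                    /* dec_return: LATCHED -> rerun once */
		goto nmi_restart;
}

int main(void)
{
	do_nmi();  /* prints "handling NMI" twice: the latched NMI is replayed */
	return 0;
}

The real handler additionally saves CR2 before the loop and restores it afterwards, as the hunks above show, so that a page fault taken inside the NMI cannot corrupt an interrupted page-fault handler's view of CR2.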