- CVE-2015-3290 CVE-2015-3291 NMI issues (rhbz 1243465 1245927)
parent 829cdb4122
commit 61b3efdaf3

kernel.spec | 25
@@ -634,6 +634,17 @@ Patch26252: pcmcia-fix-a-boot-time-warning-in-pcmcia-cs-code.patch
#CVE-2015-1333 rhbz 1244171
Patch26253: KEYS-ensure-we-free-the-assoc-array-edit-if-edit-is-.patch

# CVE-2015-3290 CVE-2015-3291 rhbz 1243465 1245927
Patch26254: x86-asm-entry-64-Remove-pointless-jump-to-irq_return.patch
Patch26255: x86-entry-Stop-using-PER_CPU_VAR-kernel_stack.patch
Patch26256: x86-entry-Define-cpu_current_top_of_stack-for-64-bit.patch
Patch26257: x86-nmi-Enable-nested-do_nmi-handling-for-64-bit-ker.patch
Patch26258: x86-nmi-64-Remove-asm-code-that-saves-cr2.patch
Patch26259: x86-nmi-64-Switch-stacks-on-userspace-NMI-entry.patch
Patch26260: x86-nmi-64-Improve-nested-NMI-comments.patch
Patch26261: x86-nmi-64-Reorder-nested-NMI-checks.patch
Patch26262: x86-nmi-64-Use-DF-to-avoid-userspace-RSP-confusing-n.patch

# END OF PATCH DEFINITIONS

%endif
@@ -1386,6 +1397,17 @@ ApplyPatch pcmcia-fix-a-boot-time-warning-in-pcmcia-cs-code.patch
#CVE-2015-1333 rhbz 1244171
ApplyPatch KEYS-ensure-we-free-the-assoc-array-edit-if-edit-is-.patch

# CVE-2015-3290 CVE-2015-3291 rhbz 1243465 1245927
ApplyPatch x86-asm-entry-64-Remove-pointless-jump-to-irq_return.patch
ApplyPatch x86-entry-Stop-using-PER_CPU_VAR-kernel_stack.patch
ApplyPatch x86-entry-Define-cpu_current_top_of_stack-for-64-bit.patch
ApplyPatch x86-nmi-Enable-nested-do_nmi-handling-for-64-bit-ker.patch
ApplyPatch x86-nmi-64-Remove-asm-code-that-saves-cr2.patch
ApplyPatch x86-nmi-64-Switch-stacks-on-userspace-NMI-entry.patch
ApplyPatch x86-nmi-64-Improve-nested-NMI-comments.patch
ApplyPatch x86-nmi-64-Reorder-nested-NMI-checks.patch
ApplyPatch x86-nmi-64-Use-DF-to-avoid-userspace-RSP-confusing-n.patch

# END OF PATCH APPLICATIONS

%endif
@@ -2236,6 +2258,9 @@ fi
#
#
%changelog
* Mon Jul 27 2015 Laura Abbott <labbott@fedoraproject.org>
- CVE-2015-3290 CVE-2015-3291 NMI issues (rhbz 1243465 1245927)

* Mon Jul 27 2015 Josh Boyer <jwboyer@fedoraproject.org>
- CVE-2015-1333 add_key memory leak (rhbz 1244171)

@@ -0,0 +1,50 @@
From 480ea71e661b2a48a8d28ea4327cafa666c267ff Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 4 Jun 2015 13:24:29 -0700
Subject: [PATCH 1/7] x86/asm/entry/64: Remove pointless jump to irq_return

INTERRUPT_RETURN turns into a jmp instruction. There's no need
for extra indirection.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: <linux-kernel@vger.kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/2f2318653dbad284a59311f13f08cea71298fd7c.1433449436.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/entry_64.S | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 02c2eff..b32f346 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -793,8 +793,6 @@ retint_kernel:
 restore_c_regs_and_iret:
 RESTORE_C_REGS
 REMOVE_PT_GPREGS_FROM_STACK 8
-
-irq_return:
 INTERRUPT_RETURN

 ENTRY(native_iret)
@@ -1640,7 +1638,7 @@ nmi_restore:

 /* Clear the NMI executing stack variable */
 movq $0, 5*8(%rsp)
- jmp irq_return
+ INTERRUPT_RETURN
 CFI_ENDPROC
 END(nmi)

--
2.4.3
@@ -0,0 +1,129 @@
From e154db431700ef8e9a29c0e88fa8a11b8dc8fc1e Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <dvlasenk@redhat.com>
Date: Fri, 24 Apr 2015 17:31:35 +0200
Subject: [PATCH 3/9] x86/entry: Define 'cpu_current_top_of_stack' for 64-bit
 code

32-bit code has PER_CPU_VAR(cpu_current_top_of_stack).
64-bit code uses somewhat more obscure: PER_CPU_VAR(cpu_tss + TSS_sp0).

Define the 'cpu_current_top_of_stack' macro on CONFIG_X86_64
as well so that the PER_CPU_VAR(cpu_current_top_of_stack)
expression can be used in both 32-bit and 64-bit code.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Drewry <wad@chromium.org>
Link: http://lkml.kernel.org/r/1429889495-27850-3-git-send-email-dvlasenk@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/ia32/ia32entry.S | 4 ++--
 arch/x86/include/asm/thread_info.h | 10 ++++------
 arch/x86/kernel/entry_64.S | 2 +-
 arch/x86/xen/xen-asm_64.S | 5 +++--
 4 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
|
||||
index e3c0f06..1c30fa9 100644
|
||||
--- a/arch/x86/ia32/ia32entry.S
|
||||
+++ b/arch/x86/ia32/ia32entry.S
|
||||
@@ -119,7 +119,7 @@ ENTRY(ia32_sysenter_target)
|
||||
* it is too small to ever cause noticeable irq latency.
|
||||
*/
|
||||
SWAPGS_UNSAFE_STACK
|
||||
- movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
|
||||
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
|
||||
/* Zero-extending 32-bit regs, do not remove */
|
||||
@@ -356,7 +356,7 @@ ENTRY(ia32_cstar_target)
|
||||
SWAPGS_UNSAFE_STACK
|
||||
movl %esp,%r8d
|
||||
CFI_REGISTER rsp,r8
|
||||
- movq PER_CPU_VAR(cpu_tss + TSS_sp0),%rsp
|
||||
+ movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
|
||||
/* Zero-extending 32-bit regs, do not remove */
|
||||
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
|
||||
index d656a36..9ececd0 100644
|
||||
--- a/arch/x86/include/asm/thread_info.h
|
||||
+++ b/arch/x86/include/asm/thread_info.h
|
||||
@@ -197,16 +197,14 @@ static inline unsigned long current_stack_pointer(void)
|
||||
|
||||
#else /* !__ASSEMBLY__ */
|
||||
|
||||
+#ifdef CONFIG_X86_64
|
||||
+# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
|
||||
+#endif
|
||||
+
|
||||
/* Load thread_info address into "reg" */
|
||||
-#ifdef CONFIG_X86_32
|
||||
#define GET_THREAD_INFO(reg) \
|
||||
_ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \
|
||||
_ASM_SUB $(THREAD_SIZE),reg ;
|
||||
-#else
|
||||
-#define GET_THREAD_INFO(reg) \
|
||||
- _ASM_MOV PER_CPU_VAR(cpu_tss + TSS_sp0),reg ; \
|
||||
- _ASM_SUB $(THREAD_SIZE),reg ;
|
||||
-#endif
|
||||
|
||||
/*
|
||||
* ASM operand which evaluates to a 'thread_info' address of
|
||||
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
|
||||
index 00cde3c..61e0f27 100644
|
||||
--- a/arch/x86/kernel/entry_64.S
|
||||
+++ b/arch/x86/kernel/entry_64.S
|
||||
@@ -216,7 +216,7 @@ ENTRY(system_call)
|
||||
GLOBAL(system_call_after_swapgs)
|
||||
|
||||
movq %rsp,PER_CPU_VAR(rsp_scratch)
|
||||
- movq PER_CPU_VAR(cpu_tss + TSS_sp0),%rsp
|
||||
+ movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp
|
||||
|
||||
/* Construct struct pt_regs on stack */
|
||||
pushq_cfi $__USER_DS /* pt_regs->ss */
|
||||
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
|
||||
index acc49e0..5e15e92 100644
|
||||
--- a/arch/x86/xen/xen-asm_64.S
|
||||
+++ b/arch/x86/xen/xen-asm_64.S
|
||||
@@ -16,6 +16,7 @@
|
||||
#include <asm/processor-flags.h>
|
||||
#include <asm/segment.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
+#include <asm/thread_info.h>
|
||||
|
||||
#include <xen/interface/xen.h>
|
||||
|
||||
@@ -70,7 +71,7 @@ ENTRY(xen_sysret64)
|
||||
* still with the kernel gs, so we can easily switch back
|
||||
*/
|
||||
movq %rsp, PER_CPU_VAR(rsp_scratch)
|
||||
- movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
|
||||
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
|
||||
|
||||
pushq $__USER_DS
|
||||
pushq PER_CPU_VAR(rsp_scratch)
|
||||
@@ -89,7 +90,7 @@ ENTRY(xen_sysret32)
|
||||
* still with the kernel gs, so we can easily switch back
|
||||
*/
|
||||
movq %rsp, PER_CPU_VAR(rsp_scratch)
|
||||
- movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
|
||||
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
|
||||
|
||||
pushq $__USER32_DS
|
||||
pushq PER_CPU_VAR(rsp_scratch)
|
||||
--
|
||||
2.4.3
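
The change above is pure assembler, but the idea is small enough to model in C: expose a single name, cpu_current_top_of_stack, and let the build configuration decide which per-CPU storage backs it. The following is only a toy sketch; MODEL_64BIT, THREAD_SIZE and the variable names are placeholders, not the kernel's real definitions.

/*
 * Toy model (not kernel code) of what the patch does at the asm level:
 * one name, cpu_current_top_of_stack, whose backing storage depends on
 * the configuration.
 */
#include <stdint.h>
#include <stdio.h>

#define THREAD_SIZE (16 * 1024)                 /* assumed value */

#ifdef MODEL_64BIT
/* 64-bit: the top of stack already lives in the per-CPU TSS (cpu_tss + TSS_sp0). */
static uintptr_t cpu_tss_sp0;
#define cpu_current_top_of_stack cpu_tss_sp0
#else
/* 32-bit: a dedicated per-CPU variable of this name already exists. */
static uintptr_t cpu_current_top_of_stack;
#endif

int main(void)
{
        static unsigned char stack[THREAD_SIZE];

        cpu_current_top_of_stack = (uintptr_t)stack + THREAD_SIZE;
        /* Callers on either configuration use only the one name. */
        printf("top of stack: %p\n", (void *)cpu_current_top_of_stack);
        return 0;
}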

@@ -0,0 +1,115 @@
From 252c6cff2b83c4cc969b64306956a5c8c130a50d Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <dvlasenk@redhat.com>
Date: Fri, 24 Apr 2015 17:31:33 +0200
Subject: [PATCH 2/9] x86/entry: Stop using PER_CPU_VAR(kernel_stack)

PER_CPU_VAR(kernel_stack) is redundant:

- On the 64-bit build, we can use PER_CPU_VAR(cpu_tss + TSS_sp0).
- On the 32-bit build, we can use PER_CPU_VAR(cpu_current_top_of_stack).

PER_CPU_VAR(kernel_stack) will be deleted by a separate change.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Drewry <wad@chromium.org>
Link: http://lkml.kernel.org/r/1429889495-27850-1-git-send-email-dvlasenk@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/ia32/ia32entry.S | 2 +-
 arch/x86/include/asm/thread_info.h | 8 +++++++-
 arch/x86/kernel/entry_64.S | 2 +-
 arch/x86/xen/xen-asm_64.S | 5 +++--
 4 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
|
||||
index 72bf268..e3c0f06 100644
|
||||
--- a/arch/x86/ia32/ia32entry.S
|
||||
+++ b/arch/x86/ia32/ia32entry.S
|
||||
@@ -356,7 +356,7 @@ ENTRY(ia32_cstar_target)
|
||||
SWAPGS_UNSAFE_STACK
|
||||
movl %esp,%r8d
|
||||
CFI_REGISTER rsp,r8
|
||||
- movq PER_CPU_VAR(kernel_stack),%rsp
|
||||
+ movq PER_CPU_VAR(cpu_tss + TSS_sp0),%rsp
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
|
||||
/* Zero-extending 32-bit regs, do not remove */
|
||||
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
|
||||
index b4bdec3..d656a36 100644
|
||||
--- a/arch/x86/include/asm/thread_info.h
|
||||
+++ b/arch/x86/include/asm/thread_info.h
|
||||
@@ -198,9 +198,15 @@ static inline unsigned long current_stack_pointer(void)
|
||||
#else /* !__ASSEMBLY__ */
|
||||
|
||||
/* Load thread_info address into "reg" */
|
||||
+#ifdef CONFIG_X86_32
|
||||
#define GET_THREAD_INFO(reg) \
|
||||
- _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \
|
||||
+ _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \
|
||||
_ASM_SUB $(THREAD_SIZE),reg ;
|
||||
+#else
|
||||
+#define GET_THREAD_INFO(reg) \
|
||||
+ _ASM_MOV PER_CPU_VAR(cpu_tss + TSS_sp0),reg ; \
|
||||
+ _ASM_SUB $(THREAD_SIZE),reg ;
|
||||
+#endif
|
||||
|
||||
/*
|
||||
* ASM operand which evaluates to a 'thread_info' address of
|
||||
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
|
||||
index b32f346..00cde3c 100644
|
||||
--- a/arch/x86/kernel/entry_64.S
|
||||
+++ b/arch/x86/kernel/entry_64.S
|
||||
@@ -216,7 +216,7 @@ ENTRY(system_call)
|
||||
GLOBAL(system_call_after_swapgs)
|
||||
|
||||
movq %rsp,PER_CPU_VAR(rsp_scratch)
|
||||
- movq PER_CPU_VAR(kernel_stack),%rsp
|
||||
+ movq PER_CPU_VAR(cpu_tss + TSS_sp0),%rsp
|
||||
|
||||
/* Construct struct pt_regs on stack */
|
||||
pushq_cfi $__USER_DS /* pt_regs->ss */
|
||||
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
|
||||
index 985fc3e..acc49e0 100644
|
||||
--- a/arch/x86/xen/xen-asm_64.S
|
||||
+++ b/arch/x86/xen/xen-asm_64.S
|
||||
@@ -15,6 +15,7 @@
|
||||
#include <asm/percpu.h>
|
||||
#include <asm/processor-flags.h>
|
||||
#include <asm/segment.h>
|
||||
+#include <asm/asm-offsets.h>
|
||||
|
||||
#include <xen/interface/xen.h>
|
||||
|
||||
@@ -69,7 +70,7 @@ ENTRY(xen_sysret64)
|
||||
* still with the kernel gs, so we can easily switch back
|
||||
*/
|
||||
movq %rsp, PER_CPU_VAR(rsp_scratch)
|
||||
- movq PER_CPU_VAR(kernel_stack), %rsp
|
||||
+ movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
|
||||
|
||||
pushq $__USER_DS
|
||||
pushq PER_CPU_VAR(rsp_scratch)
|
||||
@@ -88,7 +89,7 @@ ENTRY(xen_sysret32)
|
||||
* still with the kernel gs, so we can easily switch back
|
||||
*/
|
||||
movq %rsp, PER_CPU_VAR(rsp_scratch)
|
||||
- movq PER_CPU_VAR(kernel_stack), %rsp
|
||||
+ movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
|
||||
|
||||
pushq $__USER32_DS
|
||||
pushq PER_CPU_VAR(rsp_scratch)
|
||||
--
|
||||
2.4.3
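
The GET_THREAD_INFO hunk above encodes one fixed relationship: thread_info sits THREAD_SIZE bytes below the top of the kernel stack, whichever per-CPU variable supplies that top. A small, self-contained C rendering of that arithmetic, with an assumed THREAD_SIZE and a placeholder thread_info layout, looks like this:

/*
 * Rough C equivalent of what the GET_THREAD_INFO asm macro computes:
 * _ASM_MOV top_of_stack, reg ; _ASM_SUB $(THREAD_SIZE), reg.
 * The value of THREAD_SIZE and the struct fields are placeholders.
 */
#include <stdint.h>
#include <stdio.h>

#define THREAD_SIZE (16 * 1024)           /* assumed stack size */

struct thread_info {                      /* placeholder fields only */
        int flags;
};

static struct thread_info *thread_info_from_top(uintptr_t top_of_stack)
{
        /* thread_info lives at the bottom of the THREAD_SIZE stack area */
        return (struct thread_info *)(top_of_stack - THREAD_SIZE);
}

int main(void)
{
        static unsigned char stack[THREAD_SIZE];
        uintptr_t top = (uintptr_t)stack + THREAD_SIZE;

        printf("thread_info at %p, stack base at %p\n",
               (void *)thread_info_from_top(top), (void *)stack);
        return 0;
}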

@@ -0,0 +1,290 @@
From 7f340812f17f9c24519254fdaa88dd7b54fc3b59 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Fri, 10 Jul 2015 17:13:26 -0700
Subject: [PATCH 5/7] x86/nmi/64: Improve nested NMI comments

commit ed02eaa10579ffd480c3bda29701e658f17196e9 upstream.

I found the nested NMI documentation to be difficult to follow.
Improve the comments.

Cc: stable@vger.kernel.org
Signed-off-by: Andy Lutomirski <luto@kernel.org>
[bwh: Backported to 4.0: adjust filename, context]
[labbott: Context fixup around nmi_restore]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
 arch/x86/kernel/entry_64.S | 160 ++++++++++++++++++++++-------------------
 arch/x86/kernel/nmi.c | 4 +-
 2 files changed, 94 insertions(+), 70 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
|
||||
index 4fabbaa..152790c 100644
|
||||
--- a/arch/x86/kernel/entry_64.S
|
||||
+++ b/arch/x86/kernel/entry_64.S
|
||||
@@ -1411,11 +1411,12 @@ ENTRY(nmi)
|
||||
* If the variable is not set and the stack is not the NMI
|
||||
* stack then:
|
||||
* o Set the special variable on the stack
|
||||
- * o Copy the interrupt frame into a "saved" location on the stack
|
||||
- * o Copy the interrupt frame into a "copy" location on the stack
|
||||
+ * o Copy the interrupt frame into an "outermost" location on the
|
||||
+ * stack
|
||||
+ * o Copy the interrupt frame into an "iret" location on the stack
|
||||
* o Continue processing the NMI
|
||||
* If the variable is set or the previous stack is the NMI stack:
|
||||
- * o Modify the "copy" location to jump to the repeate_nmi
|
||||
+ * o Modify the "iret" location to jump to the repeat_nmi
|
||||
* o return back to the first NMI
|
||||
*
|
||||
* Now on exit of the first NMI, we first clear the stack variable
|
||||
@@ -1509,18 +1510,60 @@ ENTRY(nmi)
|
||||
|
||||
.Lnmi_from_kernel:
|
||||
/*
|
||||
- * Check the special variable on the stack to see if NMIs are
|
||||
- * executing.
|
||||
+ * Here's what our stack frame will look like:
|
||||
+ * +---------------------------------------------------------+
|
||||
+ * | original SS |
|
||||
+ * | original Return RSP |
|
||||
+ * | original RFLAGS |
|
||||
+ * | original CS |
|
||||
+ * | original RIP |
|
||||
+ * +---------------------------------------------------------+
|
||||
+ * | temp storage for rdx |
|
||||
+ * +---------------------------------------------------------+
|
||||
+ * | "NMI executing" variable |
|
||||
+ * +---------------------------------------------------------+
|
||||
+ * | iret SS } Copied from "outermost" frame |
|
||||
+ * | iret Return RSP } on each loop iteration; overwritten |
|
||||
+ * | iret RFLAGS } by a nested NMI to force another |
|
||||
+ * | iret CS } iteration if needed. |
|
||||
+ * | iret RIP } |
|
||||
+ * +---------------------------------------------------------+
|
||||
+ * | outermost SS } initialized in first_nmi; |
|
||||
+ * | outermost Return RSP } will not be changed before |
|
||||
+ * | outermost RFLAGS } NMI processing is done. |
|
||||
+ * | outermost CS } Copied to "iret" frame on each |
|
||||
+ * | outermost RIP } iteration. |
|
||||
+ * +---------------------------------------------------------+
|
||||
+ * | pt_regs |
|
||||
+ * +---------------------------------------------------------+
|
||||
+ *
|
||||
+ * The "original" frame is used by hardware. Before re-enabling
|
||||
+ * NMIs, we need to be done with it, and we need to leave enough
|
||||
+ * space for the asm code here.
|
||||
+ *
|
||||
+ * We return by executing IRET while RSP points to the "iret" frame.
|
||||
+ * That will either return for real or it will loop back into NMI
|
||||
+ * processing.
|
||||
+ *
|
||||
+ * The "outermost" frame is copied to the "iret" frame on each
|
||||
+ * iteration of the loop, so each iteration starts with the "iret"
|
||||
+ * frame pointing to the final return target.
|
||||
+ */
|
||||
+
|
||||
+ /*
|
||||
+ * Determine whether we're a nested NMI.
|
||||
+ *
|
||||
+ * First check "NMI executing". If it's set, then we're nested.
|
||||
+ * This will not detect if we interrupted an outer NMI just
|
||||
+ * before IRET.
|
||||
*/
|
||||
cmpl $1, -8(%rsp)
|
||||
je nested_nmi
|
||||
|
||||
/*
|
||||
- * Now test if the previous stack was an NMI stack.
|
||||
- * We need the double check. We check the NMI stack to satisfy the
|
||||
- * race when the first NMI clears the variable before returning.
|
||||
- * We check the variable because the first NMI could be in a
|
||||
- * breakpoint routine using a breakpoint stack.
|
||||
+ * Now test if the previous stack was an NMI stack. This covers
|
||||
+ * the case where we interrupt an outer NMI after it clears
|
||||
+ * "NMI executing" but before IRET.
|
||||
*/
|
||||
lea 6*8(%rsp), %rdx
|
||||
/* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */
|
||||
@@ -1537,9 +1580,11 @@ ENTRY(nmi)
|
||||
|
||||
nested_nmi:
|
||||
/*
|
||||
- * Do nothing if we interrupted the fixup in repeat_nmi.
|
||||
- * It's about to repeat the NMI handler, so we are fine
|
||||
- * with ignoring this one.
|
||||
+ * If we interrupted an NMI that is between repeat_nmi and
|
||||
+ * end_repeat_nmi, then we must not modify the "iret" frame
|
||||
+ * because it's being written by the outer NMI. That's okay:
|
||||
+ * the outer NMI handler is about to call do_nmi anyway,
|
||||
+ * so we can just resume the outer NMI.
|
||||
*/
|
||||
movq $repeat_nmi, %rdx
|
||||
cmpq 8(%rsp), %rdx
|
||||
@@ -1549,7 +1594,10 @@ nested_nmi:
|
||||
ja nested_nmi_out
|
||||
|
||||
1:
|
||||
- /* Set up the interrupted NMIs stack to jump to repeat_nmi */
|
||||
+ /*
|
||||
+ * Modify the "iret" frame to point to repeat_nmi, forcing another
|
||||
+ * iteration of NMI handling.
|
||||
+ */
|
||||
leaq -1*8(%rsp), %rdx
|
||||
movq %rdx, %rsp
|
||||
CFI_ADJUST_CFA_OFFSET 1*8
|
||||
@@ -1568,60 +1616,23 @@ nested_nmi_out:
|
||||
popq_cfi %rdx
|
||||
CFI_RESTORE rdx
|
||||
|
||||
- /* No need to check faults here */
|
||||
+ /* We are returning to kernel mode, so this cannot result in a fault. */
|
||||
INTERRUPT_RETURN
|
||||
|
||||
CFI_RESTORE_STATE
|
||||
first_nmi:
|
||||
- /*
|
||||
- * Because nested NMIs will use the pushed location that we
|
||||
- * stored in rdx, we must keep that space available.
|
||||
- * Here's what our stack frame will look like:
|
||||
- * +-------------------------+
|
||||
- * | original SS |
|
||||
- * | original Return RSP |
|
||||
- * | original RFLAGS |
|
||||
- * | original CS |
|
||||
- * | original RIP |
|
||||
- * +-------------------------+
|
||||
- * | temp storage for rdx |
|
||||
- * +-------------------------+
|
||||
- * | NMI executing variable |
|
||||
- * +-------------------------+
|
||||
- * | copied SS |
|
||||
- * | copied Return RSP |
|
||||
- * | copied RFLAGS |
|
||||
- * | copied CS |
|
||||
- * | copied RIP |
|
||||
- * +-------------------------+
|
||||
- * | Saved SS |
|
||||
- * | Saved Return RSP |
|
||||
- * | Saved RFLAGS |
|
||||
- * | Saved CS |
|
||||
- * | Saved RIP |
|
||||
- * +-------------------------+
|
||||
- * | pt_regs |
|
||||
- * +-------------------------+
|
||||
- *
|
||||
- * The saved stack frame is used to fix up the copied stack frame
|
||||
- * that a nested NMI may change to make the interrupted NMI iret jump
|
||||
- * to the repeat_nmi. The original stack frame and the temp storage
|
||||
- * is also used by nested NMIs and can not be trusted on exit.
|
||||
- */
|
||||
- /* Do not pop rdx, nested NMIs will corrupt that part of the stack */
|
||||
+ /* Restore rdx. */
|
||||
movq (%rsp), %rdx
|
||||
CFI_RESTORE rdx
|
||||
|
||||
- /* Set the NMI executing variable on the stack. */
|
||||
+ /* Set "NMI executing" on the stack. */
|
||||
pushq_cfi $1
|
||||
|
||||
- /*
|
||||
- * Leave room for the "copied" frame
|
||||
- */
|
||||
+ /* Leave room for the "iret" frame */
|
||||
subq $(5*8), %rsp
|
||||
CFI_ADJUST_CFA_OFFSET 5*8
|
||||
|
||||
- /* Copy the stack frame to the Saved frame */
|
||||
+ /* Copy the "original" frame to the "outermost" frame */
|
||||
.rept 5
|
||||
pushq_cfi 11*8(%rsp)
|
||||
.endr
|
||||
@@ -1629,6 +1640,7 @@ first_nmi:
|
||||
|
||||
/* Everything up to here is safe from nested NMIs */
|
||||
|
||||
+repeat_nmi:
|
||||
/*
|
||||
* If there was a nested NMI, the first NMI's iret will return
|
||||
* here. But NMIs are still enabled and we can take another
|
||||
@@ -1637,16 +1649,21 @@ first_nmi:
|
||||
* it will just return, as we are about to repeat an NMI anyway.
|
||||
* This makes it safe to copy to the stack frame that a nested
|
||||
* NMI will update.
|
||||
- */
|
||||
-repeat_nmi:
|
||||
- /*
|
||||
- * Update the stack variable to say we are still in NMI (the update
|
||||
- * is benign for the non-repeat case, where 1 was pushed just above
|
||||
- * to this very stack slot).
|
||||
+ *
|
||||
+ * RSP is pointing to "outermost RIP". gsbase is unknown, but, if
|
||||
+ * we're repeating an NMI, gsbase has the same value that it had on
|
||||
+ * the first iteration. paranoid_entry will load the kernel
|
||||
+ * gsbase if needed before we call do_nmi.
|
||||
+ *
|
||||
+ * Set "NMI executing" in case we came back here via IRET.
|
||||
*/
|
||||
movq $1, 10*8(%rsp)
|
||||
|
||||
- /* Make another copy, this one may be modified by nested NMIs */
|
||||
+ /*
|
||||
+ * Copy the "outermost" frame to the "iret" frame. NMIs that nest
|
||||
+ * here must not modify the "iret" frame while we're writing to
|
||||
+ * it or it will end up containing garbage.
|
||||
+ */
|
||||
addq $(10*8), %rsp
|
||||
CFI_ADJUST_CFA_OFFSET -10*8
|
||||
.rept 5
|
||||
@@ -1657,9 +1674,9 @@ repeat_nmi:
|
||||
end_repeat_nmi:
|
||||
|
||||
/*
|
||||
- * Everything below this point can be preempted by a nested
|
||||
- * NMI if the first NMI took an exception and reset our iret stack
|
||||
- * so that we repeat another NMI.
|
||||
+ * Everything below this point can be preempted by a nested NMI.
|
||||
+ * If this happens, then the inner NMI will change the "iret"
|
||||
+ * frame to point back to repeat_nmi.
|
||||
*/
|
||||
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
|
||||
ALLOC_PT_GPREGS_ON_STACK
|
||||
@@ -1686,11 +1703,18 @@ nmi_swapgs:
|
||||
nmi_restore:
|
||||
RESTORE_EXTRA_REGS
|
||||
RESTORE_C_REGS
|
||||
- /* Pop the extra iret frame at once */
|
||||
+
|
||||
+ /* Point RSP at the "iret" frame. */
|
||||
REMOVE_PT_GPREGS_FROM_STACK 6*8
|
||||
|
||||
- /* Clear the NMI executing stack variable */
|
||||
+ /* Clear "NMI executing". */
|
||||
movq $0, 5*8(%rsp)
|
||||
+
|
||||
+ /*
|
||||
+ * INTERRUPT_RETURN reads the "iret" frame and exits the NMI
|
||||
+ * stack in a single instruction. We are returning to kernel
|
||||
+ * mode, so this cannot result in a fault.
|
||||
+ */
|
||||
INTERRUPT_RETURN
|
||||
CFI_ENDPROC
|
||||
END(nmi)
|
||||
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
|
||||
index d8766b1..d05bd2e 100644
|
||||
--- a/arch/x86/kernel/nmi.c
|
||||
+++ b/arch/x86/kernel/nmi.c
|
||||
@@ -408,8 +408,8 @@ static void default_do_nmi(struct pt_regs *regs)
|
||||
NOKPROBE_SYMBOL(default_do_nmi);
|
||||
|
||||
/*
|
||||
- * NMIs can hit breakpoints which will cause it to lose its NMI context
|
||||
- * with the CPU when the breakpoint or page fault does an IRET.
|
||||
+ * NMIs can page fault or hit breakpoints which will cause it to lose
|
||||
+ * its NMI context with the CPU when the breakpoint or page fault does an IRET.
|
||||
*
|
||||
* As a result, NMIs can nest if NMIs get unmasked due an IRET during
|
||||
* NMI processing. On x86_64, the asm glue protects us from nested NMIs
|
||||
--
|
||||
2.4.3
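
For readers who find the ASCII stack diagram in the patch above hard to scan, the same picture can be written as a C struct. This is purely illustrative: the real frames are raw 8-byte stack words built by entry_64.S, the struct and field names are invented, and members are listed diagram-top first rather than in address order.

/* Illustration only; not a layout the kernel actually declares. */
#include <stdint.h>

struct iret_frame {               /* what the CPU needs for one IRET */
        uint64_t rip, cs, rflags, rsp, ss;
};

struct nmi_stack_picture {
        struct iret_frame original;   /* pushed by hardware on NMI entry */
        uint64_t saved_rdx;           /* temp storage for rdx */
        uint64_t nmi_executing;       /* the "NMI executing" variable */
        struct iret_frame iret;       /* copied from "outermost" on each loop
                                       * iteration; a nested NMI overwrites it
                                       * to force another iteration */
        struct iret_frame outermost;  /* set up in first_nmi; unchanged until
                                       * NMI processing is done */
        /* pt_regs for the C handler sits below this point */
};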

@@ -0,0 +1,58 @@
From 827b728c1e4aec28a115b6be56a0ac2407e9dd05 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Fri, 10 Jul 2015 12:03:34 -0700
Subject: [PATCH 3/7] x86/nmi/64: Remove asm code that saves cr2

commit e7c2c90651fd54c3ca499fbb065ea5cbac30047d upstream.

Now that do_nmi saves cr2, we don't need to save it in asm.

This is a prerequisite for the fix for CVE-2015-3290.

Cc: stable@vger.kernel.org
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
[bwh: Backported to 4.0: adjust filename, context]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
 arch/x86/kernel/entry_64.S | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b32f346..34f07d7 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1603,29 +1603,11 @@ end_repeat_nmi:
 call paranoid_entry
 DEFAULT_FRAME 0

- /*
- * Save off the CR2 register. If we take a page fault in the NMI then
- * it could corrupt the CR2 value. If the NMI preempts a page fault
- * handler before it was able to read the CR2 register, and then the
- * NMI itself takes a page fault, the page fault that was preempted
- * will read the information from the NMI page fault and not the
- * origin fault. Save it off and restore it if it changes.
- * Use the r12 callee-saved register.
- */
- movq %cr2, %r12
-
 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
 movq %rsp,%rdi
 movq $-1,%rsi
 call do_nmi

- /* Did the NMI take a page fault? Restore cr2 if it did */
- movq %cr2, %rcx
- cmpq %rcx, %r12
- je 1f
- movq %r12, %cr2
-1:
-
 testl %ebx,%ebx /* swapgs needed? */
 jnz nmi_restore
 nmi_swapgs:
--
2.4.3
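
The assembler removed here duplicated work that do_nmi() now performs itself: snapshot CR2 on entry and restore it if a page fault taken inside the NMI changed it. The C side of this is visible in the do_nmi() rework included later in this commit; below is a minimal sketch of the pattern, with read_cr2()/write_cr2() stubbed out since the real accessors touch a privileged register.

/*
 * Minimal sketch of the CR2 save/restore that moved from asm into do_nmi().
 * fake_cr2 stands in for %cr2; nmi_cr2 is per-CPU in the kernel.
 */
#include <stdint.h>

static uint64_t fake_cr2;                         /* stand-in for %cr2 */
static uint64_t read_cr2(void)    { return fake_cr2; }
static void write_cr2(uint64_t v) { fake_cr2 = v; }

static uint64_t nmi_cr2;

static void handle_nmi_body(void) { /* may page-fault and clobber CR2 */ }

static void do_nmi_sketch(void)
{
        nmi_cr2 = read_cr2();                     /* save before handling */

        handle_nmi_body();

        /*
         * If a page fault inside the NMI changed CR2, put it back so the
         * interrupted fault handler still sees its own faulting address.
         */
        if (nmi_cr2 != read_cr2())
                write_cr2(nmi_cr2);
}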

@@ -0,0 +1,90 @@
From dc46e3fb4b597196182d4b1f5d97d0dcca5018b6 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Sun, 12 Jul 2015 20:59:57 -0700
Subject: [PATCH 6/7] x86/nmi/64: Reorder nested NMI checks

commit b7dcb27674b28ca49b710e95da74c44d32154bed upstream.

Check the repeat_nmi .. end_repeat_nmi special case first. The next
patch will rework the RSP check and, as a side effect, the RSP check
will no longer detect repeat_nmi .. end_repeat_nmi, so we'll need
this ordering of the checks.

Note: this is more subtle than it appears. The check for repeat_nmi
.. end_repeat_nmi jumps straight out of the NMI code instead of
adjusting the "iret" frame to force a repeat. This is necessary,
because the code between repeat_nmi and end_repeat_nmi sets "NMI
executing" and then writes to the "iret" frame itself. If a nested
NMI comes in and modifies the "iret" frame while repeat_nmi is also
modifying it, we'll end up with garbage. The old code got this
right, as does the new code, but the new code is a bit more
explicit.

If we were to move the check right after the "NMI executing" check,
then we'd get it wrong and have random crashes.

This is a prerequisite for the fix for CVE-2015-3291.

Cc: stable@vger.kernel.org
Signed-off-by: Andy Lutomirski <luto@kernel.org>
[bwh: Backported to 4.0: adjust filename, spacing]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
 arch/x86/kernel/entry_64.S | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 152790c..be2c4ba 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1553,7 +1553,24 @@ ENTRY(nmi)
 /*
 * Determine whether we're a nested NMI.
 *
- * First check "NMI executing". If it's set, then we're nested.
+ * If we interrupted kernel code between repeat_nmi and
+ * end_repeat_nmi, then we are a nested NMI. We must not
+ * modify the "iret" frame because it's being written by
+ * the outer NMI. That's okay: the outer NMI handler is
+ * about to about to call do_nmi anyway, so we can just
+ * resume the outer NMI.
+ */
+
+ movq $repeat_nmi, %rdx
+ cmpq 8(%rsp), %rdx
+ ja 1f
+ movq $end_repeat_nmi, %rdx
+ cmpq 8(%rsp), %rdx
+ ja nested_nmi_out
+1:
+
+ /*
+ * Now check "NMI executing". If it's set, then we're nested.
 * This will not detect if we interrupted an outer NMI just
 * before IRET.
 */
@@ -1580,21 +1597,6 @@ ENTRY(nmi)

 nested_nmi:
 /*
- * If we interrupted an NMI that is between repeat_nmi and
- * end_repeat_nmi, then we must not modify the "iret" frame
- * because it's being written by the outer NMI. That's okay:
- * the outer NMI handler is about to call do_nmi anyway,
- * so we can just resume the outer NMI.
- */
- movq $repeat_nmi, %rdx
- cmpq 8(%rsp), %rdx
- ja 1f
- movq $end_repeat_nmi, %rdx
- cmpq 8(%rsp), %rdx
- ja nested_nmi_out
-
-1:
- /*
 * Modify the "iret" frame to point to repeat_nmi, forcing another
 * iteration of NMI handling.
 */
--
2.4.3
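
Written out as C, the ordering this patch establishes for classifying an incoming kernel-mode NMI looks roughly like the sketch below. The helpers and address variables are stand-ins for the entry_64.S labels and stack words; only the order and outcomes of the three checks are the point.

/* Sketch only; the real logic is assembler in entry_64.S. */
#include <stdbool.h>
#include <stdint.h>

static uintptr_t repeat_nmi_addr, end_repeat_nmi_addr;  /* label addresses */
static bool nmi_executing_flag;                 /* the "NMI executing" word */
static uintptr_t nmi_stack_lo, nmi_stack_hi;    /* NMI IST stack bounds */

enum nmi_entry_kind { FIRST_NMI, NESTED_NMI, RESUME_OUTER_NMI };

static enum nmi_entry_kind classify(uintptr_t rip, uintptr_t rsp)
{
        /*
         * 1. Interrupted the repeat_nmi..end_repeat_nmi copy loop: the outer
         *    NMI owns the "iret" frame, so do not touch it, just resume it.
         */
        if (rip >= repeat_nmi_addr && rip < end_repeat_nmi_addr)
                return RESUME_OUTER_NMI;

        /* 2. "NMI executing" is set: we are nested. */
        if (nmi_executing_flag)
                return NESTED_NMI;

        /*
         * 3. RSP is inside the NMI stack: the outer NMI already cleared
         *    "NMI executing" but has not IRETed yet, so we are still nested.
         */
        if (rsp > nmi_stack_lo && rsp <= nmi_stack_hi)
                return NESTED_NMI;

        return FIRST_NMI;
}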

@@ -0,0 +1,140 @@
From f1a66b1c4d231e2e9d3a1f7affecb35df3ce0464 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Fri, 10 Jul 2015 11:35:31 -0700
Subject: [PATCH 6/9] x86/nmi/64: Switch stacks on userspace NMI entry

commit 4fb2a8d9cb0efcd7405f1ad105d7f3c764afe02f upstream.

Returning to userspace is tricky: IRET can fail, and ESPFIX can
rearrange the stack prior to IRET.

The NMI nesting fixup relies on a precise stack layout and atomic
IRET. Rather than trying to teach the NMI nesting fixup to handle
ESPFIX and failed IRET, punt: run NMIs that came from user mode on
the normal kernel stack.

This will make some nested NMIs visible to C code, but the C code is
okay with that.

As a side effect, this should speed up perf: it eliminates an RDMSR
when NMIs come from user mode.

Fixes CVE-2015-3290.

Cc: stable@vger.kernel.org
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
[bwh: Backported to 4.0:
 - Adjust filename, context
 - s/restore_c_regs_and_iret/restore_args/
 - Use kernel_stack + KERNEL_STACK_OFFSET instead of cpu_current_top_of_stack]
[luto: Open-coded return path to avoid dependency on partial pt_regs details]
[labbott: just use cpu_current_top_of_stack instead]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/kernel/entry_64.S | 78 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 74 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
|
||||
index 6916856..75988f4 100644
|
||||
--- a/arch/x86/kernel/entry_64.S
|
||||
+++ b/arch/x86/kernel/entry_64.S
|
||||
@@ -1424,19 +1424,89 @@ ENTRY(nmi)
|
||||
* a nested NMI that updated the copy interrupt stack frame, a
|
||||
* jump will be made to the repeat_nmi code that will handle the second
|
||||
* NMI.
|
||||
+ *
|
||||
+ * However, espfix prevents us from directly returning to userspace
|
||||
+ * with a single IRET instruction. Similarly, IRET to user mode
|
||||
+ * can fault. We therefore handle NMIs from user space like
|
||||
+ * other IST entries.
|
||||
*/
|
||||
|
||||
/* Use %rdx as our temp variable throughout */
|
||||
pushq_cfi %rdx
|
||||
CFI_REL_OFFSET rdx, 0
|
||||
|
||||
+ testb $3, CS-RIP+8(%rsp)
|
||||
+ jz .Lnmi_from_kernel
|
||||
+
|
||||
/*
|
||||
- * If %cs was not the kernel segment, then the NMI triggered in user
|
||||
- * space, which means it is definitely not nested.
|
||||
+ * NMI from user mode. We need to run on the thread stack, but we
|
||||
+ * can't go through the normal entry paths: NMIs are masked, and
|
||||
+ * we don't want to enable interrupts, because then we'll end
|
||||
+ * up in an awkward situation in which IRQs are on but NMIs
|
||||
+ * are off.
|
||||
*/
|
||||
- cmpl $__KERNEL_CS, 16(%rsp)
|
||||
- jne first_nmi
|
||||
|
||||
+ SWAPGS
|
||||
+ cld
|
||||
+ movq %rsp, %rdx
|
||||
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
|
||||
+ pushq 5*8(%rdx) /* pt_regs->ss */
|
||||
+ pushq 4*8(%rdx) /* pt_regs->rsp */
|
||||
+ pushq 3*8(%rdx) /* pt_regs->flags */
|
||||
+ pushq 2*8(%rdx) /* pt_regs->cs */
|
||||
+ pushq 1*8(%rdx) /* pt_regs->rip */
|
||||
+ pushq $-1 /* pt_regs->orig_ax */
|
||||
+ pushq %rdi /* pt_regs->di */
|
||||
+ pushq %rsi /* pt_regs->si */
|
||||
+ pushq (%rdx) /* pt_regs->dx */
|
||||
+ pushq %rcx /* pt_regs->cx */
|
||||
+ pushq %rax /* pt_regs->ax */
|
||||
+ pushq %r8 /* pt_regs->r8 */
|
||||
+ pushq %r9 /* pt_regs->r9 */
|
||||
+ pushq %r10 /* pt_regs->r10 */
|
||||
+ pushq %r11 /* pt_regs->r11 */
|
||||
+ pushq %rbx /* pt_regs->rbx */
|
||||
+ pushq %rbp /* pt_regs->rbp */
|
||||
+ pushq %r12 /* pt_regs->r12 */
|
||||
+ pushq %r13 /* pt_regs->r13 */
|
||||
+ pushq %r14 /* pt_regs->r14 */
|
||||
+ pushq %r15 /* pt_regs->r15 */
|
||||
+
|
||||
+ /*
|
||||
+ * At this point we no longer need to worry about stack damage
|
||||
+ * due to nesting -- we're on the normal thread stack and we're
|
||||
+ * done with the NMI stack.
|
||||
+ */
|
||||
+
|
||||
+ movq %rsp, %rdi
|
||||
+ movq $-1, %rsi
|
||||
+ call do_nmi
|
||||
+
|
||||
+ /*
|
||||
+ * Return back to user mode. We must *not* do the normal exit
|
||||
+ * work, because we don't want to enable interrupts. Fortunately,
|
||||
+ * do_nmi doesn't modify pt_regs.
|
||||
+ */
|
||||
+ SWAPGS
|
||||
+
|
||||
+ /*
|
||||
+ * Open-code the entire return process for compatibility with varying
|
||||
+ * register layouts across different kernel versions.
|
||||
+ */
|
||||
+ addq $6*8, %rsp /* skip bx, bp, and r12-r15 */
|
||||
+ popq %r11 /* pt_regs->r11 */
|
||||
+ popq %r10 /* pt_regs->r10 */
|
||||
+ popq %r9 /* pt_regs->r9 */
|
||||
+ popq %r8 /* pt_regs->r8 */
|
||||
+ popq %rax /* pt_regs->ax */
|
||||
+ popq %rcx /* pt_regs->cx */
|
||||
+ popq %rdx /* pt_regs->dx */
|
||||
+ popq %rsi /* pt_regs->si */
|
||||
+ popq %rdi /* pt_regs->di */
|
||||
+ addq $8, %rsp /* skip orig_ax */
|
||||
+ INTERRUPT_RETURN
|
||||
+
|
||||
+.Lnmi_from_kernel:
|
||||
/*
|
||||
* Check the special variable on the stack to see if NMIs are
|
||||
* executing.
|
||||
--
|
||||
2.4.3
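
At a high level, the new user-mode path in the patch above does three things: detect that the NMI interrupted CPL 3, rebuild pt_regs on the normal thread stack, and return with an open-coded IRET that skips the usual interrupt-enabling exit work. A very loose C-shaped outline, with every helper and type invented for the sketch, is:

/* Outline only; the real implementation is assembler in entry_64.S. */
struct pt_regs { unsigned long cs; /* plus the rest of the register set */ };

static struct pt_regs thread_stack_regs;   /* stand-in for the task stack */

static void do_nmi(struct pt_regs *regs, long error_code)
{ (void)regs; (void)error_code; /* stub for the C handler */ }

/* testb $3, CS: the low two bits of CS give the privilege level. */
static int came_from_user(const struct pt_regs *hw_frame)
{
        return hw_frame->cs & 3;
}

static void nmi_from_user_sketch(const struct pt_regs *hw_frame)
{
        if (!came_from_user(hw_frame))
                return;     /* kernel-mode NMIs keep the old nesting logic */

        /*
         * Copy the hardware frame (and, in the real asm, every GP register)
         * onto the normal thread stack; after that we are done with the NMI
         * stack and nesting can no longer corrupt our return frame.
         */
        thread_stack_regs = *hw_frame;

        do_nmi(&thread_stack_regs, -1);

        /*
         * The asm then pops the registers by hand and IRETs directly back to
         * user mode, skipping the normal exit work so interrupts stay off.
         */
}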

@@ -0,0 +1,93 @@
From 8f3e2dab9567ee8c4e104f484941a858f353ea02 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Fri, 10 Jul 2015 17:25:53 -0700
Subject: [PATCH 7/7] x86/nmi/64: Use DF to avoid userspace RSP confusing
 nested NMI detection

commit dc68c0f2ec634b2cfecf879235564da58d422cee upstream.

We have a tricky bug in the nested NMI code: if we see RSP pointing
to the NMI stack on NMI entry from kernel mode, we assume that we
are executing a nested NMI.

This isn't quite true. A malicious userspace program can point RSP
at the NMI stack, issue SYSCALL, and arrange for an NMI to happen
while RSP is still pointing at the NMI stack.

Fix it with a sneaky trick. Set DF in the region of code that the RSP
check is intended to detect. IRET will clear DF atomically.

(Note: other than paravirt, there's little need for all this complexity.
We could check RIP instead of RSP.)

Fixes CVE-2015-3291.

Cc: stable@vger.kernel.org
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
[bwh: Backported to 4.0: adjust filename, context]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
 arch/x86/kernel/entry_64.S | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
|
||||
index be2c4ba..ce4431c 100644
|
||||
--- a/arch/x86/kernel/entry_64.S
|
||||
+++ b/arch/x86/kernel/entry_64.S
|
||||
@@ -1580,7 +1580,14 @@ ENTRY(nmi)
|
||||
/*
|
||||
* Now test if the previous stack was an NMI stack. This covers
|
||||
* the case where we interrupt an outer NMI after it clears
|
||||
- * "NMI executing" but before IRET.
|
||||
+ * "NMI executing" but before IRET. We need to be careful, though:
|
||||
+ * there is one case in which RSP could point to the NMI stack
|
||||
+ * despite there being no NMI active: naughty userspace controls
|
||||
+ * RSP at the very beginning of the SYSCALL targets. We can
|
||||
+ * pull a fast one on naughty userspace, though: we program
|
||||
+ * SYSCALL to mask DF, so userspace cannot cause DF to be set
|
||||
+ * if it controls the kernel's RSP. We set DF before we clear
|
||||
+ * "NMI executing".
|
||||
*/
|
||||
lea 6*8(%rsp), %rdx
|
||||
/* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */
|
||||
@@ -1591,10 +1598,16 @@ ENTRY(nmi)
|
||||
cmpq %rdx, 4*8(%rsp)
|
||||
/* If it is below the NMI stack, it is a normal NMI */
|
||||
jb first_nmi
|
||||
- /* Ah, it is within the NMI stack, treat it as nested */
|
||||
+
|
||||
+ /* Ah, it is within the NMI stack. */
|
||||
+
|
||||
+ testb $(X86_EFLAGS_DF >> 8), (3*8 + 1)(%rsp)
|
||||
+ jz first_nmi /* RSP was user controlled. */
|
||||
|
||||
CFI_REMEMBER_STATE
|
||||
|
||||
+ /* This is a nested NMI. */
|
||||
+
|
||||
nested_nmi:
|
||||
/*
|
||||
* Modify the "iret" frame to point to repeat_nmi, forcing another
|
||||
@@ -1709,8 +1722,16 @@ nmi_restore:
|
||||
/* Point RSP at the "iret" frame. */
|
||||
REMOVE_PT_GPREGS_FROM_STACK 6*8
|
||||
|
||||
- /* Clear "NMI executing". */
|
||||
- movq $0, 5*8(%rsp)
|
||||
+ /*
|
||||
+ * Clear "NMI executing". Set DF first so that we can easily
|
||||
+ * distinguish the remaining code between here and IRET from
|
||||
+ * the SYSCALL entry and exit paths. On a native kernel, we
|
||||
+ * could just inspect RIP, but, on paravirt kernels,
|
||||
+ * INTERRUPT_RETURN can translate into a jump into a
|
||||
+ * hypercall page.
|
||||
+ */
|
||||
+ std
|
||||
+ movq $0, 5*8(%rsp) /* clear "NMI executing" */
|
||||
|
||||
/*
|
||||
* INTERRUPT_RETURN reads the "iret" frame and exits the NMI
|
||||
--
|
||||
2.4.3
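
The resulting test can be summarized in C as: RSP inside the NMI stack only counts as a nested NMI when DF is also set. The NMI exit path sets DF (std) right before clearing "NMI executing", SYSCALL masks DF, and IRET clears it again, so userspace cannot forge the combination. A hedged sketch, with placeholder stack bounds:

/* Sketch only; RSP and EFLAGS come from the interrupt frame in the asm. */
#include <stdbool.h>
#include <stdint.h>

#define X86_EFLAGS_DF (1u << 10)                 /* direction flag */

static uintptr_t nmi_stack_lo, nmi_stack_hi;     /* NMI IST stack bounds */

static bool is_nested_nmi(uintptr_t rsp, uint64_t rflags)
{
        bool on_nmi_stack = rsp > nmi_stack_lo && rsp <= nmi_stack_hi;

        /* Without DF, an RSP on the NMI stack is treated as a first NMI. */
        return on_nmi_stack && (rflags & X86_EFLAGS_DF);
}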

@@ -0,0 +1,196 @@
From 44c76628aaf7408eb93f2020d4aee0894f1d408e Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 15 Jul 2015 10:29:33 -0700
Subject: [PATCH 2/7] x86/nmi: Enable nested do_nmi() handling for 64-bit
 kernels

32-bit kernels handle nested NMIs in C. Enable the exact same
handling on 64-bit kernels as well. This isn't currently
necessary, but it will become necessary once the asm code starts
allowing limited nesting.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/nmi.c | 123 +++++++++++++++++++++-----------------------
 1 file changed, 52 insertions(+), 71 deletions(-)

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
|
||||
index c3e985d..d8766b1 100644
|
||||
--- a/arch/x86/kernel/nmi.c
|
||||
+++ b/arch/x86/kernel/nmi.c
|
||||
@@ -408,15 +408,15 @@ static void default_do_nmi(struct pt_regs *regs)
|
||||
NOKPROBE_SYMBOL(default_do_nmi);
|
||||
|
||||
/*
|
||||
- * NMIs can hit breakpoints which will cause it to lose its
|
||||
- * NMI context with the CPU when the breakpoint does an iret.
|
||||
- */
|
||||
-#ifdef CONFIG_X86_32
|
||||
-/*
|
||||
- * For i386, NMIs use the same stack as the kernel, and we can
|
||||
- * add a workaround to the iret problem in C (preventing nested
|
||||
- * NMIs if an NMI takes a trap). Simply have 3 states the NMI
|
||||
- * can be in:
|
||||
+ * NMIs can hit breakpoints which will cause it to lose its NMI context
|
||||
+ * with the CPU when the breakpoint or page fault does an IRET.
|
||||
+ *
|
||||
+ * As a result, NMIs can nest if NMIs get unmasked due an IRET during
|
||||
+ * NMI processing. On x86_64, the asm glue protects us from nested NMIs
|
||||
+ * if the outer NMI came from kernel mode, but we can still nest if the
|
||||
+ * outer NMI came from user mode.
|
||||
+ *
|
||||
+ * To handle these nested NMIs, we have three states:
|
||||
*
|
||||
* 1) not running
|
||||
* 2) executing
|
||||
@@ -430,15 +430,14 @@ NOKPROBE_SYMBOL(default_do_nmi);
|
||||
* (Note, the latch is binary, thus multiple NMIs triggering,
|
||||
* when one is running, are ignored. Only one NMI is restarted.)
|
||||
*
|
||||
- * If an NMI hits a breakpoint that executes an iret, another
|
||||
- * NMI can preempt it. We do not want to allow this new NMI
|
||||
- * to run, but we want to execute it when the first one finishes.
|
||||
- * We set the state to "latched", and the exit of the first NMI will
|
||||
- * perform a dec_return, if the result is zero (NOT_RUNNING), then
|
||||
- * it will simply exit the NMI handler. If not, the dec_return
|
||||
- * would have set the state to NMI_EXECUTING (what we want it to
|
||||
- * be when we are running). In this case, we simply jump back
|
||||
- * to rerun the NMI handler again, and restart the 'latched' NMI.
|
||||
+ * If an NMI executes an iret, another NMI can preempt it. We do not
|
||||
+ * want to allow this new NMI to run, but we want to execute it when the
|
||||
+ * first one finishes. We set the state to "latched", and the exit of
|
||||
+ * the first NMI will perform a dec_return, if the result is zero
|
||||
+ * (NOT_RUNNING), then it will simply exit the NMI handler. If not, the
|
||||
+ * dec_return would have set the state to NMI_EXECUTING (what we want it
|
||||
+ * to be when we are running). In this case, we simply jump back to
|
||||
+ * rerun the NMI handler again, and restart the 'latched' NMI.
|
||||
*
|
||||
* No trap (breakpoint or page fault) should be hit before nmi_restart,
|
||||
* thus there is no race between the first check of state for NOT_RUNNING
|
||||
@@ -461,49 +460,36 @@ enum nmi_states {
|
||||
static DEFINE_PER_CPU(enum nmi_states, nmi_state);
|
||||
static DEFINE_PER_CPU(unsigned long, nmi_cr2);
|
||||
|
||||
-#define nmi_nesting_preprocess(regs) \
|
||||
- do { \
|
||||
- if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { \
|
||||
- this_cpu_write(nmi_state, NMI_LATCHED); \
|
||||
- return; \
|
||||
- } \
|
||||
- this_cpu_write(nmi_state, NMI_EXECUTING); \
|
||||
- this_cpu_write(nmi_cr2, read_cr2()); \
|
||||
- } while (0); \
|
||||
- nmi_restart:
|
||||
-
|
||||
-#define nmi_nesting_postprocess() \
|
||||
- do { \
|
||||
- if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) \
|
||||
- write_cr2(this_cpu_read(nmi_cr2)); \
|
||||
- if (this_cpu_dec_return(nmi_state)) \
|
||||
- goto nmi_restart; \
|
||||
- } while (0)
|
||||
-#else /* x86_64 */
|
||||
+#ifdef CONFIG_X86_64
|
||||
/*
|
||||
- * In x86_64 things are a bit more difficult. This has the same problem
|
||||
- * where an NMI hitting a breakpoint that calls iret will remove the
|
||||
- * NMI context, allowing a nested NMI to enter. What makes this more
|
||||
- * difficult is that both NMIs and breakpoints have their own stack.
|
||||
- * When a new NMI or breakpoint is executed, the stack is set to a fixed
|
||||
- * point. If an NMI is nested, it will have its stack set at that same
|
||||
- * fixed address that the first NMI had, and will start corrupting the
|
||||
- * stack. This is handled in entry_64.S, but the same problem exists with
|
||||
- * the breakpoint stack.
|
||||
+ * In x86_64, we need to handle breakpoint -> NMI -> breakpoint. Without
|
||||
+ * some care, the inner breakpoint will clobber the outer breakpoint's
|
||||
+ * stack.
|
||||
*
|
||||
- * If a breakpoint is being processed, and the debug stack is being used,
|
||||
- * if an NMI comes in and also hits a breakpoint, the stack pointer
|
||||
- * will be set to the same fixed address as the breakpoint that was
|
||||
- * interrupted, causing that stack to be corrupted. To handle this case,
|
||||
- * check if the stack that was interrupted is the debug stack, and if
|
||||
- * so, change the IDT so that new breakpoints will use the current stack
|
||||
- * and not switch to the fixed address. On return of the NMI, switch back
|
||||
- * to the original IDT.
|
||||
+ * If a breakpoint is being processed, and the debug stack is being
|
||||
+ * used, if an NMI comes in and also hits a breakpoint, the stack
|
||||
+ * pointer will be set to the same fixed address as the breakpoint that
|
||||
+ * was interrupted, causing that stack to be corrupted. To handle this
|
||||
+ * case, check if the stack that was interrupted is the debug stack, and
|
||||
+ * if so, change the IDT so that new breakpoints will use the current
|
||||
+ * stack and not switch to the fixed address. On return of the NMI,
|
||||
+ * switch back to the original IDT.
|
||||
*/
|
||||
static DEFINE_PER_CPU(int, update_debug_stack);
|
||||
+#endif
|
||||
|
||||
-static inline void nmi_nesting_preprocess(struct pt_regs *regs)
|
||||
+dotraplinkage notrace void
|
||||
+do_nmi(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
+ if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {
|
||||
+ this_cpu_write(nmi_state, NMI_LATCHED);
|
||||
+ return;
|
||||
+ }
|
||||
+ this_cpu_write(nmi_state, NMI_EXECUTING);
|
||||
+ this_cpu_write(nmi_cr2, read_cr2());
|
||||
+nmi_restart:
|
||||
+
|
||||
+#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* If we interrupted a breakpoint, it is possible that
|
||||
* the nmi handler will have breakpoints too. We need to
|
||||
@@ -514,22 +500,8 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs)
|
||||
debug_stack_set_zero();
|
||||
this_cpu_write(update_debug_stack, 1);
|
||||
}
|
||||
-}
|
||||
-
|
||||
-static inline void nmi_nesting_postprocess(void)
|
||||
-{
|
||||
- if (unlikely(this_cpu_read(update_debug_stack))) {
|
||||
- debug_stack_reset();
|
||||
- this_cpu_write(update_debug_stack, 0);
|
||||
- }
|
||||
-}
|
||||
#endif
|
||||
|
||||
-dotraplinkage notrace void
|
||||
-do_nmi(struct pt_regs *regs, long error_code)
|
||||
-{
|
||||
- nmi_nesting_preprocess(regs);
|
||||
-
|
||||
nmi_enter();
|
||||
|
||||
inc_irq_stat(__nmi_count);
|
||||
@@ -539,8 +511,17 @@ do_nmi(struct pt_regs *regs, long error_code)
|
||||
|
||||
nmi_exit();
|
||||
|
||||
- /* On i386, may loop back to preprocess */
|
||||
- nmi_nesting_postprocess();
|
||||
+#ifdef CONFIG_X86_64
|
||||
+ if (unlikely(this_cpu_read(update_debug_stack))) {
|
||||
+ debug_stack_reset();
|
||||
+ this_cpu_write(update_debug_stack, 0);
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
+ if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
|
||||
+ write_cr2(this_cpu_read(nmi_cr2));
|
||||
+ if (this_cpu_dec_return(nmi_state))
|
||||
+ goto nmi_restart;
|
||||
}
|
||||
NOKPROBE_SYMBOL(do_nmi);
|
||||
|
||||
--
|
||||
2.4.3
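
The rewritten do_nmi() above is essentially a three-state latch: not running, executing, latched. The standalone model below keeps just that latch, with per-CPU accessors replaced by plain variables and the handler body stubbed, so it can be compiled and run on its own.

/* Standalone model of the NMI nesting latch; not the kernel's code. */
#include <stdio.h>

enum nmi_states { NMI_NOT_RUNNING = 0, NMI_EXECUTING, NMI_LATCHED };

static int nmi_state;                    /* per-CPU in the kernel */

static void handle_one_nmi(void) { puts("handling NMI"); }

static void do_nmi_model(void)
{
        if (nmi_state != NMI_NOT_RUNNING) {
                /* Nested NMI: remember that one more round of work is owed. */
                nmi_state = NMI_LATCHED;
                return;
        }
        nmi_state = NMI_EXECUTING;

nmi_restart:
        handle_one_nmi();

        /*
         * The dec_return latch: LATCHED (2) drops to EXECUTING (1), so we
         * rerun the handler; EXECUTING (1) drops to NOT_RUNNING (0), so we
         * are done.
         */
        if (--nmi_state != NMI_NOT_RUNNING)
                goto nmi_restart;
}

int main(void)
{
        do_nmi_model();                  /* a single, non-nested NMI */
        return 0;
}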