6543 lines
206 KiB
Diff
6543 lines
206 KiB
Diff
Documentation/admin-guide/kernel-parameters.txt | 25 ++
|
|
Makefile | 4 +
|
|
arch/arm/Kconfig | 4 +-
|
|
arch/arm64/Kconfig | 3 +-
|
|
arch/s390/include/asm/ipl.h | 1 +
|
|
arch/s390/kernel/ipl.c | 5 +
|
|
arch/s390/kernel/setup.c | 4 +
|
|
arch/um/kernel/um_arch.c | 4 +
|
|
arch/x86/Kconfig | 103 ++++-
|
|
arch/x86/Makefile | 6 +
|
|
arch/x86/boot/header.S | 4 +
|
|
arch/x86/entry/Makefile | 2 +-
|
|
arch/x86/entry/calling.h | 72 +++-
|
|
arch/x86/entry/entry.S | 22 +
|
|
arch/x86/entry/entry_32.S | 2 -
|
|
arch/x86/entry/entry_64.S | 88 +++-
|
|
arch/x86/entry/entry_64_compat.S | 21 +-
|
|
arch/x86/entry/vdso/Makefile | 1 +
|
|
arch/x86/entry/vsyscall/vsyscall_emu_64.S | 9 +-
|
|
arch/x86/include/asm/alternative.h | 1 +
|
|
arch/x86/include/asm/cpufeatures.h | 12 +-
|
|
arch/x86/include/asm/disabled-features.h | 21 +-
|
|
arch/x86/include/asm/efi.h | 5 +
|
|
arch/x86/include/asm/linkage.h | 8 +
|
|
arch/x86/include/asm/msr-index.h | 13 +
|
|
arch/x86/include/asm/nospec-branch.h | 69 ++-
|
|
arch/x86/include/asm/static_call.h | 19 +-
|
|
arch/x86/include/asm/traps.h | 2 +-
|
|
arch/x86/include/asm/unwind_hints.h | 14 +-
|
|
arch/x86/kernel/alternative.c | 69 +++
|
|
arch/x86/kernel/asm-offsets.c | 6 +
|
|
arch/x86/kernel/cpu/amd.c | 46 +-
|
|
arch/x86/kernel/cpu/bugs.c | 475 ++++++++++++++++++---
|
|
arch/x86/kernel/cpu/common.c | 61 ++-
|
|
arch/x86/kernel/cpu/cpu.h | 2 +
|
|
arch/x86/kernel/cpu/hygon.c | 6 +
|
|
arch/x86/kernel/cpu/scattered.c | 1 +
|
|
arch/x86/kernel/ftrace.c | 7 +-
|
|
arch/x86/kernel/head_32.S | 1 +
|
|
arch/x86/kernel/head_64.S | 5 +
|
|
arch/x86/kernel/module.c | 8 +-
|
|
arch/x86/kernel/process.c | 2 +-
|
|
arch/x86/kernel/relocate_kernel_32.S | 25 +-
|
|
arch/x86/kernel/relocate_kernel_64.S | 23 +-
|
|
arch/x86/kernel/setup.c | 22 +-
|
|
arch/x86/kernel/static_call.c | 51 ++-
|
|
arch/x86/kernel/traps.c | 19 +-
|
|
arch/x86/kernel/vmlinux.lds.S | 9 +-
|
|
arch/x86/kvm/emulate.c | 35 +-
|
|
arch/x86/kvm/svm/vmenter.S | 18 +
|
|
arch/x86/kvm/vmx/capabilities.h | 4 +-
|
|
arch/x86/kvm/vmx/nested.c | 2 +-
|
|
arch/x86/kvm/vmx/run_flags.h | 8 +
|
|
arch/x86/kvm/vmx/vmenter.S | 194 +++++----
|
|
arch/x86/kvm/vmx/vmx.c | 84 ++--
|
|
arch/x86/kvm/vmx/vmx.h | 10 +-
|
|
arch/x86/kvm/vmx/vmx_ops.h | 2 +-
|
|
arch/x86/kvm/x86.c | 4 +-
|
|
arch/x86/lib/memmove_64.S | 7 +-
|
|
arch/x86/lib/retpoline.S | 79 +++-
|
|
arch/x86/mm/mem_encrypt_boot.S | 10 +-
|
|
arch/x86/net/bpf_jit_comp.c | 26 +-
|
|
arch/x86/platform/efi/efi_thunk_64.S | 5 +-
|
|
arch/x86/xen/setup.c | 6 +-
|
|
arch/x86/xen/xen-asm.S | 30 +-
|
|
arch/x86/xen/xen-head.S | 1 +
|
|
arch/x86/xen/xen-ops.h | 6 +-
|
|
drivers/acpi/apei/hest.c | 8 +
|
|
drivers/acpi/irq.c | 17 +-
|
|
drivers/acpi/scan.c | 9 +
|
|
drivers/ata/libahci.c | 18 +
|
|
drivers/base/cpu.c | 8 +
|
|
drivers/char/ipmi/ipmi_dmi.c | 15 +
|
|
drivers/char/ipmi/ipmi_msghandler.c | 16 +-
|
|
drivers/firmware/efi/Kconfig | 12 +
|
|
drivers/firmware/efi/Makefile | 1 +
|
|
drivers/firmware/efi/efi.c | 124 ++++--
|
|
drivers/firmware/efi/libstub/efistub.h | 74 ++++
|
|
drivers/firmware/efi/libstub/x86-stub.c | 119 +++++-
|
|
drivers/firmware/efi/secureboot.c | 38 ++
|
|
drivers/firmware/sysfb.c | 18 +-
|
|
drivers/gpu/drm/i915/display/intel_psr.c | 9 +
|
|
drivers/hid/hid-rmi.c | 64 ---
|
|
drivers/hwtracing/coresight/coresight-etm4x-core.c | 19 +
|
|
drivers/idle/intel_idle.c | 44 +-
|
|
drivers/input/rmi4/rmi_driver.c | 124 +++---
|
|
drivers/iommu/iommu.c | 22 +
|
|
drivers/nvme/host/core.c | 22 +-
|
|
drivers/nvme/host/multipath.c | 19 +-
|
|
drivers/nvme/host/nvme.h | 4 +
|
|
drivers/pci/quirks.c | 24 ++
|
|
drivers/usb/core/hub.c | 7 +
|
|
include/linux/cpu.h | 2 +
|
|
include/linux/efi.h | 24 +-
|
|
include/linux/kvm_host.h | 2 +-
|
|
include/linux/lsm_hook_defs.h | 2 +
|
|
include/linux/lsm_hooks.h | 6 +
|
|
include/linux/objtool.h | 9 +-
|
|
include/linux/rmi.h | 1 +
|
|
include/linux/security.h | 5 +
|
|
init/Kconfig | 2 +-
|
|
kernel/module_signing.c | 9 +-
|
|
scripts/Makefile.build | 1 +
|
|
scripts/link-vmlinux.sh | 3 +
|
|
scripts/tags.sh | 2 +
|
|
security/Kconfig | 11 -
|
|
security/integrity/platform_certs/load_uefi.c | 6 +-
|
|
security/lockdown/Kconfig | 13 +
|
|
security/lockdown/lockdown.c | 1 +
|
|
security/security.c | 6 +
|
|
tools/arch/x86/include/asm/cpufeatures.h | 12 +-
|
|
tools/arch/x86/include/asm/disabled-features.h | 21 +-
|
|
tools/arch/x86/include/asm/msr-index.h | 13 +
|
|
tools/include/linux/objtool.h | 9 +-
|
|
tools/objtool/arch/x86/decode.c | 5 +
|
|
tools/objtool/builtin-check.c | 4 +-
|
|
tools/objtool/check.c | 331 +++++++++++++-
|
|
tools/objtool/include/objtool/arch.h | 1 +
|
|
tools/objtool/include/objtool/builtin.h | 2 +-
|
|
tools/objtool/include/objtool/check.h | 24 +-
|
|
tools/objtool/include/objtool/elf.h | 1 +
|
|
tools/objtool/include/objtool/objtool.h | 1 +
|
|
tools/objtool/objtool.c | 1 +
|
|
123 files changed, 2656 insertions(+), 622 deletions(-)
|
|
|
|
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
|
|
index c4893782055b..eb92195ca015 100644
|
|
--- a/Documentation/admin-guide/kernel-parameters.txt
|
|
+++ b/Documentation/admin-guide/kernel-parameters.txt
|
|
@@ -5124,6 +5124,30 @@
|
|
|
|
retain_initrd [RAM] Keep initrd memory after extraction
|
|
|
|
+ retbleed= [X86] Control mitigation of RETBleed (Arbitrary
|
|
+ Speculative Code Execution with Return Instructions)
|
|
+ vulnerability.
|
|
+
|
|
+ off - no mitigation
|
|
+ auto - automatically select a mitigation
|
|
+ auto,nosmt - automatically select a mitigation,
|
|
+ disabling SMT if necessary for
|
|
+ the full mitigation (only on Zen1
|
|
+ and older without STIBP).
|
|
+ ibpb - mitigate short speculation windows on
|
|
+ basic block boundaries too. Safe, highest
|
|
+ perf impact.
|
|
+ unret - force enable untrained return thunks,
|
|
+ only effective on AMD f15h-f17h
|
|
+ based systems.
|
|
+ unret,nosmt - like unret, will disable SMT when STIBP
|
|
+ is not available.
|
|
+
|
|
+ Selecting 'auto' will choose a mitigation method at run
|
|
+ time according to the CPU.
|
|
+
|
|
+ Not specifying this option is equivalent to retbleed=auto.
|
|
+
|
|
rfkill.default_state=
|
|
0 "airplane mode". All wifi, bluetooth, wimax, gps, fm,
|
|
etc. communication is blocked by default.
|
|
@@ -5482,6 +5506,7 @@
|
|
eibrs - enhanced IBRS
|
|
eibrs,retpoline - enhanced IBRS + Retpolines
|
|
eibrs,lfence - enhanced IBRS + LFENCE
|
|
+ ibrs - use IBRS to protect kernel
|
|
|
|
Not specifying this option is equivalent to
|
|
spectre_v2=auto.
|
|
diff --git a/Makefile b/Makefile
|
|
index 1f3c753cb28d..89ed649fae1b 100644
|
|
--- a/Makefile
|
|
+++ b/Makefile
|
|
@@ -18,6 +18,10 @@ $(if $(filter __%, $(MAKECMDGOALS)), \
|
|
PHONY := __all
|
|
__all:
|
|
|
|
+# Set RHEL variables
|
|
+# Use this spot to avoid future merge conflicts
|
|
+include Makefile.rhelver
|
|
+
|
|
# We are using a recursive build, so we need to do a little thinking
|
|
# to get the ordering right.
|
|
#
|
|
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
|
|
index 2e8091e2d8a8..32e0403314b1 100644
|
|
--- a/arch/arm/Kconfig
|
|
+++ b/arch/arm/Kconfig
|
|
@@ -1486,9 +1486,9 @@ config HIGHMEM
|
|
If unsure, say n.
|
|
|
|
config HIGHPTE
|
|
- bool "Allocate 2nd-level pagetables from highmem" if EXPERT
|
|
+ bool "Allocate 2nd-level pagetables from highmem"
|
|
depends on HIGHMEM
|
|
- default y
|
|
+ default n
|
|
help
|
|
The VM uses one page of physical memory for each page table.
|
|
For systems with a lot of processes, this can use a lot of
|
|
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
|
|
index 20ea89d9ac2f..774c1a05152d 100644
|
|
--- a/arch/arm64/Kconfig
|
|
+++ b/arch/arm64/Kconfig
|
|
@@ -1126,7 +1126,7 @@ endchoice
|
|
|
|
config ARM64_FORCE_52BIT
|
|
bool "Force 52-bit virtual addresses for userspace"
|
|
- depends on ARM64_VA_BITS_52 && EXPERT
|
|
+ depends on ARM64_VA_BITS_52
|
|
help
|
|
For systems with 52-bit userspace VAs enabled, the kernel will attempt
|
|
to maintain compatibility with older software by providing 48-bit VAs
|
|
@@ -1365,6 +1365,7 @@ config XEN
|
|
config FORCE_MAX_ZONEORDER
|
|
int
|
|
default "14" if ARM64_64K_PAGES
|
|
+ default "13" if (ARCH_THUNDER && !ARM64_64K_PAGES)
|
|
default "12" if ARM64_16K_PAGES
|
|
default "11"
|
|
help
|
|
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
|
|
index 3f8ee257f9aa..3ab92feb6241 100644
|
|
--- a/arch/s390/include/asm/ipl.h
|
|
+++ b/arch/s390/include/asm/ipl.h
|
|
@@ -128,6 +128,7 @@ int ipl_report_add_component(struct ipl_report *report, struct kexec_buf *kbuf,
|
|
unsigned char flags, unsigned short cert);
|
|
int ipl_report_add_certificate(struct ipl_report *report, void *key,
|
|
unsigned long addr, unsigned long len);
|
|
+bool ipl_get_secureboot(void);
|
|
|
|
/*
|
|
* DIAG 308 support
|
|
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
|
|
index 1cc85b8ff42e..b7ee128c67ce 100644
|
|
--- a/arch/s390/kernel/ipl.c
|
|
+++ b/arch/s390/kernel/ipl.c
|
|
@@ -2216,3 +2216,8 @@ int ipl_report_free(struct ipl_report *report)
|
|
}
|
|
|
|
#endif
|
|
+
|
|
+bool ipl_get_secureboot(void)
|
|
+{
|
|
+ return !!ipl_secure_flag;
|
|
+}
|
|
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
|
|
index 2cef49983e9e..c50998b4b554 100644
|
|
--- a/arch/s390/kernel/setup.c
|
|
+++ b/arch/s390/kernel/setup.c
|
|
@@ -49,6 +49,7 @@
|
|
#include <linux/memory.h>
|
|
#include <linux/compat.h>
|
|
#include <linux/start_kernel.h>
|
|
+#include <linux/security.h>
|
|
#include <linux/hugetlb.h>
|
|
#include <linux/kmemleak.h>
|
|
|
|
@@ -970,6 +971,9 @@ void __init setup_arch(char **cmdline_p)
|
|
|
|
log_component_list();
|
|
|
|
+ if (ipl_get_secureboot())
|
|
+ security_lock_kernel_down("Secure IPL mode", LOCKDOWN_INTEGRITY_MAX);
|
|
+
|
|
/* Have one command line that is parsed and saved in /proc/cmdline */
|
|
/* boot_command_line has been already set up in early.c */
|
|
*cmdline_p = boot_command_line;
|
|
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
|
|
index 0760e24f2eba..9838967d0b2f 100644
|
|
--- a/arch/um/kernel/um_arch.c
|
|
+++ b/arch/um/kernel/um_arch.c
|
|
@@ -432,6 +432,10 @@ void apply_retpolines(s32 *start, s32 *end)
|
|
{
|
|
}
|
|
|
|
+void apply_returns(s32 *start, s32 *end)
|
|
+{
|
|
+}
|
|
+
|
|
void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
|
|
{
|
|
}
|
|
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
|
|
index b2c65f573353..4d1d87f76a74 100644
|
|
--- a/arch/x86/Kconfig
|
|
+++ b/arch/x86/Kconfig
|
|
@@ -457,27 +457,6 @@ config GOLDFISH
|
|
def_bool y
|
|
depends on X86_GOLDFISH
|
|
|
|
-config RETPOLINE
|
|
- bool "Avoid speculative indirect branches in kernel"
|
|
- default y
|
|
- help
|
|
- Compile kernel with the retpoline compiler options to guard against
|
|
- kernel-to-user data leaks by avoiding speculative indirect
|
|
- branches. Requires a compiler with -mindirect-branch=thunk-extern
|
|
- support for full protection. The kernel may run slower.
|
|
-
|
|
-config CC_HAS_SLS
|
|
- def_bool $(cc-option,-mharden-sls=all)
|
|
-
|
|
-config SLS
|
|
- bool "Mitigate Straight-Line-Speculation"
|
|
- depends on CC_HAS_SLS && X86_64
|
|
- default n
|
|
- help
|
|
- Compile the kernel with straight-line-speculation options to guard
|
|
- against straight line speculation. The kernel image might be slightly
|
|
- larger.
|
|
-
|
|
config X86_CPU_RESCTRL
|
|
bool "x86 CPU resource control support"
|
|
depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
|
|
@@ -2449,6 +2428,88 @@ source "kernel/livepatch/Kconfig"
|
|
|
|
endmenu
|
|
|
|
+config CC_HAS_SLS
|
|
+ def_bool $(cc-option,-mharden-sls=all)
|
|
+
|
|
+config CC_HAS_RETURN_THUNK
|
|
+ def_bool $(cc-option,-mfunction-return=thunk-extern)
|
|
+
|
|
+menuconfig SPECULATION_MITIGATIONS
|
|
+ bool "Mitigations for speculative execution vulnerabilities"
|
|
+ default y
|
|
+ help
|
|
+ Say Y here to enable options which enable mitigations for
|
|
+ speculative execution hardware vulnerabilities.
|
|
+
|
|
+ If you say N, all mitigations will be disabled. You really
|
|
+ should know what you are doing to say so.
|
|
+
|
|
+if SPECULATION_MITIGATIONS
|
|
+
|
|
+config PAGE_TABLE_ISOLATION
|
|
+ bool "Remove the kernel mapping in user mode"
|
|
+ default y
|
|
+ depends on (X86_64 || X86_PAE)
|
|
+ help
|
|
+ This feature reduces the number of hardware side channels by
|
|
+ ensuring that the majority of kernel addresses are not mapped
|
|
+ into userspace.
|
|
+
|
|
+ See Documentation/x86/pti.rst for more details.
|
|
+
|
|
+config RETPOLINE
|
|
+ bool "Avoid speculative indirect branches in kernel"
|
|
+ default y
|
|
+ help
|
|
+ Compile kernel with the retpoline compiler options to guard against
|
|
+ kernel-to-user data leaks by avoiding speculative indirect
|
|
+ branches. Requires a compiler with -mindirect-branch=thunk-extern
|
|
+ support for full protection. The kernel may run slower.
|
|
+
|
|
+config RETHUNK
|
|
+ bool "Enable return-thunks"
|
|
+ depends on RETPOLINE && CC_HAS_RETURN_THUNK
|
|
+ default y
|
|
+ help
|
|
+ Compile the kernel with the return-thunks compiler option to guard
|
|
+ against kernel-to-user data leaks by avoiding return speculation.
|
|
+ Requires a compiler with -mfunction-return=thunk-extern
|
|
+ support for full protection. The kernel may run slower.
|
|
+
|
|
+config CPU_UNRET_ENTRY
|
|
+ bool "Enable UNRET on kernel entry"
|
|
+ depends on CPU_SUP_AMD && RETHUNK
|
|
+ default y
|
|
+ help
|
|
+ Compile the kernel with support for the retbleed=unret mitigation.
|
|
+
|
|
+config CPU_IBPB_ENTRY
|
|
+ bool "Enable IBPB on kernel entry"
|
|
+ depends on CPU_SUP_AMD
|
|
+ default y
|
|
+ help
|
|
+ Compile the kernel with support for the retbleed=ibpb mitigation.
|
|
+
|
|
+config CPU_IBRS_ENTRY
|
|
+ bool "Enable IBRS on kernel entry"
|
|
+ depends on CPU_SUP_INTEL
|
|
+ default y
|
|
+ help
|
|
+ Compile the kernel with support for the spectre_v2=ibrs mitigation.
|
|
+ This mitigates both spectre_v2 and retbleed at great cost to
|
|
+ performance.
|
|
+
|
|
+config SLS
|
|
+ bool "Mitigate Straight-Line-Speculation"
|
|
+ depends on CC_HAS_SLS && X86_64
|
|
+ default n
|
|
+ help
|
|
+ Compile the kernel with straight-line-speculation options to guard
|
|
+ against straight line speculation. The kernel image might be slightly
|
|
+ larger.
|
|
+
|
|
+endif
|
|
+
|
|
config ARCH_HAS_ADD_PAGES
|
|
def_bool y
|
|
depends on ARCH_ENABLE_MEMORY_HOTPLUG
|
|
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
|
|
index 63d50f65b828..fb0de637411c 100644
|
|
--- a/arch/x86/Makefile
|
|
+++ b/arch/x86/Makefile
|
|
@@ -21,6 +21,12 @@ ifdef CONFIG_CC_IS_CLANG
|
|
RETPOLINE_CFLAGS := -mretpoline-external-thunk
|
|
RETPOLINE_VDSO_CFLAGS := -mretpoline
|
|
endif
|
|
+
|
|
+ifdef CONFIG_RETHUNK
|
|
+RETHUNK_CFLAGS := -mfunction-return=thunk-extern
|
|
+RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS)
|
|
+endif
|
|
+
|
|
export RETPOLINE_CFLAGS
|
|
export RETPOLINE_VDSO_CFLAGS
|
|
|
|
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
|
|
index 6dbd7e9f74c9..0352e4589efa 100644
|
|
--- a/arch/x86/boot/header.S
|
|
+++ b/arch/x86/boot/header.S
|
|
@@ -163,7 +163,11 @@ extra_header_fields:
|
|
.long 0x200 # SizeOfHeaders
|
|
.long 0 # CheckSum
|
|
.word IMAGE_SUBSYSTEM_EFI_APPLICATION # Subsystem (EFI application)
|
|
+#ifdef CONFIG_DXE_MEM_ATTRIBUTES
|
|
+ .word IMAGE_DLL_CHARACTERISTICS_NX_COMPAT # DllCharacteristics
|
|
+#else
|
|
.word 0 # DllCharacteristics
|
|
+#endif
|
|
#ifdef CONFIG_X86_32
|
|
.long 0 # SizeOfStackReserve
|
|
.long 0 # SizeOfStackCommit
|
|
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
|
|
index 7fec5dcf6438..eeadbd7d92cc 100644
|
|
--- a/arch/x86/entry/Makefile
|
|
+++ b/arch/x86/entry/Makefile
|
|
@@ -11,7 +11,7 @@ CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE)
|
|
|
|
CFLAGS_common.o += -fno-stack-protector
|
|
|
|
-obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
|
|
+obj-y := entry.o entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
|
|
obj-y += common.o
|
|
|
|
obj-y += vdso/
|
|
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
|
|
index a4c061fb7c6e..b00a3a95fbfa 100644
|
|
--- a/arch/x86/entry/calling.h
|
|
+++ b/arch/x86/entry/calling.h
|
|
@@ -7,6 +7,8 @@
|
|
#include <asm/asm-offsets.h>
|
|
#include <asm/processor-flags.h>
|
|
#include <asm/ptrace-abi.h>
|
|
+#include <asm/msr.h>
|
|
+#include <asm/nospec-branch.h>
|
|
|
|
/*
|
|
|
|
@@ -119,27 +121,19 @@ For 32-bit we have the following conventions - kernel is built with
|
|
CLEAR_REGS
|
|
.endm
|
|
|
|
-.macro POP_REGS pop_rdi=1 skip_r11rcx=0
|
|
+.macro POP_REGS pop_rdi=1
|
|
popq %r15
|
|
popq %r14
|
|
popq %r13
|
|
popq %r12
|
|
popq %rbp
|
|
popq %rbx
|
|
- .if \skip_r11rcx
|
|
- popq %rsi
|
|
- .else
|
|
popq %r11
|
|
- .endif
|
|
popq %r10
|
|
popq %r9
|
|
popq %r8
|
|
popq %rax
|
|
- .if \skip_r11rcx
|
|
- popq %rsi
|
|
- .else
|
|
popq %rcx
|
|
- .endif
|
|
popq %rdx
|
|
popq %rsi
|
|
.if \pop_rdi
|
|
@@ -289,6 +283,66 @@ For 32-bit we have the following conventions - kernel is built with
|
|
|
|
#endif
|
|
|
|
+/*
|
|
+ * IBRS kernel mitigation for Spectre_v2.
|
|
+ *
|
|
+ * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers
|
|
+ * the regs it uses (AX, CX, DX). Must be called before the first RET
|
|
+ * instruction (NOTE! UNTRAIN_RET includes a RET instruction)
|
|
+ *
|
|
+ * The optional argument is used to save/restore the current value,
|
|
+ * which is used on the paranoid paths.
|
|
+ *
|
|
+ * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set.
|
|
+ */
|
|
+.macro IBRS_ENTER save_reg
|
|
+#ifdef CONFIG_CPU_IBRS_ENTRY
|
|
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
|
|
+ movl $MSR_IA32_SPEC_CTRL, %ecx
|
|
+
|
|
+.ifnb \save_reg
|
|
+ rdmsr
|
|
+ shl $32, %rdx
|
|
+ or %rdx, %rax
|
|
+ mov %rax, \save_reg
|
|
+ test $SPEC_CTRL_IBRS, %eax
|
|
+ jz .Ldo_wrmsr_\@
|
|
+ lfence
|
|
+ jmp .Lend_\@
|
|
+.Ldo_wrmsr_\@:
|
|
+.endif
|
|
+
|
|
+ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx
|
|
+ movl %edx, %eax
|
|
+ shr $32, %rdx
|
|
+ wrmsr
|
|
+.Lend_\@:
|
|
+#endif
|
|
+.endm
|
|
+
|
|
+/*
|
|
+ * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX)
|
|
+ * regs. Must be called after the last RET.
|
|
+ */
|
|
+.macro IBRS_EXIT save_reg
|
|
+#ifdef CONFIG_CPU_IBRS_ENTRY
|
|
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
|
|
+ movl $MSR_IA32_SPEC_CTRL, %ecx
|
|
+
|
|
+.ifnb \save_reg
|
|
+ mov \save_reg, %rdx
|
|
+.else
|
|
+ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx
|
|
+ andl $(~SPEC_CTRL_IBRS), %edx
|
|
+.endif
|
|
+
|
|
+ movl %edx, %eax
|
|
+ shr $32, %rdx
|
|
+ wrmsr
|
|
+.Lend_\@:
|
|
+#endif
|
|
+.endm
|
|
+
|
|
/*
|
|
* Mitigate Spectre v1 for conditional swapgs code paths.
|
|
*
|
|
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
|
|
new file mode 100644
|
|
index 000000000000..bfb7bcb362bc
|
|
--- /dev/null
|
|
+++ b/arch/x86/entry/entry.S
|
|
@@ -0,0 +1,22 @@
|
|
+/* SPDX-License-Identifier: GPL-2.0 */
|
|
+/*
|
|
+ * Common place for both 32- and 64-bit entry routines.
|
|
+ */
|
|
+
|
|
+#include <linux/linkage.h>
|
|
+#include <asm/export.h>
|
|
+#include <asm/msr-index.h>
|
|
+
|
|
+.pushsection .noinstr.text, "ax"
|
|
+
|
|
+SYM_FUNC_START(entry_ibpb)
|
|
+ movl $MSR_IA32_PRED_CMD, %ecx
|
|
+ movl $PRED_CMD_IBPB, %eax
|
|
+ xorl %edx, %edx
|
|
+ wrmsr
|
|
+ RET
|
|
+SYM_FUNC_END(entry_ibpb)
|
|
+/* For KVM */
|
|
+EXPORT_SYMBOL_GPL(entry_ibpb);
|
|
+
|
|
+.popsection
|
|
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
|
|
index 887420844066..e309e7156038 100644
|
|
--- a/arch/x86/entry/entry_32.S
|
|
+++ b/arch/x86/entry/entry_32.S
|
|
@@ -698,7 +698,6 @@ SYM_CODE_START(__switch_to_asm)
|
|
movl %ebx, PER_CPU_VAR(__stack_chk_guard)
|
|
#endif
|
|
|
|
-#ifdef CONFIG_RETPOLINE
|
|
/*
|
|
* When switching from a shallower to a deeper call stack
|
|
* the RSB may either underflow or use entries populated
|
|
@@ -707,7 +706,6 @@ SYM_CODE_START(__switch_to_asm)
|
|
* speculative execution to prevent attack.
|
|
*/
|
|
FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
|
|
-#endif
|
|
|
|
/* Restore flags or the incoming task to restore AC state. */
|
|
popfl
|
|
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
|
|
index d8376e5fe1af..2ea185d47cfd 100644
|
|
--- a/arch/x86/entry/entry_64.S
|
|
+++ b/arch/x86/entry/entry_64.S
|
|
@@ -85,7 +85,7 @@
|
|
*/
|
|
|
|
SYM_CODE_START(entry_SYSCALL_64)
|
|
- UNWIND_HINT_EMPTY
|
|
+ UNWIND_HINT_ENTRY
|
|
ENDBR
|
|
|
|
swapgs
|
|
@@ -112,6 +112,11 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
|
|
movq %rsp, %rdi
|
|
/* Sign extend the lower 32bit as syscall numbers are treated as int */
|
|
movslq %eax, %rsi
|
|
+
|
|
+ /* clobbers %rax, make sure it is after saving the syscall nr */
|
|
+ IBRS_ENTER
|
|
+ UNTRAIN_RET
|
|
+
|
|
call do_syscall_64 /* returns with IRQs disabled */
|
|
|
|
/*
|
|
@@ -191,8 +196,8 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
|
|
* perf profiles. Nothing jumps here.
|
|
*/
|
|
syscall_return_via_sysret:
|
|
- /* rcx and r11 are already restored (see code above) */
|
|
- POP_REGS pop_rdi=0 skip_r11rcx=1
|
|
+ IBRS_EXIT
|
|
+ POP_REGS pop_rdi=0
|
|
|
|
/*
|
|
* Now all regs are restored except RSP and RDI.
|
|
@@ -245,7 +250,6 @@ SYM_FUNC_START(__switch_to_asm)
|
|
movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
|
|
#endif
|
|
|
|
-#ifdef CONFIG_RETPOLINE
|
|
/*
|
|
* When switching from a shallower to a deeper call stack
|
|
* the RSB may either underflow or use entries populated
|
|
@@ -254,7 +258,6 @@ SYM_FUNC_START(__switch_to_asm)
|
|
* speculative execution to prevent attack.
|
|
*/
|
|
FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
|
|
-#endif
|
|
|
|
/* restore callee-saved registers */
|
|
popq %r15
|
|
@@ -318,6 +321,14 @@ SYM_CODE_END(ret_from_fork)
|
|
#endif
|
|
.endm
|
|
|
|
+SYM_CODE_START_LOCAL(xen_error_entry)
|
|
+ UNWIND_HINT_FUNC
|
|
+ PUSH_AND_CLEAR_REGS save_ret=1
|
|
+ ENCODE_FRAME_POINTER 8
|
|
+ UNTRAIN_RET
|
|
+ RET
|
|
+SYM_CODE_END(xen_error_entry)
|
|
+
|
|
/**
|
|
* idtentry_body - Macro to emit code calling the C function
|
|
* @cfunc: C function to be called
|
|
@@ -325,7 +336,18 @@ SYM_CODE_END(ret_from_fork)
|
|
*/
|
|
.macro idtentry_body cfunc has_error_code:req
|
|
|
|
- call error_entry
|
|
+ /*
|
|
+ * Call error_entry() and switch to the task stack if from userspace.
|
|
+ *
|
|
+ * When in XENPV, it is already in the task stack, and it can't fault
|
|
+ * for native_iret() nor native_load_gs_index() since XENPV uses its
|
|
+ * own pvops for IRET and load_gs_index(). And it doesn't need to
|
|
+ * switch the CR3. So it can skip invoking error_entry().
|
|
+ */
|
|
+ ALTERNATIVE "call error_entry; movq %rax, %rsp", \
|
|
+ "call xen_error_entry", X86_FEATURE_XENPV
|
|
+
|
|
+ ENCODE_FRAME_POINTER
|
|
UNWIND_HINT_REGS
|
|
|
|
movq %rsp, %rdi /* pt_regs pointer into 1st argument*/
|
|
@@ -582,6 +604,7 @@ __irqentry_text_end:
|
|
|
|
SYM_CODE_START_LOCAL(common_interrupt_return)
|
|
SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
|
|
+ IBRS_EXIT
|
|
#ifdef CONFIG_DEBUG_ENTRY
|
|
/* Assert that pt_regs indicates user mode. */
|
|
testb $3, CS(%rsp)
|
|
@@ -695,6 +718,7 @@ native_irq_return_ldt:
|
|
pushq %rdi /* Stash user RDI */
|
|
swapgs /* to kernel GS */
|
|
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */
|
|
+ UNTRAIN_RET
|
|
|
|
movq PER_CPU_VAR(espfix_waddr), %rdi
|
|
movq %rax, (0*8)(%rdi) /* user RAX */
|
|
@@ -867,6 +891,9 @@ SYM_CODE_END(xen_failsafe_callback)
|
|
* 1 -> no SWAPGS on exit
|
|
*
|
|
* Y GSBASE value at entry, must be restored in paranoid_exit
|
|
+ *
|
|
+ * R14 - old CR3
|
|
+ * R15 - old SPEC_CTRL
|
|
*/
|
|
SYM_CODE_START_LOCAL(paranoid_entry)
|
|
UNWIND_HINT_FUNC
|
|
@@ -911,7 +938,7 @@ SYM_CODE_START_LOCAL(paranoid_entry)
|
|
* is needed here.
|
|
*/
|
|
SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
|
|
- RET
|
|
+ jmp .Lparanoid_gsbase_done
|
|
|
|
.Lparanoid_entry_checkgs:
|
|
/* EBX = 1 -> kernel GSBASE active, no restore required */
|
|
@@ -930,8 +957,16 @@ SYM_CODE_START_LOCAL(paranoid_entry)
|
|
xorl %ebx, %ebx
|
|
swapgs
|
|
.Lparanoid_kernel_gsbase:
|
|
-
|
|
FENCE_SWAPGS_KERNEL_ENTRY
|
|
+.Lparanoid_gsbase_done:
|
|
+
|
|
+ /*
|
|
+ * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like
|
|
+ * CR3 above, keep the old value in a callee saved register.
|
|
+ */
|
|
+ IBRS_ENTER save_reg=%r15
|
|
+ UNTRAIN_RET
|
|
+
|
|
RET
|
|
SYM_CODE_END(paranoid_entry)
|
|
|
|
@@ -953,9 +988,19 @@ SYM_CODE_END(paranoid_entry)
|
|
* 1 -> no SWAPGS on exit
|
|
*
|
|
* Y User space GSBASE, must be restored unconditionally
|
|
+ *
|
|
+ * R14 - old CR3
|
|
+ * R15 - old SPEC_CTRL
|
|
*/
|
|
SYM_CODE_START_LOCAL(paranoid_exit)
|
|
UNWIND_HINT_REGS
|
|
+
|
|
+ /*
|
|
+ * Must restore IBRS state before both CR3 and %GS since we need access
|
|
+ * to the per-CPU x86_spec_ctrl_current variable.
|
|
+ */
|
|
+ IBRS_EXIT save_reg=%r15
|
|
+
|
|
/*
|
|
* The order of operations is important. RESTORE_CR3 requires
|
|
* kernel GSBASE.
|
|
@@ -984,13 +1029,15 @@ SYM_CODE_START_LOCAL(paranoid_exit)
|
|
SYM_CODE_END(paranoid_exit)
|
|
|
|
/*
|
|
- * Save all registers in pt_regs, and switch GS if needed.
|
|
+ * Switch GS and CR3 if needed.
|
|
*/
|
|
SYM_CODE_START_LOCAL(error_entry)
|
|
UNWIND_HINT_FUNC
|
|
cld
|
|
+
|
|
PUSH_AND_CLEAR_REGS save_ret=1
|
|
ENCODE_FRAME_POINTER 8
|
|
+
|
|
testb $3, CS+8(%rsp)
|
|
jz .Lerror_kernelspace
|
|
|
|
@@ -1002,15 +1049,14 @@ SYM_CODE_START_LOCAL(error_entry)
|
|
FENCE_SWAPGS_USER_ENTRY
|
|
/* We have user CR3. Change to kernel CR3. */
|
|
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
|
|
+ IBRS_ENTER
|
|
+ UNTRAIN_RET
|
|
|
|
+ leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */
|
|
.Lerror_entry_from_usermode_after_swapgs:
|
|
+
|
|
/* Put us onto the real thread stack. */
|
|
- popq %r12 /* save return addr in %12 */
|
|
- movq %rsp, %rdi /* arg0 = pt_regs pointer */
|
|
call sync_regs
|
|
- movq %rax, %rsp /* switch stack */
|
|
- ENCODE_FRAME_POINTER
|
|
- pushq %r12
|
|
RET
|
|
|
|
/*
|
|
@@ -1042,6 +1088,8 @@ SYM_CODE_START_LOCAL(error_entry)
|
|
*/
|
|
.Lerror_entry_done_lfence:
|
|
FENCE_SWAPGS_KERNEL_ENTRY
|
|
+ leaq 8(%rsp), %rax /* return pt_regs pointer */
|
|
+ ANNOTATE_UNRET_END
|
|
RET
|
|
|
|
.Lbstep_iret:
|
|
@@ -1057,14 +1105,16 @@ SYM_CODE_START_LOCAL(error_entry)
|
|
SWAPGS
|
|
FENCE_SWAPGS_USER_ENTRY
|
|
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
|
|
+ IBRS_ENTER
|
|
+ UNTRAIN_RET
|
|
|
|
/*
|
|
* Pretend that the exception came from user mode: set up pt_regs
|
|
* as if we faulted immediately after IRET.
|
|
*/
|
|
- mov %rsp, %rdi
|
|
+ leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */
|
|
call fixup_bad_iret
|
|
- mov %rax, %rsp
|
|
+ mov %rax, %rdi
|
|
jmp .Lerror_entry_from_usermode_after_swapgs
|
|
SYM_CODE_END(error_entry)
|
|
|
|
@@ -1162,6 +1212,9 @@ SYM_CODE_START(asm_exc_nmi)
|
|
PUSH_AND_CLEAR_REGS rdx=(%rdx)
|
|
ENCODE_FRAME_POINTER
|
|
|
|
+ IBRS_ENTER
|
|
+ UNTRAIN_RET
|
|
+
|
|
/*
|
|
* At this point we no longer need to worry about stack damage
|
|
* due to nesting -- we're on the normal thread stack and we're
|
|
@@ -1386,6 +1439,9 @@ end_repeat_nmi:
|
|
movq $-1, %rsi
|
|
call exc_nmi
|
|
|
|
+ /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */
|
|
+ IBRS_EXIT save_reg=%r15
|
|
+
|
|
/* Always restore stashed CR3 value (see paranoid_entry) */
|
|
RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
|
|
|
|
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
|
|
index 4fdb007cddbd..4f479cdc7a40 100644
|
|
--- a/arch/x86/entry/entry_64_compat.S
|
|
+++ b/arch/x86/entry/entry_64_compat.S
|
|
@@ -4,7 +4,6 @@
|
|
*
|
|
* Copyright 2000-2002 Andi Kleen, SuSE Labs.
|
|
*/
|
|
-#include "calling.h"
|
|
#include <asm/asm-offsets.h>
|
|
#include <asm/current.h>
|
|
#include <asm/errno.h>
|
|
@@ -14,9 +13,12 @@
|
|
#include <asm/irqflags.h>
|
|
#include <asm/asm.h>
|
|
#include <asm/smap.h>
|
|
+#include <asm/nospec-branch.h>
|
|
#include <linux/linkage.h>
|
|
#include <linux/err.h>
|
|
|
|
+#include "calling.h"
|
|
+
|
|
.section .entry.text, "ax"
|
|
|
|
/*
|
|
@@ -47,7 +49,7 @@
|
|
* 0(%ebp) arg6
|
|
*/
|
|
SYM_CODE_START(entry_SYSENTER_compat)
|
|
- UNWIND_HINT_EMPTY
|
|
+ UNWIND_HINT_ENTRY
|
|
ENDBR
|
|
/* Interrupts are off on entry. */
|
|
SWAPGS
|
|
@@ -113,6 +115,9 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
|
|
|
|
cld
|
|
|
|
+ IBRS_ENTER
|
|
+ UNTRAIN_RET
|
|
+
|
|
/*
|
|
* SYSENTER doesn't filter flags, so we need to clear NT and AC
|
|
* ourselves. To save a few cycles, we can check whether
|
|
@@ -199,7 +204,7 @@ SYM_CODE_END(entry_SYSENTER_compat)
|
|
* 0(%esp) arg6
|
|
*/
|
|
SYM_CODE_START(entry_SYSCALL_compat)
|
|
- UNWIND_HINT_EMPTY
|
|
+ UNWIND_HINT_ENTRY
|
|
ENDBR
|
|
/* Interrupts are off on entry. */
|
|
swapgs
|
|
@@ -256,6 +261,9 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
|
|
|
|
UNWIND_HINT_REGS
|
|
|
|
+ IBRS_ENTER
|
|
+ UNTRAIN_RET
|
|
+
|
|
movq %rsp, %rdi
|
|
call do_fast_syscall_32
|
|
/* XEN PV guests always use IRET path */
|
|
@@ -270,6 +278,8 @@ sysret32_from_system_call:
|
|
*/
|
|
STACKLEAK_ERASE
|
|
|
|
+ IBRS_EXIT
|
|
+
|
|
movq RBX(%rsp), %rbx /* pt_regs->rbx */
|
|
movq RBP(%rsp), %rbp /* pt_regs->rbp */
|
|
movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */
|
|
@@ -343,7 +353,7 @@ SYM_CODE_END(entry_SYSCALL_compat)
|
|
* ebp arg6
|
|
*/
|
|
SYM_CODE_START(entry_INT80_compat)
|
|
- UNWIND_HINT_EMPTY
|
|
+ UNWIND_HINT_ENTRY
|
|
ENDBR
|
|
/*
|
|
* Interrupts are off on entry.
|
|
@@ -414,6 +424,9 @@ SYM_CODE_START(entry_INT80_compat)
|
|
|
|
cld
|
|
|
|
+ IBRS_ENTER
|
|
+ UNTRAIN_RET
|
|
+
|
|
movq %rsp, %rdi
|
|
call do_int80_syscall_32
|
|
jmp swapgs_restore_regs_and_return_to_usermode
|
|
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
|
|
index 693f8b9031fb..e893af5aa8f5 100644
|
|
--- a/arch/x86/entry/vdso/Makefile
|
|
+++ b/arch/x86/entry/vdso/Makefile
|
|
@@ -92,6 +92,7 @@ endif
|
|
endif
|
|
|
|
$(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
|
|
+$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO
|
|
|
|
#
|
|
# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
|
|
diff --git a/arch/x86/entry/vsyscall/vsyscall_emu_64.S b/arch/x86/entry/vsyscall/vsyscall_emu_64.S
|
|
index 15e35159ebb6..ef2dd1827243 100644
|
|
--- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S
|
|
+++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S
|
|
@@ -19,17 +19,20 @@ __vsyscall_page:
|
|
|
|
mov $__NR_gettimeofday, %rax
|
|
syscall
|
|
- RET
|
|
+ ret
|
|
+ int3
|
|
|
|
.balign 1024, 0xcc
|
|
mov $__NR_time, %rax
|
|
syscall
|
|
- RET
|
|
+ ret
|
|
+ int3
|
|
|
|
.balign 1024, 0xcc
|
|
mov $__NR_getcpu, %rax
|
|
syscall
|
|
- RET
|
|
+ ret
|
|
+ int3
|
|
|
|
.balign 4096, 0xcc
|
|
|
|
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
|
|
index 9b10c8c76087..9542c582d546 100644
|
|
--- a/arch/x86/include/asm/alternative.h
|
|
+++ b/arch/x86/include/asm/alternative.h
|
|
@@ -76,6 +76,7 @@ extern int alternatives_patched;
|
|
extern void alternative_instructions(void);
|
|
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
|
|
extern void apply_retpolines(s32 *start, s32 *end);
|
|
+extern void apply_returns(s32 *start, s32 *end);
|
|
extern void apply_ibt_endbr(s32 *start, s32 *end);
|
|
|
|
struct module;
|
|
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
|
|
index e17de69faa54..5d09ded0c491 100644
|
|
--- a/arch/x86/include/asm/cpufeatures.h
|
|
+++ b/arch/x86/include/asm/cpufeatures.h
|
|
@@ -203,8 +203,8 @@
|
|
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
|
|
/* FREE! ( 7*32+10) */
|
|
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
|
|
-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
|
|
-#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
|
|
+#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
|
|
+#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */
|
|
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
|
|
#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
|
|
#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
|
|
@@ -295,6 +295,12 @@
|
|
#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
|
|
#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */
|
|
#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
|
|
+#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */
|
|
+#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */
|
|
+#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
|
|
+#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
|
|
+#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
|
|
+#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
|
|
|
|
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
|
|
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
|
|
@@ -315,6 +321,7 @@
|
|
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
|
|
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
|
|
#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
|
|
+#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
|
|
|
|
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
|
|
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
|
|
@@ -444,5 +451,6 @@
|
|
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
|
|
#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
|
|
#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
|
|
+#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */
|
|
|
|
#endif /* _ASM_X86_CPUFEATURES_H */
|
|
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
|
|
index 1231d63f836d..f7be189e9723 100644
|
|
--- a/arch/x86/include/asm/disabled-features.h
|
|
+++ b/arch/x86/include/asm/disabled-features.h
|
|
@@ -56,6 +56,25 @@
|
|
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
|
|
#endif
|
|
|
|
+#ifdef CONFIG_RETPOLINE
|
|
+# define DISABLE_RETPOLINE 0
|
|
+#else
|
|
+# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
|
|
+ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
|
|
+#endif
|
|
+
|
|
+#ifdef CONFIG_RETHUNK
|
|
+# define DISABLE_RETHUNK 0
|
|
+#else
|
|
+# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31))
|
|
+#endif
|
|
+
|
|
+#ifdef CONFIG_CPU_UNRET_ENTRY
|
|
+# define DISABLE_UNRET 0
|
|
+#else
|
|
+# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31))
|
|
+#endif
|
|
+
|
|
#ifdef CONFIG_INTEL_IOMMU_SVM
|
|
# define DISABLE_ENQCMD 0
|
|
#else
|
|
@@ -82,7 +101,7 @@
|
|
#define DISABLED_MASK8 0
|
|
#define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX)
|
|
#define DISABLED_MASK10 0
|
|
-#define DISABLED_MASK11 0
|
|
+#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET)
|
|
#define DISABLED_MASK12 0
|
|
#define DISABLED_MASK13 0
|
|
#define DISABLED_MASK14 0
|
|
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
|
|
index 98938a68251c..bed74a0f2932 100644
|
|
--- a/arch/x86/include/asm/efi.h
|
|
+++ b/arch/x86/include/asm/efi.h
|
|
@@ -357,6 +357,11 @@ static inline u32 efi64_convert_status(efi_status_t status)
|
|
runtime), \
|
|
func, __VA_ARGS__))
|
|
|
|
+#define efi_dxe_call(func, ...) \
|
|
+ (efi_is_native() \
|
|
+ ? efi_dxe_table->func(__VA_ARGS__) \
|
|
+ : __efi64_thunk_map(efi_dxe_table, func, __VA_ARGS__))
|
|
+
|
|
#else /* CONFIG_EFI_MIXED */
|
|
|
|
static inline bool efi_is_64bit(void)
|
|
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
|
|
index 85865f1645bd..73ca20049835 100644
|
|
--- a/arch/x86/include/asm/linkage.h
|
|
+++ b/arch/x86/include/asm/linkage.h
|
|
@@ -19,19 +19,27 @@
|
|
#define __ALIGN_STR __stringify(__ALIGN)
|
|
#endif
|
|
|
|
+#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
|
|
+#define RET jmp __x86_return_thunk
|
|
+#else /* CONFIG_RETHUNK */
|
|
#ifdef CONFIG_SLS
|
|
#define RET ret; int3
|
|
#else
|
|
#define RET ret
|
|
#endif
|
|
+#endif /* CONFIG_RETHUNK */
|
|
|
|
#else /* __ASSEMBLY__ */
|
|
|
|
+#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
|
|
+#define ASM_RET "jmp __x86_return_thunk\n\t"
|
|
+#else /* CONFIG_RETHUNK */
|
|
#ifdef CONFIG_SLS
|
|
#define ASM_RET "ret; int3\n\t"
|
|
#else
|
|
#define ASM_RET "ret\n\t"
|
|
#endif
|
|
+#endif /* CONFIG_RETHUNK */
|
|
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
|
|
index 4425d6773183..ad084326f24c 100644
|
|
--- a/arch/x86/include/asm/msr-index.h
|
|
+++ b/arch/x86/include/asm/msr-index.h
|
|
@@ -51,6 +51,8 @@
|
|
#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
|
|
#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
|
|
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
|
|
+#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
|
|
+#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
|
|
|
|
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
|
|
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
|
|
@@ -91,6 +93,7 @@
|
|
#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
|
|
#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */
|
|
#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */
|
|
+#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */
|
|
#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */
|
|
#define ARCH_CAP_SSB_NO BIT(4) /*
|
|
* Not susceptible to Speculative Store Bypass
|
|
@@ -138,6 +141,13 @@
|
|
* bit available to control VERW
|
|
* behavior.
|
|
*/
|
|
+#define ARCH_CAP_RRSBA BIT(19) /*
|
|
+ * Indicates RET may use predictors
|
|
+ * other than the RSB. With eIBRS
|
|
+ * enabled predictions in kernel mode
|
|
+ * are restricted to targets in
|
|
+ * kernel.
|
|
+ */
|
|
|
|
#define MSR_IA32_FLUSH_CMD 0x0000010b
|
|
#define L1D_FLUSH BIT(0) /*
|
|
@@ -552,6 +562,9 @@
|
|
/* Fam 17h MSRs */
|
|
#define MSR_F17H_IRPERF 0xc00000e9
|
|
|
|
+#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3
|
|
+#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
|
|
+
|
|
/* Fam 16h MSRs */
|
|
#define MSR_F16H_L2I_PERF_CTL 0xc0010230
|
|
#define MSR_F16H_L2I_PERF_CTR 0xc0010231
|
|
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
|
|
index da251a5645b0..10a3bfc1eb23 100644
|
|
--- a/arch/x86/include/asm/nospec-branch.h
|
|
+++ b/arch/x86/include/asm/nospec-branch.h
|
|
@@ -11,6 +11,7 @@
|
|
#include <asm/cpufeatures.h>
|
|
#include <asm/msr-index.h>
|
|
#include <asm/unwind_hints.h>
|
|
+#include <asm/percpu.h>
|
|
|
|
#define RETPOLINE_THUNK_SIZE 32
|
|
|
|
@@ -75,6 +76,23 @@
|
|
.popsection
|
|
.endm
|
|
|
|
+/*
|
|
+ * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions
|
|
+ * vs RETBleed validation.
|
|
+ */
|
|
+#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE
|
|
+
|
|
+/*
|
|
+ * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should
|
|
+ * eventually turn into its own annotation.
|
|
+ */
|
|
+.macro ANNOTATE_UNRET_END
|
|
+#ifdef CONFIG_DEBUG_ENTRY
|
|
+ ANNOTATE_RETPOLINE_SAFE
|
|
+ nop
|
|
+#endif
|
|
+.endm
|
|
+
|
|
/*
|
|
* JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
|
|
* indirect jmp/call which may be susceptible to the Spectre variant 2
|
|
@@ -105,10 +123,34 @@
|
|
* monstrosity above, manually.
|
|
*/
|
|
.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
|
|
-#ifdef CONFIG_RETPOLINE
|
|
ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
|
|
__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
|
|
.Lskip_rsb_\@:
|
|
+.endm
|
|
+
|
|
+#ifdef CONFIG_CPU_UNRET_ENTRY
|
|
+#define CALL_ZEN_UNTRAIN_RET "call zen_untrain_ret"
|
|
+#else
|
|
+#define CALL_ZEN_UNTRAIN_RET ""
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
|
|
+ * return thunk isn't mapped into the userspace tables (then again, AMD
|
|
+ * typically has NO_MELTDOWN).
|
|
+ *
|
|
+ * While zen_untrain_ret() doesn't clobber anything but requires stack,
|
|
+ * entry_ibpb() will clobber AX, CX, DX.
|
|
+ *
|
|
+ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
|
|
+ * where we have a stack but before any RET instruction.
|
|
+ */
|
|
+.macro UNTRAIN_RET
|
|
+#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY)
|
|
+ ANNOTATE_UNRET_END
|
|
+ ALTERNATIVE_2 "", \
|
|
+ CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \
|
|
+ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB
|
|
#endif
|
|
.endm
|
|
|
|
@@ -120,17 +162,20 @@
|
|
_ASM_PTR " 999b\n\t" \
|
|
".popsection\n\t"
|
|
|
|
-#ifdef CONFIG_RETPOLINE
|
|
-
|
|
typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
|
|
+extern retpoline_thunk_t __x86_indirect_thunk_array[];
|
|
+
|
|
+extern void __x86_return_thunk(void);
|
|
+extern void zen_untrain_ret(void);
|
|
+extern void entry_ibpb(void);
|
|
+
|
|
+#ifdef CONFIG_RETPOLINE
|
|
|
|
#define GEN(reg) \
|
|
extern retpoline_thunk_t __x86_indirect_thunk_ ## reg;
|
|
#include <asm/GEN-for-each-reg.h>
|
|
#undef GEN
|
|
|
|
-extern retpoline_thunk_t __x86_indirect_thunk_array[];
|
|
-
|
|
#ifdef CONFIG_X86_64
|
|
|
|
/*
|
|
@@ -193,6 +238,7 @@ enum spectre_v2_mitigation {
|
|
SPECTRE_V2_EIBRS,
|
|
SPECTRE_V2_EIBRS_RETPOLINE,
|
|
SPECTRE_V2_EIBRS_LFENCE,
|
|
+ SPECTRE_V2_IBRS,
|
|
};
|
|
|
|
/* The indirect branch speculation control variants */
|
|
@@ -235,6 +281,9 @@ static inline void indirect_branch_prediction_barrier(void)
|
|
|
|
/* The Intel SPEC CTRL MSR base value cache */
|
|
extern u64 x86_spec_ctrl_base;
|
|
+DECLARE_PER_CPU(u64, x86_spec_ctrl_current);
|
|
+extern void write_spec_ctrl_current(u64 val, bool force);
|
|
+extern u64 spec_ctrl_current(void);
|
|
|
|
/*
|
|
* With retpoline, we must use IBRS to restrict branch prediction
|
|
@@ -244,18 +293,16 @@ extern u64 x86_spec_ctrl_base;
|
|
*/
|
|
#define firmware_restrict_branch_speculation_start() \
|
|
do { \
|
|
- u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \
|
|
- \
|
|
preempt_disable(); \
|
|
- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
|
|
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, \
|
|
+ spec_ctrl_current() | SPEC_CTRL_IBRS, \
|
|
X86_FEATURE_USE_IBRS_FW); \
|
|
} while (0)
|
|
|
|
#define firmware_restrict_branch_speculation_end() \
|
|
do { \
|
|
- u64 val = x86_spec_ctrl_base; \
|
|
- \
|
|
- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
|
|
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, \
|
|
+ spec_ctrl_current(), \
|
|
X86_FEATURE_USE_IBRS_FW); \
|
|
preempt_enable(); \
|
|
} while (0)
|
|
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
|
|
index 2d8dacd02643..343b722ccaf2 100644
|
|
--- a/arch/x86/include/asm/static_call.h
|
|
+++ b/arch/x86/include/asm/static_call.h
|
|
@@ -21,6 +21,16 @@
|
|
* relative displacement across sections.
|
|
*/
|
|
|
|
+/*
|
|
+ * The trampoline is 8 bytes and of the general form:
|
|
+ *
|
|
+ * jmp.d32 \func
|
|
+ * ud1 %esp, %ecx
|
|
+ *
|
|
+ * That trailing #UD provides both a speculation stop and serves as a unique
|
|
+ * 3 byte signature identifying static call trampolines. Also see tramp_ud[]
|
|
+ * and __static_call_fixup().
|
|
+ */
|
|
#define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \
|
|
asm(".pushsection .static_call.text, \"ax\" \n" \
|
|
".align 4 \n" \
|
|
@@ -28,7 +38,7 @@
|
|
STATIC_CALL_TRAMP_STR(name) ": \n" \
|
|
ANNOTATE_NOENDBR \
|
|
insns " \n" \
|
|
- ".byte 0x53, 0x43, 0x54 \n" \
|
|
+ ".byte 0x0f, 0xb9, 0xcc \n" \
|
|
".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
|
|
".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
|
|
".popsection \n")
|
|
@@ -36,8 +46,13 @@
|
|
#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \
|
|
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
|
|
|
|
+#ifdef CONFIG_RETHUNK
|
|
+#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
|
|
+ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk")
|
|
+#else
|
|
#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
|
|
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop")
|
|
+#endif
|
|
|
|
#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \
|
|
ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0)
|
|
@@ -48,4 +63,6 @@
|
|
".long " STATIC_CALL_KEY_STR(name) " - . \n" \
|
|
".popsection \n")
|
|
|
|
+extern bool __static_call_fixup(void *tramp, u8 op, void *dest);
|
|
+
|
|
#endif /* _ASM_STATIC_CALL_H */
|
|
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
|
|
index 35317c5c551d..47ecfff2c83d 100644
|
|
--- a/arch/x86/include/asm/traps.h
|
|
+++ b/arch/x86/include/asm/traps.h
|
|
@@ -13,7 +13,7 @@
|
|
#ifdef CONFIG_X86_64
|
|
asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
|
|
asmlinkage __visible notrace
|
|
-struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s);
|
|
+struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs);
|
|
void __init trap_init(void);
|
|
asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs);
|
|
#endif
|
|
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
|
|
index 8b33674288ea..f66fbe6537dd 100644
|
|
--- a/arch/x86/include/asm/unwind_hints.h
|
|
+++ b/arch/x86/include/asm/unwind_hints.h
|
|
@@ -8,7 +8,11 @@
|
|
#ifdef __ASSEMBLY__
|
|
|
|
.macro UNWIND_HINT_EMPTY
|
|
- UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1
|
|
+ UNWIND_HINT type=UNWIND_HINT_TYPE_CALL end=1
|
|
+.endm
|
|
+
|
|
+.macro UNWIND_HINT_ENTRY
|
|
+ UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1
|
|
.endm
|
|
|
|
.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0
|
|
@@ -52,6 +56,14 @@
|
|
UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC
|
|
.endm
|
|
|
|
+.macro UNWIND_HINT_SAVE
|
|
+ UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE
|
|
+.endm
|
|
+
|
|
+.macro UNWIND_HINT_RESTORE
|
|
+ UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE
|
|
+.endm
|
|
+
|
|
#else
|
|
|
|
#define UNWIND_HINT_FUNC \
|
|
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
|
|
index d374cb3cf024..46427b785bc8 100644
|
|
--- a/arch/x86/kernel/alternative.c
|
|
+++ b/arch/x86/kernel/alternative.c
|
|
@@ -115,6 +115,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
|
|
}
|
|
|
|
extern s32 __retpoline_sites[], __retpoline_sites_end[];
|
|
+extern s32 __return_sites[], __return_sites_end[];
|
|
extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[];
|
|
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
|
|
extern s32 __smp_locks[], __smp_locks_end[];
|
|
@@ -507,9 +508,76 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
|
|
}
|
|
}
|
|
|
|
+#ifdef CONFIG_RETHUNK
|
|
+/*
|
|
+ * Rewrite the compiler generated return thunk tail-calls.
|
|
+ *
|
|
+ * For example, convert:
|
|
+ *
|
|
+ * JMP __x86_return_thunk
|
|
+ *
|
|
+ * into:
|
|
+ *
|
|
+ * RET
|
|
+ */
|
|
+static int patch_return(void *addr, struct insn *insn, u8 *bytes)
|
|
+{
|
|
+ int i = 0;
|
|
+
|
|
+ if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
|
|
+ return -1;
|
|
+
|
|
+ bytes[i++] = RET_INSN_OPCODE;
|
|
+
|
|
+ for (; i < insn->length;)
|
|
+ bytes[i++] = INT3_INSN_OPCODE;
|
|
+
|
|
+ return i;
|
|
+}
|
|
+
|
|
+void __init_or_module noinline apply_returns(s32 *start, s32 *end)
|
|
+{
|
|
+ s32 *s;
|
|
+
|
|
+ for (s = start; s < end; s++) {
|
|
+ void *dest = NULL, *addr = (void *)s + *s;
|
|
+ struct insn insn;
|
|
+ int len, ret;
|
|
+ u8 bytes[16];
|
|
+ u8 op;
|
|
+
|
|
+ ret = insn_decode_kernel(&insn, addr);
|
|
+ if (WARN_ON_ONCE(ret < 0))
|
|
+ continue;
|
|
+
|
|
+ op = insn.opcode.bytes[0];
|
|
+ if (op == JMP32_INSN_OPCODE)
|
|
+ dest = addr + insn.length + insn.immediate.value;
|
|
+
|
|
+ if (__static_call_fixup(addr, op, dest) ||
|
|
+ WARN_ON_ONCE(dest != &__x86_return_thunk))
|
|
+ continue;
|
|
+
|
|
+ DPRINTK("return thunk at: %pS (%px) len: %d to: %pS",
|
|
+ addr, addr, insn.length,
|
|
+ addr + insn.length + insn.immediate.value);
|
|
+
|
|
+ len = patch_return(addr, &insn, bytes);
|
|
+ if (len == insn.length) {
|
|
+ DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr);
|
|
+ DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
|
|
+ text_poke_early(addr, bytes, len);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+#else
|
|
+void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
|
|
+#endif /* CONFIG_RETHUNK */
|
|
+
|
|
#else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */
|
|
|
|
void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
|
|
+void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
|
|
|
|
#endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */
|
|
|
|
@@ -860,6 +928,7 @@ void __init alternative_instructions(void)
|
|
* those can rewrite the retpoline thunks.
|
|
*/
|
|
apply_retpolines(__retpoline_sites, __retpoline_sites_end);
|
|
+ apply_returns(__return_sites, __return_sites_end);
|
|
|
|
/*
|
|
* Then patch alternatives, such that those paravirt calls that are in
|
|
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
|
|
index 9fb0a2f8b62a..6434ea941348 100644
|
|
--- a/arch/x86/kernel/asm-offsets.c
|
|
+++ b/arch/x86/kernel/asm-offsets.c
|
|
@@ -18,6 +18,7 @@
|
|
#include <asm/bootparam.h>
|
|
#include <asm/suspend.h>
|
|
#include <asm/tlbflush.h>
|
|
+#include "../kvm/vmx/vmx.h"
|
|
|
|
#ifdef CONFIG_XEN
|
|
#include <xen/interface/xen.h>
|
|
@@ -90,4 +91,9 @@ static void __used common(void)
|
|
OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
|
|
OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
|
|
OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
|
|
+
|
|
+ if (IS_ENABLED(CONFIG_KVM_INTEL)) {
|
|
+ BLANK();
|
|
+ OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl);
|
|
+ }
|
|
}
|
|
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
|
|
index 0c0b09796ced..35d5288394cb 100644
|
|
--- a/arch/x86/kernel/cpu/amd.c
|
|
+++ b/arch/x86/kernel/cpu/amd.c
|
|
@@ -862,6 +862,28 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
|
|
clear_rdrand_cpuid_bit(c);
|
|
}
|
|
|
|
+void init_spectral_chicken(struct cpuinfo_x86 *c)
|
|
+{
|
|
+#ifdef CONFIG_CPU_UNRET_ENTRY
|
|
+ u64 value;
|
|
+
|
|
+ /*
|
|
+ * On Zen2 we offer this chicken (bit) on the altar of Speculation.
|
|
+ *
|
|
+ * This suppresses speculation from the middle of a basic block, i.e. it
|
|
+ * suppresses non-branch predictions.
|
|
+ *
|
|
+ * We use STIBP as a heuristic to filter out Zen2 from the rest of F17H
|
|
+ */
|
|
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_AMD_STIBP)) {
|
|
+ if (!rdmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, &value)) {
|
|
+ value |= MSR_ZEN2_SPECTRAL_CHICKEN_BIT;
|
|
+ wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value);
|
|
+ }
|
|
+ }
|
|
+#endif
|
|
+}
|
|
+
|
|
static void init_amd_zn(struct cpuinfo_x86 *c)
|
|
{
|
|
set_cpu_cap(c, X86_FEATURE_ZEN);
|
|
@@ -870,12 +892,21 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
|
|
node_reclaim_distance = 32;
|
|
#endif
|
|
|
|
- /*
|
|
- * Fix erratum 1076: CPB feature bit not being set in CPUID.
|
|
- * Always set it, except when running under a hypervisor.
|
|
- */
|
|
- if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB))
|
|
- set_cpu_cap(c, X86_FEATURE_CPB);
|
|
+ /* Fix up CPUID bits, but only if not virtualised. */
|
|
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) {
|
|
+
|
|
+ /* Erratum 1076: CPB feature bit not being set in CPUID. */
|
|
+ if (!cpu_has(c, X86_FEATURE_CPB))
|
|
+ set_cpu_cap(c, X86_FEATURE_CPB);
|
|
+
|
|
+ /*
|
|
+ * Zen3 (Fam19 model < 0x10) parts are not susceptible to
|
|
+ * Branch Type Confusion, but predate the allocation of the
|
|
+ * BTC_NO bit.
|
|
+ */
|
|
+ if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO))
|
|
+ set_cpu_cap(c, X86_FEATURE_BTC_NO);
|
|
+ }
|
|
}
|
|
|
|
static void init_amd(struct cpuinfo_x86 *c)
|
|
@@ -907,7 +938,8 @@ static void init_amd(struct cpuinfo_x86 *c)
|
|
case 0x12: init_amd_ln(c); break;
|
|
case 0x15: init_amd_bd(c); break;
|
|
case 0x16: init_amd_jg(c); break;
|
|
- case 0x17: fallthrough;
|
|
+ case 0x17: init_spectral_chicken(c);
|
|
+ fallthrough;
|
|
case 0x19: init_amd_zn(c); break;
|
|
}
|
|
|
|
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
|
|
index a8a9f6406331..0b64e894b383 100644
|
|
--- a/arch/x86/kernel/cpu/bugs.c
|
|
+++ b/arch/x86/kernel/cpu/bugs.c
|
|
@@ -38,6 +38,8 @@
|
|
|
|
static void __init spectre_v1_select_mitigation(void);
|
|
static void __init spectre_v2_select_mitigation(void);
|
|
+static void __init retbleed_select_mitigation(void);
|
|
+static void __init spectre_v2_user_select_mitigation(void);
|
|
static void __init ssb_select_mitigation(void);
|
|
static void __init l1tf_select_mitigation(void);
|
|
static void __init mds_select_mitigation(void);
|
|
@@ -48,16 +50,40 @@ static void __init mmio_select_mitigation(void);
|
|
static void __init srbds_select_mitigation(void);
|
|
static void __init l1d_flush_select_mitigation(void);
|
|
|
|
-/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
|
|
+/* The base value of the SPEC_CTRL MSR without task-specific bits set */
|
|
u64 x86_spec_ctrl_base;
|
|
EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
|
|
+
|
|
+/* The current value of the SPEC_CTRL MSR with task-specific bits set */
|
|
+DEFINE_PER_CPU(u64, x86_spec_ctrl_current);
|
|
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_current);
|
|
+
|
|
static DEFINE_MUTEX(spec_ctrl_mutex);
|
|
|
|
/*
|
|
- * The vendor and possibly platform specific bits which can be modified in
|
|
- * x86_spec_ctrl_base.
|
|
+ * Keep track of the SPEC_CTRL MSR value for the current task, which may differ
|
|
+ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update().
|
|
*/
|
|
-static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
|
|
+void write_spec_ctrl_current(u64 val, bool force)
|
|
+{
|
|
+ if (this_cpu_read(x86_spec_ctrl_current) == val)
|
|
+ return;
|
|
+
|
|
+ this_cpu_write(x86_spec_ctrl_current, val);
|
|
+
|
|
+ /*
|
|
+ * When KERNEL_IBRS is enabled this MSR is written on return-to-user;
|
|
+ * unless forced, the update can be delayed until that time.
|
|
+ */
|
|
+ if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
|
|
+ wrmsrl(MSR_IA32_SPEC_CTRL, val);
|
|
+}
|
|
+
|
|
+u64 spec_ctrl_current(void)
|
|
+{
|
|
+ return this_cpu_read(x86_spec_ctrl_current);
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(spec_ctrl_current);
|
|
|
|
/*
|
|
* AMD specific MSR info for Speculative Store Bypass control.
|
|
@@ -114,13 +140,21 @@ void __init check_bugs(void)
|
|
if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
|
|
rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
|
|
|
|
- /* Allow STIBP in MSR_SPEC_CTRL if supported */
|
|
- if (boot_cpu_has(X86_FEATURE_STIBP))
|
|
- x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
|
|
-
|
|
/* Select the proper CPU mitigations before patching alternatives: */
|
|
spectre_v1_select_mitigation();
|
|
spectre_v2_select_mitigation();
|
|
+ /*
|
|
+ * retbleed_select_mitigation() relies on the state set by
|
|
+ * spectre_v2_select_mitigation(); specifically it wants to know about
|
|
+ * spectre_v2=ibrs.
|
|
+ */
|
|
+ retbleed_select_mitigation();
|
|
+ /*
|
|
+ * spectre_v2_user_select_mitigation() relies on the state set by
|
|
+ * retbleed_select_mitigation(); specifically the STIBP selection is
|
|
+ * forced for UNRET.
|
|
+ */
|
|
+ spectre_v2_user_select_mitigation();
|
|
ssb_select_mitigation();
|
|
l1tf_select_mitigation();
|
|
md_clear_select_mitigation();
|
|
@@ -161,31 +195,17 @@ void __init check_bugs(void)
|
|
#endif
|
|
}
|
|
|
|
+/*
|
|
+ * NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is
|
|
+ * done in vmenter.S.
|
|
+ */
|
|
void
|
|
x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
|
|
{
|
|
- u64 msrval, guestval, hostval = x86_spec_ctrl_base;
|
|
+ u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current();
|
|
struct thread_info *ti = current_thread_info();
|
|
|
|
- /* Is MSR_SPEC_CTRL implemented ? */
|
|
if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
|
|
- /*
|
|
- * Restrict guest_spec_ctrl to supported values. Clear the
|
|
- * modifiable bits in the host base value and or the
|
|
- * modifiable bits from the guest value.
|
|
- */
|
|
- guestval = hostval & ~x86_spec_ctrl_mask;
|
|
- guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
|
|
-
|
|
- /* SSBD controlled in MSR_SPEC_CTRL */
|
|
- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
|
|
- static_cpu_has(X86_FEATURE_AMD_SSBD))
|
|
- hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
|
|
-
|
|
- /* Conditional STIBP enabled? */
|
|
- if (static_branch_unlikely(&switch_to_cond_stibp))
|
|
- hostval |= stibp_tif_to_spec_ctrl(ti->flags);
|
|
-
|
|
if (hostval != guestval) {
|
|
msrval = setguest ? guestval : hostval;
|
|
wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
|
|
@@ -745,12 +765,180 @@ static int __init nospectre_v1_cmdline(char *str)
|
|
}
|
|
early_param("nospectre_v1", nospectre_v1_cmdline);
|
|
|
|
-#undef pr_fmt
|
|
-#define pr_fmt(fmt) "Spectre V2 : " fmt
|
|
-
|
|
static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
|
|
SPECTRE_V2_NONE;
|
|
|
|
+#undef pr_fmt
|
|
+#define pr_fmt(fmt) "RETBleed: " fmt
|
|
+
|
|
+enum retbleed_mitigation {
|
|
+ RETBLEED_MITIGATION_NONE,
|
|
+ RETBLEED_MITIGATION_UNRET,
|
|
+ RETBLEED_MITIGATION_IBPB,
|
|
+ RETBLEED_MITIGATION_IBRS,
|
|
+ RETBLEED_MITIGATION_EIBRS,
|
|
+};
|
|
+
|
|
+enum retbleed_mitigation_cmd {
|
|
+ RETBLEED_CMD_OFF,
|
|
+ RETBLEED_CMD_AUTO,
|
|
+ RETBLEED_CMD_UNRET,
|
|
+ RETBLEED_CMD_IBPB,
|
|
+};
|
|
+
|
|
+const char * const retbleed_strings[] = {
|
|
+ [RETBLEED_MITIGATION_NONE] = "Vulnerable",
|
|
+ [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk",
|
|
+ [RETBLEED_MITIGATION_IBPB] = "Mitigation: IBPB",
|
|
+ [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS",
|
|
+ [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS",
|
|
+};
|
|
+
|
|
+static enum retbleed_mitigation retbleed_mitigation __ro_after_init =
|
|
+ RETBLEED_MITIGATION_NONE;
|
|
+static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init =
|
|
+ RETBLEED_CMD_AUTO;
|
|
+
|
|
+static int __ro_after_init retbleed_nosmt = false;
|
|
+
|
|
+static int __init retbleed_parse_cmdline(char *str)
|
|
+{
|
|
+ if (!str)
|
|
+ return -EINVAL;
|
|
+
|
|
+ while (str) {
|
|
+ char *next = strchr(str, ',');
|
|
+ if (next) {
|
|
+ *next = 0;
|
|
+ next++;
|
|
+ }
|
|
+
|
|
+ if (!strcmp(str, "off")) {
|
|
+ retbleed_cmd = RETBLEED_CMD_OFF;
|
|
+ } else if (!strcmp(str, "auto")) {
|
|
+ retbleed_cmd = RETBLEED_CMD_AUTO;
|
|
+ } else if (!strcmp(str, "unret")) {
|
|
+ retbleed_cmd = RETBLEED_CMD_UNRET;
|
|
+ } else if (!strcmp(str, "ibpb")) {
|
|
+ retbleed_cmd = RETBLEED_CMD_IBPB;
|
|
+ } else if (!strcmp(str, "nosmt")) {
|
|
+ retbleed_nosmt = true;
|
|
+ } else {
|
|
+ pr_err("Ignoring unknown retbleed option (%s).", str);
|
|
+ }
|
|
+
|
|
+ str = next;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+early_param("retbleed", retbleed_parse_cmdline);
|
|
+
|
|
+#define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n"
|
|
+#define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n"
|
|
+
|
|
+static void __init retbleed_select_mitigation(void)
|
|
+{
|
|
+ bool mitigate_smt = false;
|
|
+
|
|
+ if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off())
|
|
+ return;
|
|
+
|
|
+ switch (retbleed_cmd) {
|
|
+ case RETBLEED_CMD_OFF:
|
|
+ return;
|
|
+
|
|
+ case RETBLEED_CMD_UNRET:
|
|
+ if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) {
|
|
+ retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
|
|
+ } else {
|
|
+ pr_err("WARNING: kernel not compiled with CPU_UNRET_ENTRY.\n");
|
|
+ goto do_cmd_auto;
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case RETBLEED_CMD_IBPB:
|
|
+ if (!boot_cpu_has(X86_FEATURE_IBPB)) {
|
|
+ pr_err("WARNING: CPU does not support IBPB.\n");
|
|
+ goto do_cmd_auto;
|
|
+ } else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
|
|
+ retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
|
|
+ } else {
|
|
+ pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
|
|
+ goto do_cmd_auto;
|
|
+ }
|
|
+ break;
|
|
+
|
|
+do_cmd_auto:
|
|
+ case RETBLEED_CMD_AUTO:
|
|
+ default:
|
|
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
|
|
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
|
|
+ if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY))
|
|
+ retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
|
|
+ else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY) && boot_cpu_has(X86_FEATURE_IBPB))
|
|
+ retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * The Intel mitigation (IBRS or eIBRS) was already selected in
|
|
+ * spectre_v2_select_mitigation(). 'retbleed_mitigation' will
|
|
+ * be set accordingly below.
|
|
+ */
|
|
+
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ switch (retbleed_mitigation) {
|
|
+ case RETBLEED_MITIGATION_UNRET:
|
|
+ setup_force_cpu_cap(X86_FEATURE_RETHUNK);
|
|
+ setup_force_cpu_cap(X86_FEATURE_UNRET);
|
|
+
|
|
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
|
|
+ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
|
|
+ pr_err(RETBLEED_UNTRAIN_MSG);
|
|
+
|
|
+ mitigate_smt = true;
|
|
+ break;
|
|
+
|
|
+ case RETBLEED_MITIGATION_IBPB:
|
|
+ setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
|
|
+ mitigate_smt = true;
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ if (mitigate_smt && !boot_cpu_has(X86_FEATURE_STIBP) &&
|
|
+ (retbleed_nosmt || cpu_mitigations_auto_nosmt()))
|
|
+ cpu_smt_disable(false);
|
|
+
|
|
+ /*
|
|
+ * Let IBRS trump all on Intel without affecting the effects of the
|
|
+ * retbleed= cmdline option.
|
|
+ */
|
|
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
|
|
+ switch (spectre_v2_enabled) {
|
|
+ case SPECTRE_V2_IBRS:
|
|
+ retbleed_mitigation = RETBLEED_MITIGATION_IBRS;
|
|
+ break;
|
|
+ case SPECTRE_V2_EIBRS:
|
|
+ case SPECTRE_V2_EIBRS_RETPOLINE:
|
|
+ case SPECTRE_V2_EIBRS_LFENCE:
|
|
+ retbleed_mitigation = RETBLEED_MITIGATION_EIBRS;
|
|
+ break;
|
|
+ default:
|
|
+ pr_err(RETBLEED_INTEL_MSG);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ pr_info("%s\n", retbleed_strings[retbleed_mitigation]);
|
|
+}
|
|
+
|
|
+#undef pr_fmt
|
|
+#define pr_fmt(fmt) "Spectre V2 : " fmt
|
|
+
|
|
static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init =
|
|
SPECTRE_V2_USER_NONE;
|
|
static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init =
|
|
@@ -821,6 +1009,7 @@ enum spectre_v2_mitigation_cmd {
|
|
SPECTRE_V2_CMD_EIBRS,
|
|
SPECTRE_V2_CMD_EIBRS_RETPOLINE,
|
|
SPECTRE_V2_CMD_EIBRS_LFENCE,
|
|
+ SPECTRE_V2_CMD_IBRS,
|
|
};
|
|
|
|
enum spectre_v2_user_cmd {
|
|
@@ -861,13 +1050,15 @@ static void __init spec_v2_user_print_cond(const char *reason, bool secure)
|
|
pr_info("spectre_v2_user=%s forced on command line.\n", reason);
|
|
}
|
|
|
|
+static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd;
|
|
+
|
|
static enum spectre_v2_user_cmd __init
|
|
-spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
|
|
+spectre_v2_parse_user_cmdline(void)
|
|
{
|
|
char arg[20];
|
|
int ret, i;
|
|
|
|
- switch (v2_cmd) {
|
|
+ switch (spectre_v2_cmd) {
|
|
case SPECTRE_V2_CMD_NONE:
|
|
return SPECTRE_V2_USER_CMD_NONE;
|
|
case SPECTRE_V2_CMD_FORCE:
|
|
@@ -893,15 +1084,16 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
|
|
return SPECTRE_V2_USER_CMD_AUTO;
|
|
}
|
|
|
|
-static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode)
|
|
+static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
|
|
{
|
|
- return (mode == SPECTRE_V2_EIBRS ||
|
|
- mode == SPECTRE_V2_EIBRS_RETPOLINE ||
|
|
- mode == SPECTRE_V2_EIBRS_LFENCE);
|
|
+ return mode == SPECTRE_V2_IBRS ||
|
|
+ mode == SPECTRE_V2_EIBRS ||
|
|
+ mode == SPECTRE_V2_EIBRS_RETPOLINE ||
|
|
+ mode == SPECTRE_V2_EIBRS_LFENCE;
|
|
}
|
|
|
|
static void __init
|
|
-spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
|
|
+spectre_v2_user_select_mitigation(void)
|
|
{
|
|
enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE;
|
|
bool smt_possible = IS_ENABLED(CONFIG_SMP);
|
|
@@ -914,7 +1106,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
|
|
cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
|
|
smt_possible = false;
|
|
|
|
- cmd = spectre_v2_parse_user_cmdline(v2_cmd);
|
|
+ cmd = spectre_v2_parse_user_cmdline();
|
|
switch (cmd) {
|
|
case SPECTRE_V2_USER_CMD_NONE:
|
|
goto set_mode;
|
|
@@ -962,12 +1154,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
|
|
}
|
|
|
|
/*
|
|
- * If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not
|
|
- * required.
|
|
+ * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible,
|
|
+ * STIBP is not required.
|
|
*/
|
|
if (!boot_cpu_has(X86_FEATURE_STIBP) ||
|
|
!smt_possible ||
|
|
- spectre_v2_in_eibrs_mode(spectre_v2_enabled))
|
|
+ spectre_v2_in_ibrs_mode(spectre_v2_enabled))
|
|
return;
|
|
|
|
/*
|
|
@@ -979,6 +1171,13 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
|
|
boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
|
|
mode = SPECTRE_V2_USER_STRICT_PREFERRED;
|
|
|
|
+ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) {
|
|
+ if (mode != SPECTRE_V2_USER_STRICT &&
|
|
+ mode != SPECTRE_V2_USER_STRICT_PREFERRED)
|
|
+ pr_info("Selecting STIBP always-on mode to complement retbleed mitigation\n");
|
|
+ mode = SPECTRE_V2_USER_STRICT_PREFERRED;
|
|
+ }
|
|
+
|
|
spectre_v2_user_stibp = mode;
|
|
|
|
set_mode:
|
|
@@ -992,6 +1191,7 @@ static const char * const spectre_v2_strings[] = {
|
|
[SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS",
|
|
[SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE",
|
|
[SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines",
|
|
+ [SPECTRE_V2_IBRS] = "Mitigation: IBRS",
|
|
};
|
|
|
|
static const struct {
|
|
@@ -1009,6 +1209,7 @@ static const struct {
|
|
{ "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false },
|
|
{ "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false },
|
|
{ "auto", SPECTRE_V2_CMD_AUTO, false },
|
|
+ { "ibrs", SPECTRE_V2_CMD_IBRS, false },
|
|
};
|
|
|
|
static void __init spec_v2_print_cond(const char *reason, bool secure)
|
|
@@ -1071,6 +1272,30 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
|
|
return SPECTRE_V2_CMD_AUTO;
|
|
}
|
|
|
|
+ if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_CPU_IBRS_ENTRY)) {
|
|
+ pr_err("%s selected but not compiled in. Switching to AUTO select\n",
|
|
+ mitigation_options[i].option);
|
|
+ return SPECTRE_V2_CMD_AUTO;
|
|
+ }
|
|
+
|
|
+ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
|
|
+ pr_err("%s selected but not Intel CPU. Switching to AUTO select\n",
|
|
+ mitigation_options[i].option);
|
|
+ return SPECTRE_V2_CMD_AUTO;
|
|
+ }
|
|
+
|
|
+ if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) {
|
|
+ pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n",
|
|
+ mitigation_options[i].option);
|
|
+ return SPECTRE_V2_CMD_AUTO;
|
|
+ }
|
|
+
|
|
+ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) {
|
|
+ pr_err("%s selected but running as XenPV guest. Switching to AUTO select\n",
|
|
+ mitigation_options[i].option);
|
|
+ return SPECTRE_V2_CMD_AUTO;
|
|
+ }
|
|
+
|
|
spec_v2_print_cond(mitigation_options[i].option,
|
|
mitigation_options[i].secure);
|
|
return cmd;
|
|
@@ -1086,6 +1311,22 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void)
|
|
return SPECTRE_V2_RETPOLINE;
|
|
}
|
|
|
|
+/* Disable in-kernel use of non-RSB RET predictors */
|
|
+static void __init spec_ctrl_disable_kernel_rrsba(void)
|
|
+{
|
|
+ u64 ia32_cap;
|
|
+
|
|
+ if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
|
|
+ return;
|
|
+
|
|
+ ia32_cap = x86_read_arch_cap_msr();
|
|
+
|
|
+ if (ia32_cap & ARCH_CAP_RRSBA) {
|
|
+ x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
|
|
+ write_spec_ctrl_current(x86_spec_ctrl_base, true);
|
|
+ }
|
|
+}
|
|
+
|
|
static void __init spectre_v2_select_mitigation(void)
|
|
{
|
|
enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
|
|
@@ -1110,6 +1351,15 @@ static void __init spectre_v2_select_mitigation(void)
|
|
break;
|
|
}
|
|
|
|
+ if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) &&
|
|
+ boot_cpu_has_bug(X86_BUG_RETBLEED) &&
|
|
+ retbleed_cmd != RETBLEED_CMD_OFF &&
|
|
+ boot_cpu_has(X86_FEATURE_IBRS) &&
|
|
+ boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
|
|
+ mode = SPECTRE_V2_IBRS;
|
|
+ break;
|
|
+ }
|
|
+
|
|
mode = spectre_v2_select_retpoline();
|
|
break;
|
|
|
|
@@ -1126,6 +1376,10 @@ static void __init spectre_v2_select_mitigation(void)
|
|
mode = spectre_v2_select_retpoline();
|
|
break;
|
|
|
|
+ case SPECTRE_V2_CMD_IBRS:
|
|
+ mode = SPECTRE_V2_IBRS;
|
|
+ break;
|
|
+
|
|
case SPECTRE_V2_CMD_EIBRS:
|
|
mode = SPECTRE_V2_EIBRS;
|
|
break;
|
|
@@ -1142,10 +1396,9 @@ static void __init spectre_v2_select_mitigation(void)
|
|
if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
|
|
pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
|
|
|
|
- if (spectre_v2_in_eibrs_mode(mode)) {
|
|
- /* Force it so VMEXIT will restore correctly */
|
|
+ if (spectre_v2_in_ibrs_mode(mode)) {
|
|
x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
|
|
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
|
|
+ write_spec_ctrl_current(x86_spec_ctrl_base, true);
|
|
}
|
|
|
|
switch (mode) {
|
|
@@ -1153,6 +1406,10 @@ static void __init spectre_v2_select_mitigation(void)
|
|
case SPECTRE_V2_EIBRS:
|
|
break;
|
|
|
|
+ case SPECTRE_V2_IBRS:
|
|
+ setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS);
|
|
+ break;
|
|
+
|
|
case SPECTRE_V2_LFENCE:
|
|
case SPECTRE_V2_EIBRS_LFENCE:
|
|
setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE);
|
|
@@ -1164,43 +1421,107 @@ static void __init spectre_v2_select_mitigation(void)
|
|
break;
|
|
}
|
|
|
|
+ /*
|
|
+ * Disable alternate RSB predictions in kernel when indirect CALLs and
|
|
+ * JMPs gets protection against BHI and Intramode-BTI, but RET
|
|
+ * prediction from a non-RSB predictor is still a risk.
|
|
+ */
|
|
+ if (mode == SPECTRE_V2_EIBRS_LFENCE ||
|
|
+ mode == SPECTRE_V2_EIBRS_RETPOLINE ||
|
|
+ mode == SPECTRE_V2_RETPOLINE)
|
|
+ spec_ctrl_disable_kernel_rrsba();
|
|
+
|
|
spectre_v2_enabled = mode;
|
|
pr_info("%s\n", spectre_v2_strings[mode]);
|
|
|
|
/*
|
|
- * If spectre v2 protection has been enabled, unconditionally fill
|
|
- * RSB during a context switch; this protects against two independent
|
|
- * issues:
|
|
+ * If Spectre v2 protection has been enabled, fill the RSB during a
|
|
+ * context switch. In general there are two types of RSB attacks
|
|
+ * across context switches, for which the CALLs/RETs may be unbalanced.
|
|
*
|
|
- * - RSB underflow (and switch to BTB) on Skylake+
|
|
- * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs
|
|
+ * 1) RSB underflow
|
|
+ *
|
|
+ * Some Intel parts have "bottomless RSB". When the RSB is empty,
|
|
+ * speculated return targets may come from the branch predictor,
|
|
+ * which could have a user-poisoned BTB or BHB entry.
|
|
+ *
|
|
+ * AMD has it even worse: *all* returns are speculated from the BTB,
|
|
+ * regardless of the state of the RSB.
|
|
+ *
|
|
+ * When IBRS or eIBRS is enabled, the "user -> kernel" attack
|
|
+ * scenario is mitigated by the IBRS branch prediction isolation
|
|
+ * properties, so the RSB buffer filling wouldn't be necessary to
|
|
+ * protect against this type of attack.
|
|
+ *
|
|
+ * The "user -> user" attack scenario is mitigated by RSB filling.
|
|
+ *
|
|
+ * 2) Poisoned RSB entry
|
|
+ *
|
|
+ * If the 'next' in-kernel return stack is shorter than 'prev',
|
|
+ * 'next' could be tricked into speculating with a user-poisoned RSB
|
|
+ * entry.
|
|
+ *
|
|
+ * The "user -> kernel" attack scenario is mitigated by SMEP and
|
|
+ * eIBRS.
|
|
+ *
|
|
+ * The "user -> user" scenario, also known as SpectreBHB, requires
|
|
+ * RSB clearing.
|
|
+ *
|
|
+ * So to mitigate all cases, unconditionally fill RSB on context
|
|
+ * switches.
|
|
+ *
|
|
+ * FIXME: Is this pointless for retbleed-affected AMD?
|
|
*/
|
|
setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
|
|
pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
|
|
|
|
/*
|
|
- * Retpoline means the kernel is safe because it has no indirect
|
|
- * branches. Enhanced IBRS protects firmware too, so, enable restricted
|
|
- * speculation around firmware calls only when Enhanced IBRS isn't
|
|
- * supported.
|
|
+ * Similar to context switches, there are two types of RSB attacks
|
|
+ * after vmexit:
|
|
+ *
|
|
+ * 1) RSB underflow
|
|
+ *
|
|
+ * 2) Poisoned RSB entry
|
|
+ *
|
|
+ * When retpoline is enabled, both are mitigated by filling/clearing
|
|
+ * the RSB.
|
|
+ *
|
|
+ * When IBRS is enabled, while #1 would be mitigated by the IBRS branch
|
|
+ * prediction isolation protections, RSB still needs to be cleared
|
|
+ * because of #2. Note that SMEP provides no protection here, unlike
|
|
+ * user-space-poisoned RSB entries.
|
|
+ *
|
|
+ * eIBRS, on the other hand, has RSB-poisoning protections, so it
|
|
+ * doesn't need RSB clearing after vmexit.
|
|
+ */
|
|
+ if (boot_cpu_has(X86_FEATURE_RETPOLINE) ||
|
|
+ boot_cpu_has(X86_FEATURE_KERNEL_IBRS))
|
|
+ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
|
|
+
|
|
+ /*
|
|
+ * Retpoline protects the kernel, but doesn't protect firmware. IBRS
|
|
+ * and Enhanced IBRS protect firmware too, so enable IBRS around
|
|
+ * firmware calls only when IBRS / Enhanced IBRS aren't otherwise
|
|
+ * enabled.
|
|
*
|
|
* Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because
|
|
* the user might select retpoline on the kernel command line and if
|
|
* the CPU supports Enhanced IBRS, kernel might un-intentionally not
|
|
* enable IBRS around firmware calls.
|
|
*/
|
|
- if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) {
|
|
+ if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) {
|
|
setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
|
|
pr_info("Enabling Restricted Speculation for firmware calls\n");
|
|
}
|
|
|
|
/* Set up IBPB and STIBP depending on the general spectre V2 command */
|
|
- spectre_v2_user_select_mitigation(cmd);
|
|
+ spectre_v2_cmd = cmd;
|
|
}
|
|
|
|
static void update_stibp_msr(void * __unused)
|
|
{
|
|
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
|
|
+ u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP);
|
|
+ write_spec_ctrl_current(val, true);
|
|
}
|
|
|
|
/* Update x86_spec_ctrl_base in case SMT state changed. */
|
|
@@ -1416,16 +1737,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
|
|
break;
|
|
}
|
|
|
|
- /*
|
|
- * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper
|
|
- * bit in the mask to allow guests to use the mitigation even in the
|
|
- * case where the host does not enable it.
|
|
- */
|
|
- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
|
|
- static_cpu_has(X86_FEATURE_AMD_SSBD)) {
|
|
- x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
|
|
- }
|
|
-
|
|
/*
|
|
* We have three CPU feature flags that are in play here:
|
|
* - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
|
|
@@ -1443,7 +1754,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
|
|
x86_amd_ssb_disable();
|
|
} else {
|
|
x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
|
|
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
|
|
+ write_spec_ctrl_current(x86_spec_ctrl_base, true);
|
|
}
|
|
}
|
|
|
|
@@ -1694,7 +2005,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
|
|
void x86_spec_ctrl_setup_ap(void)
|
|
{
|
|
if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
|
|
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
|
|
+ write_spec_ctrl_current(x86_spec_ctrl_base, true);
|
|
|
|
if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
|
|
x86_amd_ssb_disable();
|
|
@@ -1931,7 +2242,7 @@ static ssize_t mmio_stale_data_show_state(char *buf)
|
|
|
|
static char *stibp_state(void)
|
|
{
|
|
- if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
|
|
+ if (spectre_v2_in_ibrs_mode(spectre_v2_enabled))
|
|
return "";
|
|
|
|
switch (spectre_v2_user_stibp) {
|
|
@@ -1987,6 +2298,24 @@ static ssize_t srbds_show_state(char *buf)
|
|
return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]);
|
|
}
|
|
|
|
+static ssize_t retbleed_show_state(char *buf)
|
|
+{
|
|
+ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) {
|
|
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
|
|
+ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
|
|
+ return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n");
|
|
+
|
|
+ return sprintf(buf, "%s; SMT %s\n",
|
|
+ retbleed_strings[retbleed_mitigation],
|
|
+ !sched_smt_active() ? "disabled" :
|
|
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
|
|
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ?
|
|
+ "enabled with STIBP protection" : "vulnerable");
|
|
+ }
|
|
+
|
|
+ return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
|
|
+}
|
|
+
|
|
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
|
|
char *buf, unsigned int bug)
|
|
{
|
|
@@ -2032,6 +2361,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
|
|
case X86_BUG_MMIO_STALE_DATA:
|
|
return mmio_stale_data_show_state(buf);
|
|
|
|
+ case X86_BUG_RETBLEED:
|
|
+ return retbleed_show_state(buf);
|
|
+
|
|
default:
|
|
break;
|
|
}
|
|
@@ -2088,4 +2420,9 @@ ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *at
|
|
{
|
|
return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
|
|
}
|
|
+
|
|
+ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf)
|
|
+{
|
|
+ return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED);
|
|
+}
|
|
#endif
|
|
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
|
|
index af5d0c188f7b..1f43ddf2ffc3 100644
|
|
--- a/arch/x86/kernel/cpu/common.c
|
|
+++ b/arch/x86/kernel/cpu/common.c
|
|
@@ -1231,48 +1231,60 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
|
|
{}
|
|
};
|
|
|
|
+#define VULNBL(vendor, family, model, blacklist) \
|
|
+ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist)
|
|
+
|
|
#define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \
|
|
X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \
|
|
INTEL_FAM6_##model, steppings, \
|
|
X86_FEATURE_ANY, issues)
|
|
|
|
+#define VULNBL_AMD(family, blacklist) \
|
|
+ VULNBL(AMD, family, X86_MODEL_ANY, blacklist)
|
|
+
|
|
+#define VULNBL_HYGON(family, blacklist) \
|
|
+ VULNBL(HYGON, family, X86_MODEL_ANY, blacklist)
|
|
+
|
|
#define SRBDS BIT(0)
|
|
/* CPU is affected by X86_BUG_MMIO_STALE_DATA */
|
|
#define MMIO BIT(1)
|
|
/* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */
|
|
#define MMIO_SBDS BIT(2)
|
|
+/* CPU is affected by RETbleed, speculating where you would not expect it */
|
|
+#define RETBLEED BIT(3)
|
|
|
|
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
|
|
VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
|
|
VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS),
|
|
VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS),
|
|
VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS),
|
|
- VULNBL_INTEL_STEPPINGS(HASWELL_X, BIT(2) | BIT(4), MMIO),
|
|
- VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x5), MMIO),
|
|
+ VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO),
|
|
+ VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO),
|
|
VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS),
|
|
VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO),
|
|
VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS),
|
|
- VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO),
|
|
- VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS),
|
|
- VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) |
|
|
- BIT(7) | BIT(0xB), MMIO),
|
|
- VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO),
|
|
- VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS),
|
|
- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO),
|
|
- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS),
|
|
- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO),
|
|
- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS),
|
|
- VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS),
|
|
- VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO),
|
|
- VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO),
|
|
- VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS),
|
|
- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
|
|
- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO),
|
|
- VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
|
|
- VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO),
|
|
- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
|
|
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
|
|
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED),
|
|
+ VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
|
|
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
|
|
+ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
|
|
+ VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED),
|
|
+ VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
|
|
+ VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO),
|
|
+ VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO),
|
|
+ VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
|
|
+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED),
|
|
+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
|
|
+ VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
|
|
+ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED),
|
|
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
|
|
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO),
|
|
- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS),
|
|
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
|
|
+
|
|
+ VULNBL_AMD(0x15, RETBLEED),
|
|
+ VULNBL_AMD(0x16, RETBLEED),
|
|
+ VULNBL_AMD(0x17, RETBLEED),
|
|
+ VULNBL_HYGON(0x18, RETBLEED),
|
|
{}
|
|
};
|
|
|
|
@@ -1374,6 +1386,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
|
|
!arch_cap_mmio_immune(ia32_cap))
|
|
setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
|
|
|
|
+ if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
|
|
+ if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
|
|
+ setup_force_cpu_bug(X86_BUG_RETBLEED);
|
|
+ }
|
|
+
|
|
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
|
|
return;
|
|
|
|
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
|
|
index 2a8e584fc991..7c9b5893c30a 100644
|
|
--- a/arch/x86/kernel/cpu/cpu.h
|
|
+++ b/arch/x86/kernel/cpu/cpu.h
|
|
@@ -61,6 +61,8 @@ static inline void tsx_init(void) { }
|
|
static inline void tsx_ap_init(void) { }
|
|
#endif /* CONFIG_CPU_SUP_INTEL */
|
|
|
|
+extern void init_spectral_chicken(struct cpuinfo_x86 *c);
|
|
+
|
|
extern void get_cpu_cap(struct cpuinfo_x86 *c);
|
|
extern void get_cpu_address_sizes(struct cpuinfo_x86 *c);
|
|
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
|
|
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
|
|
index 3fcdda4c1e11..21fd425088fe 100644
|
|
--- a/arch/x86/kernel/cpu/hygon.c
|
|
+++ b/arch/x86/kernel/cpu/hygon.c
|
|
@@ -302,6 +302,12 @@ static void init_hygon(struct cpuinfo_x86 *c)
|
|
/* get apicid instead of initial apic id from cpuid */
|
|
c->apicid = hard_smp_processor_id();
|
|
|
|
+ /*
|
|
+ * XXX someone from Hygon needs to confirm this DTRT
|
|
+ *
|
|
+ init_spectral_chicken(c);
|
|
+ */
|
|
+
|
|
set_cpu_cap(c, X86_FEATURE_ZEN);
|
|
set_cpu_cap(c, X86_FEATURE_CPB);
|
|
|
|
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
|
|
index 4143b1e4c5c6..fcfb03f5f89b 100644
|
|
--- a/arch/x86/kernel/cpu/scattered.c
|
|
+++ b/arch/x86/kernel/cpu/scattered.c
|
|
@@ -27,6 +27,7 @@ static const struct cpuid_bit cpuid_bits[] = {
|
|
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
|
|
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
|
|
{ X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
|
|
+ { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
|
|
{ X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
|
|
{ X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
|
|
{ X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
|
|
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
|
|
index 1e31c7d21597..6892ca67d9c6 100644
|
|
--- a/arch/x86/kernel/ftrace.c
|
|
+++ b/arch/x86/kernel/ftrace.c
|
|
@@ -303,7 +303,7 @@ union ftrace_op_code_union {
|
|
} __attribute__((packed));
|
|
};
|
|
|
|
-#define RET_SIZE 1 + IS_ENABLED(CONFIG_SLS)
|
|
+#define RET_SIZE (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS))
|
|
|
|
static unsigned long
|
|
create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
|
|
@@ -359,7 +359,10 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
|
|
goto fail;
|
|
|
|
ip = trampoline + size;
|
|
- memcpy(ip, retq, RET_SIZE);
|
|
+ if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
|
|
+ __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, &__x86_return_thunk, JMP32_INSN_SIZE);
|
|
+ else
|
|
+ memcpy(ip, retq, sizeof(retq));
|
|
|
|
/* No need to test direct calls on created trampolines */
|
|
if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
|
|
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
|
|
index eb8656bac99b..9b7acc9c7874 100644
|
|
--- a/arch/x86/kernel/head_32.S
|
|
+++ b/arch/x86/kernel/head_32.S
|
|
@@ -23,6 +23,7 @@
|
|
#include <asm/cpufeatures.h>
|
|
#include <asm/percpu.h>
|
|
#include <asm/nops.h>
|
|
+#include <asm/nospec-branch.h>
|
|
#include <asm/bootparam.h>
|
|
#include <asm/export.h>
|
|
#include <asm/pgtable_32.h>
|
|
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
|
|
index b8e3019547a5..3178fd81f93f 100644
|
|
--- a/arch/x86/kernel/head_64.S
|
|
+++ b/arch/x86/kernel/head_64.S
|
|
@@ -334,6 +334,8 @@ SYM_CODE_START_NOALIGN(vc_boot_ghcb)
|
|
UNWIND_HINT_IRET_REGS offset=8
|
|
ENDBR
|
|
|
|
+ ANNOTATE_UNRET_END
|
|
+
|
|
/* Build pt_regs */
|
|
PUSH_AND_CLEAR_REGS
|
|
|
|
@@ -393,6 +395,7 @@ SYM_CODE_END(early_idt_handler_array)
|
|
|
|
SYM_CODE_START_LOCAL(early_idt_handler_common)
|
|
UNWIND_HINT_IRET_REGS offset=16
|
|
+ ANNOTATE_UNRET_END
|
|
/*
|
|
* The stack is the hardware frame, an error code or zero, and the
|
|
* vector number.
|
|
@@ -442,6 +445,8 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb)
|
|
UNWIND_HINT_IRET_REGS offset=8
|
|
ENDBR
|
|
|
|
+ ANNOTATE_UNRET_END
|
|
+
|
|
/* Build pt_regs */
|
|
PUSH_AND_CLEAR_REGS
|
|
|
|
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
|
|
index b98ffcf4d250..67828d973389 100644
|
|
--- a/arch/x86/kernel/module.c
|
|
+++ b/arch/x86/kernel/module.c
|
|
@@ -253,7 +253,7 @@ int module_finalize(const Elf_Ehdr *hdr,
|
|
{
|
|
const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
|
|
*para = NULL, *orc = NULL, *orc_ip = NULL,
|
|
- *retpolines = NULL, *ibt_endbr = NULL;
|
|
+ *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL;
|
|
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
|
|
|
|
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
|
|
@@ -271,6 +271,8 @@ int module_finalize(const Elf_Ehdr *hdr,
|
|
orc_ip = s;
|
|
if (!strcmp(".retpoline_sites", secstrings + s->sh_name))
|
|
retpolines = s;
|
|
+ if (!strcmp(".return_sites", secstrings + s->sh_name))
|
|
+ returns = s;
|
|
if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name))
|
|
ibt_endbr = s;
|
|
}
|
|
@@ -287,6 +289,10 @@ int module_finalize(const Elf_Ehdr *hdr,
|
|
void *rseg = (void *)retpolines->sh_addr;
|
|
apply_retpolines(rseg, rseg + retpolines->sh_size);
|
|
}
|
|
+ if (returns) {
|
|
+ void *rseg = (void *)returns->sh_addr;
|
|
+ apply_returns(rseg, rseg + returns->sh_size);
|
|
+ }
|
|
if (alt) {
|
|
/* patch .altinstructions */
|
|
void *aseg = (void *)alt->sh_addr;
|
|
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
|
|
index b370767f5b19..622dc3673c37 100644
|
|
--- a/arch/x86/kernel/process.c
|
|
+++ b/arch/x86/kernel/process.c
|
|
@@ -600,7 +600,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp,
|
|
}
|
|
|
|
if (updmsr)
|
|
- wrmsrl(MSR_IA32_SPEC_CTRL, msr);
|
|
+ write_spec_ctrl_current(msr, false);
|
|
}
|
|
|
|
static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
|
|
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
|
|
index fcc8a7699103..c7c4b1917336 100644
|
|
--- a/arch/x86/kernel/relocate_kernel_32.S
|
|
+++ b/arch/x86/kernel/relocate_kernel_32.S
|
|
@@ -7,10 +7,12 @@
|
|
#include <linux/linkage.h>
|
|
#include <asm/page_types.h>
|
|
#include <asm/kexec.h>
|
|
+#include <asm/nospec-branch.h>
|
|
#include <asm/processor-flags.h>
|
|
|
|
/*
|
|
- * Must be relocatable PIC code callable as a C function
|
|
+ * Must be relocatable PIC code callable as a C function, in particular
|
|
+ * there must be a plain RET and not jump to return thunk.
|
|
*/
|
|
|
|
#define PTR(x) (x << 2)
|
|
@@ -91,7 +93,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
|
|
movl %edi, %eax
|
|
addl $(identity_mapped - relocate_kernel), %eax
|
|
pushl %eax
|
|
- RET
|
|
+ ANNOTATE_UNRET_SAFE
|
|
+ ret
|
|
+ int3
|
|
SYM_CODE_END(relocate_kernel)
|
|
|
|
SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
|
|
@@ -159,12 +163,15 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
|
|
xorl %edx, %edx
|
|
xorl %esi, %esi
|
|
xorl %ebp, %ebp
|
|
- RET
|
|
+ ANNOTATE_UNRET_SAFE
|
|
+ ret
|
|
+ int3
|
|
1:
|
|
popl %edx
|
|
movl CP_PA_SWAP_PAGE(%edi), %esp
|
|
addl $PAGE_SIZE, %esp
|
|
2:
|
|
+ ANNOTATE_RETPOLINE_SAFE
|
|
call *%edx
|
|
|
|
/* get the re-entry point of the peer system */
|
|
@@ -190,7 +197,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
|
|
movl %edi, %eax
|
|
addl $(virtual_mapped - relocate_kernel), %eax
|
|
pushl %eax
|
|
- RET
|
|
+ ANNOTATE_UNRET_SAFE
|
|
+ ret
|
|
+ int3
|
|
SYM_CODE_END(identity_mapped)
|
|
|
|
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
|
|
@@ -208,7 +217,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
|
|
popl %edi
|
|
popl %esi
|
|
popl %ebx
|
|
- RET
|
|
+ ANNOTATE_UNRET_SAFE
|
|
+ ret
|
|
+ int3
|
|
SYM_CODE_END(virtual_mapped)
|
|
|
|
/* Do the copies */
|
|
@@ -271,7 +282,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
|
|
popl %edi
|
|
popl %ebx
|
|
popl %ebp
|
|
- RET
|
|
+ ANNOTATE_UNRET_SAFE
|
|
+ ret
|
|
+ int3
|
|
SYM_CODE_END(swap_pages)
|
|
|
|
.globl kexec_control_code_size
|
|
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
|
|
index c1d8626c53b6..4809c0dc4eb0 100644
|
|
--- a/arch/x86/kernel/relocate_kernel_64.S
|
|
+++ b/arch/x86/kernel/relocate_kernel_64.S
|
|
@@ -13,7 +13,8 @@
|
|
#include <asm/unwind_hints.h>
|
|
|
|
/*
|
|
- * Must be relocatable PIC code callable as a C function
|
|
+ * Must be relocatable PIC code callable as a C function, in particular
|
|
+ * there must be a plain RET and not jump to return thunk.
|
|
*/
|
|
|
|
#define PTR(x) (x << 3)
|
|
@@ -105,7 +106,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
|
|
/* jump to identity mapped page */
|
|
addq $(identity_mapped - relocate_kernel), %r8
|
|
pushq %r8
|
|
- RET
|
|
+ ANNOTATE_UNRET_SAFE
|
|
+ ret
|
|
+ int3
|
|
SYM_CODE_END(relocate_kernel)
|
|
|
|
SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
|
|
@@ -200,7 +203,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
|
|
xorl %r14d, %r14d
|
|
xorl %r15d, %r15d
|
|
|
|
- RET
|
|
+ ANNOTATE_UNRET_SAFE
|
|
+ ret
|
|
+ int3
|
|
|
|
1:
|
|
popq %rdx
|
|
@@ -219,7 +224,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
|
|
call swap_pages
|
|
movq $virtual_mapped, %rax
|
|
pushq %rax
|
|
- RET
|
|
+ ANNOTATE_UNRET_SAFE
|
|
+ ret
|
|
+ int3
|
|
SYM_CODE_END(identity_mapped)
|
|
|
|
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
|
|
@@ -241,7 +248,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
|
|
popq %r12
|
|
popq %rbp
|
|
popq %rbx
|
|
- RET
|
|
+ ANNOTATE_UNRET_SAFE
|
|
+ ret
|
|
+ int3
|
|
SYM_CODE_END(virtual_mapped)
|
|
|
|
/* Do the copies */
|
|
@@ -298,7 +307,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
|
|
lea PAGE_SIZE(%rax), %rsi
|
|
jmp 0b
|
|
3:
|
|
- RET
|
|
+ ANNOTATE_UNRET_SAFE
|
|
+ ret
|
|
+ int3
|
|
SYM_CODE_END(swap_pages)
|
|
|
|
.globl kexec_control_code_size
|
|
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
|
|
index c95b9ac5a457..6640be279dae 100644
|
|
--- a/arch/x86/kernel/setup.c
|
|
+++ b/arch/x86/kernel/setup.c
|
|
@@ -20,6 +20,7 @@
|
|
#include <linux/root_dev.h>
|
|
#include <linux/hugetlb.h>
|
|
#include <linux/tboot.h>
|
|
+#include <linux/security.h>
|
|
#include <linux/usb/xhci-dbgp.h>
|
|
#include <linux/static_call.h>
|
|
#include <linux/swiotlb.h>
|
|
@@ -946,6 +947,13 @@ void __init setup_arch(char **cmdline_p)
|
|
if (efi_enabled(EFI_BOOT))
|
|
efi_init();
|
|
|
|
+ efi_set_secure_boot(boot_params.secure_boot);
|
|
+
|
|
+#ifdef CONFIG_LOCK_DOWN_IN_EFI_SECURE_BOOT
|
|
+ if (efi_enabled(EFI_SECURE_BOOT))
|
|
+ security_lock_kernel_down("EFI Secure Boot mode", LOCKDOWN_INTEGRITY_MAX);
|
|
+#endif
|
|
+
|
|
dmi_setup();
|
|
|
|
/*
|
|
@@ -1115,19 +1123,7 @@ void __init setup_arch(char **cmdline_p)
|
|
/* Allocate bigger log buffer */
|
|
setup_log_buf(1);
|
|
|
|
- if (efi_enabled(EFI_BOOT)) {
|
|
- switch (boot_params.secure_boot) {
|
|
- case efi_secureboot_mode_disabled:
|
|
- pr_info("Secure boot disabled\n");
|
|
- break;
|
|
- case efi_secureboot_mode_enabled:
|
|
- pr_info("Secure boot enabled\n");
|
|
- break;
|
|
- default:
|
|
- pr_info("Secure boot could not be determined\n");
|
|
- break;
|
|
- }
|
|
- }
|
|
+ efi_set_secure_boot(boot_params.secure_boot);
|
|
|
|
reserve_initrd();
|
|
|
|
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
|
|
index aa72cefdd5be..aaaba85d6d7f 100644
|
|
--- a/arch/x86/kernel/static_call.c
|
|
+++ b/arch/x86/kernel/static_call.c
|
|
@@ -11,6 +11,13 @@ enum insn_type {
|
|
RET = 3, /* tramp / site cond-tail-call */
|
|
};
|
|
|
|
+/*
|
|
+ * ud1 %esp, %ecx - a 3 byte #UD that is unique to trampolines, chosen such
|
|
+ * that there is no false-positive trampoline identification while also being a
|
|
+ * speculation stop.
|
|
+ */
|
|
+static const u8 tramp_ud[] = { 0x0f, 0xb9, 0xcc };
|
|
+
|
|
/*
|
|
* cs cs cs xorl %eax, %eax - a single 5 byte instruction that clears %[er]ax
|
|
*/
|
|
@@ -18,7 +25,8 @@ static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 };
|
|
|
|
static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc };
|
|
|
|
-static void __ref __static_call_transform(void *insn, enum insn_type type, void *func)
|
|
+static void __ref __static_call_transform(void *insn, enum insn_type type,
|
|
+ void *func, bool modinit)
|
|
{
|
|
const void *emulate = NULL;
|
|
int size = CALL_INSN_SIZE;
|
|
@@ -43,14 +51,17 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void
|
|
break;
|
|
|
|
case RET:
|
|
- code = &retinsn;
|
|
+ if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
|
|
+ code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk);
|
|
+ else
|
|
+ code = &retinsn;
|
|
break;
|
|
}
|
|
|
|
if (memcmp(insn, code, size) == 0)
|
|
return;
|
|
|
|
- if (unlikely(system_state == SYSTEM_BOOTING))
|
|
+ if (system_state == SYSTEM_BOOTING || modinit)
|
|
return text_poke_early(insn, code, size);
|
|
|
|
text_poke_bp(insn, code, size, emulate);
|
|
@@ -60,7 +71,7 @@ static void __static_call_validate(void *insn, bool tail, bool tramp)
|
|
{
|
|
u8 opcode = *(u8 *)insn;
|
|
|
|
- if (tramp && memcmp(insn+5, "SCT", 3)) {
|
|
+ if (tramp && memcmp(insn+5, tramp_ud, 3)) {
|
|
pr_err("trampoline signature fail");
|
|
BUG();
|
|
}
|
|
@@ -104,14 +115,42 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
|
|
|
|
if (tramp) {
|
|
__static_call_validate(tramp, true, true);
|
|
- __static_call_transform(tramp, __sc_insn(!func, true), func);
|
|
+ __static_call_transform(tramp, __sc_insn(!func, true), func, false);
|
|
}
|
|
|
|
if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) {
|
|
__static_call_validate(site, tail, false);
|
|
- __static_call_transform(site, __sc_insn(!func, tail), func);
|
|
+ __static_call_transform(site, __sc_insn(!func, tail), func, false);
|
|
}
|
|
|
|
mutex_unlock(&text_mutex);
|
|
}
|
|
EXPORT_SYMBOL_GPL(arch_static_call_transform);
|
|
+
|
|
+#ifdef CONFIG_RETHUNK
|
|
+/*
|
|
+ * This is called by apply_returns() to fix up static call trampolines,
|
|
+ * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as
|
|
+ * having a return trampoline.
|
|
+ *
|
|
+ * The problem is that static_call() is available before determining
|
|
+ * X86_FEATURE_RETHUNK and, by implication, running alternatives.
|
|
+ *
|
|
+ * This means that __static_call_transform() above can have overwritten the
|
|
+ * return trampoline and we now need to fix things up to be consistent.
|
|
+ */
|
|
+bool __static_call_fixup(void *tramp, u8 op, void *dest)
|
|
+{
|
|
+ if (memcmp(tramp+5, tramp_ud, 3)) {
|
|
+ /* Not a trampoline site, not our problem. */
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ mutex_lock(&text_mutex);
|
|
+ if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk)
|
|
+ __static_call_transform(tramp, RET, NULL, true);
|
|
+ mutex_unlock(&text_mutex);
|
|
+
|
|
+ return true;
|
|
+}
|
|
+#endif
|
|
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
|
|
index 1563fb995005..4167215333fd 100644
|
|
--- a/arch/x86/kernel/traps.c
|
|
+++ b/arch/x86/kernel/traps.c
|
|
@@ -892,14 +892,10 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r
|
|
}
|
|
#endif
|
|
|
|
-struct bad_iret_stack {
|
|
- void *error_entry_ret;
|
|
- struct pt_regs regs;
|
|
-};
|
|
-
|
|
-asmlinkage __visible noinstr
|
|
-struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
|
|
+asmlinkage __visible noinstr struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs)
|
|
{
|
|
+ struct pt_regs tmp, *new_stack;
|
|
+
|
|
/*
|
|
* This is called from entry_64.S early in handling a fault
|
|
* caused by a bad iret to user mode. To handle the fault
|
|
@@ -908,19 +904,18 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
|
|
* just below the IRET frame) and we want to pretend that the
|
|
* exception came from the IRET target.
|
|
*/
|
|
- struct bad_iret_stack tmp, *new_stack =
|
|
- (struct bad_iret_stack *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
|
|
+ new_stack = (struct pt_regs *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
|
|
|
|
/* Copy the IRET target to the temporary storage. */
|
|
- __memcpy(&tmp.regs.ip, (void *)s->regs.sp, 5*8);
|
|
+ __memcpy(&tmp.ip, (void *)bad_regs->sp, 5*8);
|
|
|
|
/* Copy the remainder of the stack from the current stack. */
|
|
- __memcpy(&tmp, s, offsetof(struct bad_iret_stack, regs.ip));
|
|
+ __memcpy(&tmp, bad_regs, offsetof(struct pt_regs, ip));
|
|
|
|
/* Update the entry stack */
|
|
__memcpy(new_stack, &tmp, sizeof(tmp));
|
|
|
|
- BUG_ON(!user_mode(&new_stack->regs));
|
|
+ BUG_ON(!user_mode(new_stack));
|
|
return new_stack;
|
|
}
|
|
#endif
|
|
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
|
|
index 7fda7f27e762..071faf2c8a77 100644
|
|
--- a/arch/x86/kernel/vmlinux.lds.S
|
|
+++ b/arch/x86/kernel/vmlinux.lds.S
|
|
@@ -141,7 +141,7 @@ SECTIONS
|
|
|
|
#ifdef CONFIG_RETPOLINE
|
|
__indirect_thunk_start = .;
|
|
- *(.text.__x86.indirect_thunk)
|
|
+ *(.text.__x86.*)
|
|
__indirect_thunk_end = .;
|
|
#endif
|
|
} :text =0xcccc
|
|
@@ -283,6 +283,13 @@ SECTIONS
|
|
*(.retpoline_sites)
|
|
__retpoline_sites_end = .;
|
|
}
|
|
+
|
|
+ . = ALIGN(8);
|
|
+ .return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) {
|
|
+ __return_sites = .;
|
|
+ *(.return_sites)
|
|
+ __return_sites_end = .;
|
|
+ }
|
|
#endif
|
|
|
|
#ifdef CONFIG_X86_KERNEL_IBT
|
|
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
|
|
index 89b11e7dca8a..f8382abe22ff 100644
|
|
--- a/arch/x86/kvm/emulate.c
|
|
+++ b/arch/x86/kvm/emulate.c
|
|
@@ -189,9 +189,6 @@
|
|
#define X8(x...) X4(x), X4(x)
|
|
#define X16(x...) X8(x), X8(x)
|
|
|
|
-#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
|
|
-#define FASTOP_SIZE (8 * (1 + HAS_KERNEL_IBT))
|
|
-
|
|
struct opcode {
|
|
u64 flags;
|
|
u8 intercept;
|
|
@@ -306,9 +303,15 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
|
|
* Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
|
|
* different operand sizes can be reached by calculation, rather than a jump
|
|
* table (which would be bigger than the code).
|
|
+ *
|
|
+ * The 16 byte alignment, considering 5 bytes for the RET thunk, 4 for ENDBR
|
|
+ * and 1 for the straight line speculation INT3, leaves 6 bytes for the
|
|
+ * body of the function. Currently none is larger than 4.
|
|
*/
|
|
static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
|
|
|
|
+#define FASTOP_SIZE 16
|
|
+
|
|
#define __FOP_FUNC(name) \
|
|
".align " __stringify(FASTOP_SIZE) " \n\t" \
|
|
".type " name ", @function \n\t" \
|
|
@@ -325,13 +328,15 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
|
|
#define FOP_RET(name) \
|
|
__FOP_RET(#name)
|
|
|
|
-#define FOP_START(op) \
|
|
+#define __FOP_START(op, align) \
|
|
extern void em_##op(struct fastop *fake); \
|
|
asm(".pushsection .text, \"ax\" \n\t" \
|
|
".global em_" #op " \n\t" \
|
|
- ".align " __stringify(FASTOP_SIZE) " \n\t" \
|
|
+ ".align " __stringify(align) " \n\t" \
|
|
"em_" #op ":\n\t"
|
|
|
|
+#define FOP_START(op) __FOP_START(op, FASTOP_SIZE)
|
|
+
|
|
#define FOP_END \
|
|
".popsection")
|
|
|
|
@@ -435,17 +440,12 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
|
|
/*
|
|
* Depending on .config the SETcc functions look like:
|
|
*
|
|
- * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT]
|
|
- * SETcc %al [3 bytes]
|
|
- * RET [1 byte]
|
|
- * INT3 [1 byte; CONFIG_SLS]
|
|
- *
|
|
- * Which gives possible sizes 4, 5, 8 or 9. When rounded up to the
|
|
- * next power-of-two alignment they become 4, 8 or 16 resp.
|
|
+ * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT]
|
|
+ * SETcc %al [3 bytes]
|
|
+ * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK]
|
|
+ * INT3 [1 byte; CONFIG_SLS]
|
|
*/
|
|
-#define SETCC_LENGTH (ENDBR_INSN_SIZE + 4 + IS_ENABLED(CONFIG_SLS))
|
|
-#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS) << HAS_KERNEL_IBT)
|
|
-static_assert(SETCC_LENGTH <= SETCC_ALIGN);
|
|
+#define SETCC_ALIGN 16
|
|
|
|
#define FOP_SETCC(op) \
|
|
".align " __stringify(SETCC_ALIGN) " \n\t" \
|
|
@@ -453,9 +453,10 @@ static_assert(SETCC_LENGTH <= SETCC_ALIGN);
|
|
#op ": \n\t" \
|
|
ASM_ENDBR \
|
|
#op " %al \n\t" \
|
|
- __FOP_RET(#op)
|
|
+ __FOP_RET(#op) \
|
|
+ ".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t"
|
|
|
|
-FOP_START(setcc)
|
|
+__FOP_START(setcc, SETCC_ALIGN)
|
|
FOP_SETCC(seto)
|
|
FOP_SETCC(setno)
|
|
FOP_SETCC(setc)
|
|
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
|
|
index dfaeb47fcf2a..723f8534986c 100644
|
|
--- a/arch/x86/kvm/svm/vmenter.S
|
|
+++ b/arch/x86/kvm/svm/vmenter.S
|
|
@@ -110,6 +110,15 @@ SYM_FUNC_START(__svm_vcpu_run)
|
|
mov %r15, VCPU_R15(%_ASM_AX)
|
|
#endif
|
|
|
|
+ /*
|
|
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
|
|
+ * untrained as soon as we exit the VM and are back to the
|
|
+ * kernel. This should be done before re-enabling interrupts
|
|
+ * because interrupt handlers won't sanitize 'ret' if the return is
|
|
+ * from the kernel.
|
|
+ */
|
|
+ UNTRAIN_RET
|
|
+
|
|
/*
|
|
* Clear all general purpose registers except RSP and RAX to prevent
|
|
* speculative use of the guest's values, even those that are reloaded
|
|
@@ -190,6 +199,15 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
|
|
FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
|
|
#endif
|
|
|
|
+ /*
|
|
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
|
|
+ * untrained as soon as we exit the VM and are back to the
|
|
+ * kernel. This should be done before re-enabling interrupts
|
|
+ * because interrupt handlers won't sanitize RET if the return is
|
|
+ * from the kernel.
|
|
+ */
|
|
+ UNTRAIN_RET
|
|
+
|
|
pop %_ASM_BX
|
|
|
|
#ifdef CONFIG_X86_64
|
|
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
|
|
index 3f430e218375..c0e24826a86f 100644
|
|
--- a/arch/x86/kvm/vmx/capabilities.h
|
|
+++ b/arch/x86/kvm/vmx/capabilities.h
|
|
@@ -4,8 +4,8 @@
|
|
|
|
#include <asm/vmx.h>
|
|
|
|
-#include "lapic.h"
|
|
-#include "x86.h"
|
|
+#include "../lapic.h"
|
|
+#include "../x86.h"
|
|
|
|
extern bool __read_mostly enable_vpid;
|
|
extern bool __read_mostly flexpriority_enabled;
|
|
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
|
|
index ee7df31883cd..28ccf25c4124 100644
|
|
--- a/arch/x86/kvm/vmx/nested.c
|
|
+++ b/arch/x86/kvm/vmx/nested.c
|
|
@@ -3091,7 +3091,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
|
|
}
|
|
|
|
vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
|
|
- vmx->loaded_vmcs->launched);
|
|
+ __vmx_vcpu_run_flags(vmx));
|
|
|
|
if (vmx->msr_autoload.host.nr)
|
|
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
|
|
diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
|
|
new file mode 100644
|
|
index 000000000000..edc3f16cc189
|
|
--- /dev/null
|
|
+++ b/arch/x86/kvm/vmx/run_flags.h
|
|
@@ -0,0 +1,8 @@
|
|
+/* SPDX-License-Identifier: GPL-2.0 */
|
|
+#ifndef __KVM_X86_VMX_RUN_FLAGS_H
|
|
+#define __KVM_X86_VMX_RUN_FLAGS_H
|
|
+
|
|
+#define VMX_RUN_VMRESUME (1 << 0)
|
|
+#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1)
|
|
+
|
|
+#endif /* __KVM_X86_VMX_RUN_FLAGS_H */
|
|
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
|
|
index 435c187927c4..4182c7ffc909 100644
|
|
--- a/arch/x86/kvm/vmx/vmenter.S
|
|
+++ b/arch/x86/kvm/vmx/vmenter.S
|
|
@@ -1,10 +1,13 @@
|
|
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#include <linux/linkage.h>
|
|
#include <asm/asm.h>
|
|
+#include <asm/asm-offsets.h>
|
|
#include <asm/bitsperlong.h>
|
|
#include <asm/kvm_vcpu_regs.h>
|
|
#include <asm/nospec-branch.h>
|
|
+#include <asm/percpu.h>
|
|
#include <asm/segment.h>
|
|
+#include "run_flags.h"
|
|
|
|
#define WORD_SIZE (BITS_PER_LONG / 8)
|
|
|
|
@@ -30,73 +33,12 @@
|
|
|
|
.section .noinstr.text, "ax"
|
|
|
|
-/**
|
|
- * vmx_vmenter - VM-Enter the current loaded VMCS
|
|
- *
|
|
- * %RFLAGS.ZF: !VMCS.LAUNCHED, i.e. controls VMLAUNCH vs. VMRESUME
|
|
- *
|
|
- * Returns:
|
|
- * %RFLAGS.CF is set on VM-Fail Invalid
|
|
- * %RFLAGS.ZF is set on VM-Fail Valid
|
|
- * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit
|
|
- *
|
|
- * Note that VMRESUME/VMLAUNCH fall-through and return directly if
|
|
- * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump
|
|
- * to vmx_vmexit.
|
|
- */
|
|
-SYM_FUNC_START_LOCAL(vmx_vmenter)
|
|
- /* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */
|
|
- je 2f
|
|
-
|
|
-1: vmresume
|
|
- RET
|
|
-
|
|
-2: vmlaunch
|
|
- RET
|
|
-
|
|
-3: cmpb $0, kvm_rebooting
|
|
- je 4f
|
|
- RET
|
|
-4: ud2
|
|
-
|
|
- _ASM_EXTABLE(1b, 3b)
|
|
- _ASM_EXTABLE(2b, 3b)
|
|
-
|
|
-SYM_FUNC_END(vmx_vmenter)
|
|
-
|
|
-/**
|
|
- * vmx_vmexit - Handle a VMX VM-Exit
|
|
- *
|
|
- * Returns:
|
|
- * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit
|
|
- *
|
|
- * This is vmx_vmenter's partner in crime. On a VM-Exit, control will jump
|
|
- * here after hardware loads the host's state, i.e. this is the destination
|
|
- * referred to by VMCS.HOST_RIP.
|
|
- */
|
|
-SYM_FUNC_START(vmx_vmexit)
|
|
-#ifdef CONFIG_RETPOLINE
|
|
- ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE
|
|
- /* Preserve guest's RAX, it's used to stuff the RSB. */
|
|
- push %_ASM_AX
|
|
-
|
|
- /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
|
|
- FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
|
|
-
|
|
- /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */
|
|
- or $1, %_ASM_AX
|
|
-
|
|
- pop %_ASM_AX
|
|
-.Lvmexit_skip_rsb:
|
|
-#endif
|
|
- RET
|
|
-SYM_FUNC_END(vmx_vmexit)
|
|
-
|
|
/**
|
|
* __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
|
|
- * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp)
|
|
+ * @vmx: struct vcpu_vmx *
|
|
* @regs: unsigned long * (to guest registers)
|
|
- * @launched: %true if the VMCS has been launched
|
|
+ * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH
|
|
+ * VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl
|
|
*
|
|
* Returns:
|
|
* 0 on VM-Exit, 1 on VM-Fail
|
|
@@ -115,24 +57,56 @@ SYM_FUNC_START(__vmx_vcpu_run)
|
|
#endif
|
|
push %_ASM_BX
|
|
|
|
+ /* Save @vmx for SPEC_CTRL handling */
|
|
+ push %_ASM_ARG1
|
|
+
|
|
+ /* Save @flags for SPEC_CTRL handling */
|
|
+ push %_ASM_ARG3
|
|
+
|
|
/*
|
|
* Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and
|
|
* @regs is needed after VM-Exit to save the guest's register values.
|
|
*/
|
|
push %_ASM_ARG2
|
|
|
|
- /* Copy @launched to BL, _ASM_ARG3 is volatile. */
|
|
+ /* Copy @flags to BL, _ASM_ARG3 is volatile. */
|
|
mov %_ASM_ARG3B, %bl
|
|
|
|
- /* Adjust RSP to account for the CALL to vmx_vmenter(). */
|
|
- lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2
|
|
+ lea (%_ASM_SP), %_ASM_ARG2
|
|
call vmx_update_host_rsp
|
|
|
|
+ ALTERNATIVE "jmp .Lspec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL
|
|
+
|
|
+ /*
|
|
+ * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
|
|
+ * host's, write the MSR.
|
|
+ *
|
|
+ * IMPORTANT: To avoid RSB underflow attacks and any other nastiness,
|
|
+ * there must not be any returns or indirect branches between this code
|
|
+ * and vmentry.
|
|
+ */
|
|
+ mov 2*WORD_SIZE(%_ASM_SP), %_ASM_DI
|
|
+ movl VMX_spec_ctrl(%_ASM_DI), %edi
|
|
+ movl PER_CPU_VAR(x86_spec_ctrl_current), %esi
|
|
+ cmp %edi, %esi
|
|
+ je .Lspec_ctrl_done
|
|
+ mov $MSR_IA32_SPEC_CTRL, %ecx
|
|
+ xor %edx, %edx
|
|
+ mov %edi, %eax
|
|
+ wrmsr
|
|
+
|
|
+.Lspec_ctrl_done:
|
|
+
|
|
+ /*
|
|
+ * Since vmentry is serializing on affected CPUs, there's no need for
|
|
+ * an LFENCE to stop speculation from skipping the wrmsr.
|
|
+ */
|
|
+
|
|
/* Load @regs to RAX. */
|
|
mov (%_ASM_SP), %_ASM_AX
|
|
|
|
/* Check if vmlaunch or vmresume is needed */
|
|
- testb %bl, %bl
|
|
+ testb $VMX_RUN_VMRESUME, %bl
|
|
|
|
/* Load guest registers. Don't clobber flags. */
|
|
mov VCPU_RCX(%_ASM_AX), %_ASM_CX
|
|
@@ -154,11 +128,37 @@ SYM_FUNC_START(__vmx_vcpu_run)
|
|
/* Load guest RAX. This kills the @regs pointer! */
|
|
mov VCPU_RAX(%_ASM_AX), %_ASM_AX
|
|
|
|
- /* Enter guest mode */
|
|
- call vmx_vmenter
|
|
+ /* Check EFLAGS.ZF from 'testb' above */
|
|
+ jz .Lvmlaunch
|
|
+
|
|
+ /*
|
|
+ * After a successful VMRESUME/VMLAUNCH, control flow "magically"
|
|
+ * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting.
|
|
+ * So this isn't a typical function and objtool needs to be told to
|
|
+ * save the unwind state here and restore it below.
|
|
+ */
|
|
+ UNWIND_HINT_SAVE
|
|
+
|
|
+/*
|
|
+ * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at
|
|
+ * the 'vmx_vmexit' label below.
|
|
+ */
|
|
+.Lvmresume:
|
|
+ vmresume
|
|
+ jmp .Lvmfail
|
|
+
|
|
+.Lvmlaunch:
|
|
+ vmlaunch
|
|
+ jmp .Lvmfail
|
|
|
|
- /* Jump on VM-Fail. */
|
|
- jbe 2f
|
|
+ _ASM_EXTABLE(.Lvmresume, .Lfixup)
|
|
+ _ASM_EXTABLE(.Lvmlaunch, .Lfixup)
|
|
+
|
|
+SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
|
|
+
|
|
+ /* Restore unwind state from before the VMRESUME/VMLAUNCH. */
|
|
+ UNWIND_HINT_RESTORE
|
|
+ ENDBR
|
|
|
|
/* Temporarily save guest's RAX. */
|
|
push %_ASM_AX
|
|
@@ -185,21 +185,23 @@ SYM_FUNC_START(__vmx_vcpu_run)
|
|
mov %r15, VCPU_R15(%_ASM_AX)
|
|
#endif
|
|
|
|
- /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */
|
|
- xor %eax, %eax
|
|
+ /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */
|
|
+ xor %ebx, %ebx
|
|
|
|
+.Lclear_regs:
|
|
/*
|
|
- * Clear all general purpose registers except RSP and RAX to prevent
|
|
+ * Clear all general purpose registers except RSP and RBX to prevent
|
|
* speculative use of the guest's values, even those that are reloaded
|
|
* via the stack. In theory, an L1 cache miss when restoring registers
|
|
* could lead to speculative execution with the guest's values.
|
|
* Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
|
|
* free. RSP and RAX are exempt as RSP is restored by hardware during
|
|
- * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail.
|
|
+ * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return
|
|
+ * value.
|
|
*/
|
|
-1: xor %ecx, %ecx
|
|
+ xor %eax, %eax
|
|
+ xor %ecx, %ecx
|
|
xor %edx, %edx
|
|
- xor %ebx, %ebx
|
|
xor %ebp, %ebp
|
|
xor %esi, %esi
|
|
xor %edi, %edi
|
|
@@ -216,8 +218,30 @@ SYM_FUNC_START(__vmx_vcpu_run)
|
|
|
|
/* "POP" @regs. */
|
|
add $WORD_SIZE, %_ASM_SP
|
|
- pop %_ASM_BX
|
|
|
|
+ /*
|
|
+ * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
|
|
+ * the first unbalanced RET after vmexit!
|
|
+ *
|
|
+ * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB
|
|
+ * entries and (in some cases) RSB underflow.
|
|
+ *
|
|
+ * eIBRS has its own protection against poisoned RSB, so it doesn't
|
|
+ * need the RSB filling sequence. But it does need to be enabled
|
|
+ * before the first unbalanced RET.
|
|
+ */
|
|
+
|
|
+ FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT
|
|
+
|
|
+ pop %_ASM_ARG2 /* @flags */
|
|
+ pop %_ASM_ARG1 /* @vmx */
|
|
+
|
|
+ call vmx_spec_ctrl_restore_host
|
|
+
|
|
+ /* Put return value in AX */
|
|
+ mov %_ASM_BX, %_ASM_AX
|
|
+
|
|
+ pop %_ASM_BX
|
|
#ifdef CONFIG_X86_64
|
|
pop %r12
|
|
pop %r13
|
|
@@ -230,9 +254,15 @@ SYM_FUNC_START(__vmx_vcpu_run)
|
|
pop %_ASM_BP
|
|
RET
|
|
|
|
- /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */
|
|
-2: mov $1, %eax
|
|
- jmp 1b
|
|
+.Lfixup:
|
|
+ cmpb $0, kvm_rebooting
|
|
+ jne .Lvmfail
|
|
+ ud2
|
|
+.Lvmfail:
|
|
+ /* VM-Fail: set return value to 1 */
|
|
+ mov $1, %_ASM_BX
|
|
+ jmp .Lclear_regs
|
|
+
|
|
SYM_FUNC_END(__vmx_vcpu_run)
|
|
|
|
|
|
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
|
|
index 9646ae886b4b..4b6a0268c78e 100644
|
|
--- a/arch/x86/kvm/vmx/vmx.c
|
|
+++ b/arch/x86/kvm/vmx/vmx.c
|
|
@@ -383,9 +383,9 @@ static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
|
|
if (!vmx->disable_fb_clear)
|
|
return;
|
|
|
|
- rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
|
|
+ msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL);
|
|
msr |= FB_CLEAR_DIS;
|
|
- wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
|
|
+ native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
|
|
/* Cache the MSR value to avoid reading it later */
|
|
vmx->msr_ia32_mcu_opt_ctrl = msr;
|
|
}
|
|
@@ -396,7 +396,7 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
|
|
return;
|
|
|
|
vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS;
|
|
- wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
|
|
+ native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
|
|
}
|
|
|
|
static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
|
|
@@ -839,6 +839,24 @@ static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr)
|
|
MSR_IA32_SPEC_CTRL);
|
|
}
|
|
|
|
+unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
|
|
+{
|
|
+ unsigned int flags = 0;
|
|
+
|
|
+ if (vmx->loaded_vmcs->launched)
|
|
+ flags |= VMX_RUN_VMRESUME;
|
|
+
|
|
+ /*
|
|
+ * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free
|
|
+ * to change it directly without causing a vmexit. In that case read
|
|
+ * it after vmexit and store it in vmx->spec_ctrl.
|
|
+ */
|
|
+ if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
|
|
+ flags |= VMX_RUN_SAVE_SPEC_CTRL;
|
|
+
|
|
+ return flags;
|
|
+}
|
|
+
|
|
static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
|
|
unsigned long entry, unsigned long exit)
|
|
{
|
|
@@ -6814,6 +6832,31 @@ void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
|
|
}
|
|
}
|
|
|
|
+void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
|
|
+ unsigned int flags)
|
|
+{
|
|
+ u64 hostval = this_cpu_read(x86_spec_ctrl_current);
|
|
+
|
|
+ if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL))
|
|
+ return;
|
|
+
|
|
+ if (flags & VMX_RUN_SAVE_SPEC_CTRL)
|
|
+ vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL);
|
|
+
|
|
+ /*
|
|
+ * If the guest/host SPEC_CTRL values differ, restore the host value.
|
|
+ *
|
|
+ * For legacy IBRS, the IBRS bit always needs to be written after
|
|
+ * transitioning from a less privileged predictor mode, regardless of
|
|
+ * whether the guest/host values differ.
|
|
+ */
|
|
+ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) ||
|
|
+ vmx->spec_ctrl != hostval)
|
|
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
|
|
+
|
|
+ barrier_nospec();
|
|
+}
|
|
+
|
|
static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
|
|
{
|
|
switch (to_vmx(vcpu)->exit_reason.basic) {
|
|
@@ -6827,7 +6870,8 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
|
|
}
|
|
|
|
static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
|
|
- struct vcpu_vmx *vmx)
|
|
+ struct vcpu_vmx *vmx,
|
|
+ unsigned long flags)
|
|
{
|
|
guest_state_enter_irqoff();
|
|
|
|
@@ -6846,7 +6890,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
|
|
native_write_cr2(vcpu->arch.cr2);
|
|
|
|
vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
|
|
- vmx->loaded_vmcs->launched);
|
|
+ flags);
|
|
|
|
vcpu->arch.cr2 = native_read_cr2();
|
|
|
|
@@ -6945,36 +6989,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
|
|
|
kvm_wait_lapic_expire(vcpu);
|
|
|
|
- /*
|
|
- * If this vCPU has touched SPEC_CTRL, restore the guest's value if
|
|
- * it's non-zero. Since vmentry is serialising on affected CPUs, there
|
|
- * is no need to worry about the conditional branch over the wrmsr
|
|
- * being speculatively taken.
|
|
- */
|
|
- x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
|
|
-
|
|
/* The actual VMENTER/EXIT is in the .noinstr.text section. */
|
|
- vmx_vcpu_enter_exit(vcpu, vmx);
|
|
-
|
|
- /*
|
|
- * We do not use IBRS in the kernel. If this vCPU has used the
|
|
- * SPEC_CTRL MSR it may have left it on; save the value and
|
|
- * turn it off. This is much more efficient than blindly adding
|
|
- * it to the atomic save/restore list. Especially as the former
|
|
- * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
|
|
- *
|
|
- * For non-nested case:
|
|
- * If the L01 MSR bitmap does not intercept the MSR, then we need to
|
|
- * save it.
|
|
- *
|
|
- * For nested case:
|
|
- * If the L02 MSR bitmap does not intercept the MSR, then we need to
|
|
- * save it.
|
|
- */
|
|
- if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
|
|
- vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
|
|
-
|
|
- x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
|
|
+ vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx));
|
|
|
|
/* All fields are clean at this point */
|
|
if (static_branch_unlikely(&enable_evmcs)) {
|
|
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
|
|
index 8d2342ede0c5..1e7f9453894b 100644
|
|
--- a/arch/x86/kvm/vmx/vmx.h
|
|
+++ b/arch/x86/kvm/vmx/vmx.h
|
|
@@ -8,11 +8,12 @@
|
|
#include <asm/intel_pt.h>
|
|
|
|
#include "capabilities.h"
|
|
-#include "kvm_cache_regs.h"
|
|
+#include "../kvm_cache_regs.h"
|
|
#include "posted_intr.h"
|
|
#include "vmcs.h"
|
|
#include "vmx_ops.h"
|
|
-#include "cpuid.h"
|
|
+#include "../cpuid.h"
|
|
+#include "run_flags.h"
|
|
|
|
#define MSR_TYPE_R 1
|
|
#define MSR_TYPE_W 2
|
|
@@ -404,7 +405,10 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
|
|
struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
|
|
void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
|
|
void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
|
|
-bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
|
|
+void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags);
|
|
+unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx);
|
|
+bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
|
|
+ unsigned int flags);
|
|
int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
|
|
void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
|
|
|
|
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
|
|
index 5e7f41225780..5cfc49ddb1b4 100644
|
|
--- a/arch/x86/kvm/vmx/vmx_ops.h
|
|
+++ b/arch/x86/kvm/vmx/vmx_ops.h
|
|
@@ -8,7 +8,7 @@
|
|
|
|
#include "evmcs.h"
|
|
#include "vmcs.h"
|
|
-#include "x86.h"
|
|
+#include "../x86.h"
|
|
|
|
asmlinkage void vmread_error(unsigned long field, bool fault);
|
|
__attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field,
|
|
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
|
|
index 828f5cf1af45..53b6fdf30c99 100644
|
|
--- a/arch/x86/kvm/x86.c
|
|
+++ b/arch/x86/kvm/x86.c
|
|
@@ -12533,9 +12533,9 @@ void kvm_arch_end_assignment(struct kvm *kvm)
|
|
}
|
|
EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
|
|
|
|
-bool kvm_arch_has_assigned_device(struct kvm *kvm)
|
|
+bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm)
|
|
{
|
|
- return atomic_read(&kvm->arch.assigned_device_count);
|
|
+ return arch_atomic_read(&kvm->arch.assigned_device_count);
|
|
}
|
|
EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
|
|
|
|
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
|
|
index d83cba364e31..724bbf83eb5b 100644
|
|
--- a/arch/x86/lib/memmove_64.S
|
|
+++ b/arch/x86/lib/memmove_64.S
|
|
@@ -39,7 +39,7 @@ SYM_FUNC_START(__memmove)
|
|
/* FSRM implies ERMS => no length checks, do the copy directly */
|
|
.Lmemmove_begin_forward:
|
|
ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM
|
|
- ALTERNATIVE "", __stringify(movq %rdx, %rcx; rep movsb; RET), X86_FEATURE_ERMS
|
|
+ ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS
|
|
|
|
/*
|
|
* movsq instruction have many startup latency
|
|
@@ -205,6 +205,11 @@ SYM_FUNC_START(__memmove)
|
|
movb %r11b, (%rdi)
|
|
13:
|
|
RET
|
|
+
|
|
+.Lmemmove_erms:
|
|
+ movq %rdx, %rcx
|
|
+ rep movsb
|
|
+ RET
|
|
SYM_FUNC_END(__memmove)
|
|
EXPORT_SYMBOL(__memmove)
|
|
|
|
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
|
|
index b2b2366885a2..073289a55f84 100644
|
|
--- a/arch/x86/lib/retpoline.S
|
|
+++ b/arch/x86/lib/retpoline.S
|
|
@@ -33,9 +33,9 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
|
|
UNWIND_HINT_EMPTY
|
|
ANNOTATE_NOENDBR
|
|
|
|
- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
|
|
- __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
|
|
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE
|
|
+ ALTERNATIVE_2 __stringify(RETPOLINE \reg), \
|
|
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \
|
|
+ __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE)
|
|
|
|
.endm
|
|
|
|
@@ -67,3 +67,76 @@ SYM_CODE_END(__x86_indirect_thunk_array)
|
|
#define GEN(reg) EXPORT_THUNK(reg)
|
|
#include <asm/GEN-for-each-reg.h>
|
|
#undef GEN
|
|
+
|
|
+/*
|
|
+ * This function name is magical and is used by -mfunction-return=thunk-extern
|
|
+ * for the compiler to generate JMPs to it.
|
|
+ */
|
|
+#ifdef CONFIG_RETHUNK
|
|
+
|
|
+ .section .text.__x86.return_thunk
|
|
+
|
|
+/*
|
|
+ * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
|
|
+ * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
|
|
+ * alignment within the BTB.
|
|
+ * 2) The instruction at zen_untrain_ret must contain, and not
|
|
+ * end with, the 0xc3 byte of the RET.
|
|
+ * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
|
|
+ * from re-poisoning the BTB prediction.
|
|
+ */
|
|
+ .align 64
|
|
+ .skip 63, 0xcc
|
|
+SYM_FUNC_START_NOALIGN(zen_untrain_ret);
|
|
+
|
|
+ /*
|
|
+ * As executed from zen_untrain_ret, this is:
|
|
+ *
|
|
+ * TEST $0xcc, %bl
|
|
+ * LFENCE
|
|
+ * JMP __x86_return_thunk
|
|
+ *
|
|
+ * Executing the TEST instruction has a side effect of evicting any BTB
|
|
+ * prediction (potentially attacker controlled) attached to the RET, as
|
|
+ * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
|
|
+ */
|
|
+ .byte 0xf6
|
|
+
|
|
+ /*
|
|
+ * As executed from __x86_return_thunk, this is a plain RET.
|
|
+ *
|
|
+ * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
|
|
+ *
|
|
+ * We subsequently jump backwards and architecturally execute the RET.
|
|
+ * This creates a correct BTB prediction (type=ret), but in the
|
|
+ * meantime we suffer Straight Line Speculation (because the type was
|
|
+ * no branch) which is halted by the INT3.
|
|
+ *
|
|
+ * With SMT enabled and STIBP active, a sibling thread cannot poison
|
|
+ * RET's prediction to a type of its choice, but can evict the
|
|
+ * prediction due to competitive sharing. If the prediction is
|
|
+ * evicted, __x86_return_thunk will suffer Straight Line Speculation
|
|
+ * which will be contained safely by the INT3.
|
|
+ */
|
|
+SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL)
|
|
+ ret
|
|
+ int3
|
|
+SYM_CODE_END(__x86_return_thunk)
|
|
+
|
|
+ /*
|
|
+ * Ensure the TEST decoding / BTB invalidation is complete.
|
|
+ */
|
|
+ lfence
|
|
+
|
|
+ /*
|
|
+ * Jump back and execute the RET in the middle of the TEST instruction.
|
|
+ * INT3 is for SLS protection.
|
|
+ */
|
|
+ jmp __x86_return_thunk
|
|
+ int3
|
|
+SYM_FUNC_END(zen_untrain_ret)
|
|
+__EXPORT_THUNK(zen_untrain_ret)
|
|
+
|
|
+EXPORT_SYMBOL(__x86_return_thunk)
|
|
+
|
|
+#endif /* CONFIG_RETHUNK */
|
|
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
|
|
index 3d1dba05fce4..9de3d900bc92 100644
|
|
--- a/arch/x86/mm/mem_encrypt_boot.S
|
|
+++ b/arch/x86/mm/mem_encrypt_boot.S
|
|
@@ -65,7 +65,10 @@ SYM_FUNC_START(sme_encrypt_execute)
|
|
movq %rbp, %rsp /* Restore original stack pointer */
|
|
pop %rbp
|
|
|
|
- RET
|
|
+ /* Offset to __x86_return_thunk would be wrong here */
|
|
+ ANNOTATE_UNRET_SAFE
|
|
+ ret
|
|
+ int3
|
|
SYM_FUNC_END(sme_encrypt_execute)
|
|
|
|
SYM_FUNC_START(__enc_copy)
|
|
@@ -151,6 +154,9 @@ SYM_FUNC_START(__enc_copy)
|
|
pop %r12
|
|
pop %r15
|
|
|
|
- RET
|
|
+ /* Offset to __x86_return_thunk would be wrong here */
|
|
+ ANNOTATE_UNRET_SAFE
|
|
+ ret
|
|
+ int3
|
|
.L__enc_copy_end:
|
|
SYM_FUNC_END(__enc_copy)
|
|
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
|
|
index 4c71fa04e784..2dab2816b3f7 100644
|
|
--- a/arch/x86/net/bpf_jit_comp.c
|
|
+++ b/arch/x86/net/bpf_jit_comp.c
|
|
@@ -407,16 +407,30 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
|
|
{
|
|
u8 *prog = *pprog;
|
|
|
|
-#ifdef CONFIG_RETPOLINE
|
|
if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
|
|
EMIT_LFENCE();
|
|
EMIT2(0xFF, 0xE0 + reg);
|
|
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
|
|
OPTIMIZER_HIDE_VAR(reg);
|
|
emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
|
|
- } else
|
|
-#endif
|
|
- EMIT2(0xFF, 0xE0 + reg);
|
|
+ } else {
|
|
+ EMIT2(0xFF, 0xE0 + reg);
|
|
+ }
|
|
+
|
|
+ *pprog = prog;
|
|
+}
|
|
+
|
|
+static void emit_return(u8 **pprog, u8 *ip)
|
|
+{
|
|
+ u8 *prog = *pprog;
|
|
+
|
|
+ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
|
|
+ emit_jump(&prog, &__x86_return_thunk, ip);
|
|
+ } else {
|
|
+ EMIT1(0xC3); /* ret */
|
|
+ if (IS_ENABLED(CONFIG_SLS))
|
|
+ EMIT1(0xCC); /* int3 */
|
|
+ }
|
|
|
|
*pprog = prog;
|
|
}
|
|
@@ -1681,7 +1695,7 @@ st: if (is_imm8(insn->off))
|
|
ctx->cleanup_addr = proglen;
|
|
pop_callee_regs(&prog, callee_regs_used);
|
|
EMIT1(0xC9); /* leave */
|
|
- EMIT1(0xC3); /* ret */
|
|
+ emit_return(&prog, image + addrs[i - 1] + (prog - temp));
|
|
break;
|
|
|
|
default:
|
|
@@ -2158,7 +2172,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
|
|
if (flags & BPF_TRAMP_F_SKIP_FRAME)
|
|
/* skip our return address and return to parent */
|
|
EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
|
|
- EMIT1(0xC3); /* ret */
|
|
+ emit_return(&prog, prog);
|
|
/* Make sure the trampoline generation logic doesn't overflow */
|
|
if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) {
|
|
ret = -EFAULT;
|
|
diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S
|
|
index 854dd81804b7..bc740a7c438c 100644
|
|
--- a/arch/x86/platform/efi/efi_thunk_64.S
|
|
+++ b/arch/x86/platform/efi/efi_thunk_64.S
|
|
@@ -23,6 +23,7 @@
|
|
#include <linux/objtool.h>
|
|
#include <asm/page_types.h>
|
|
#include <asm/segment.h>
|
|
+#include <asm/nospec-branch.h>
|
|
|
|
.text
|
|
.code64
|
|
@@ -75,7 +76,9 @@ STACK_FRAME_NON_STANDARD __efi64_thunk
|
|
1: movq 0x20(%rsp), %rsp
|
|
pop %rbx
|
|
pop %rbp
|
|
- RET
|
|
+ ANNOTATE_UNRET_SAFE
|
|
+ ret
|
|
+ int3
|
|
|
|
.code32
|
|
2: pushl $__KERNEL_CS
|
|
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
|
|
index 81aa46f770c5..cfa99e8f054b 100644
|
|
--- a/arch/x86/xen/setup.c
|
|
+++ b/arch/x86/xen/setup.c
|
|
@@ -918,7 +918,7 @@ void xen_enable_sysenter(void)
|
|
if (!boot_cpu_has(sysenter_feature))
|
|
return;
|
|
|
|
- ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
|
|
+ ret = register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat);
|
|
if(ret != 0)
|
|
setup_clear_cpu_cap(sysenter_feature);
|
|
}
|
|
@@ -927,7 +927,7 @@ void xen_enable_syscall(void)
|
|
{
|
|
int ret;
|
|
|
|
- ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
|
|
+ ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64);
|
|
if (ret != 0) {
|
|
printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
|
|
/* Pretty fatal; 64-bit userspace has no other
|
|
@@ -936,7 +936,7 @@ void xen_enable_syscall(void)
|
|
|
|
if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
|
|
ret = register_callback(CALLBACKTYPE_syscall32,
|
|
- xen_syscall32_target);
|
|
+ xen_entry_SYSCALL_compat);
|
|
if (ret != 0)
|
|
setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
|
|
}
|
|
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
|
|
index caa9bc2fa100..6b4fdf6b9542 100644
|
|
--- a/arch/x86/xen/xen-asm.S
|
|
+++ b/arch/x86/xen/xen-asm.S
|
|
@@ -121,7 +121,7 @@ SYM_FUNC_END(xen_read_cr2_direct);
|
|
|
|
.macro xen_pv_trap name
|
|
SYM_CODE_START(xen_\name)
|
|
- UNWIND_HINT_EMPTY
|
|
+ UNWIND_HINT_ENTRY
|
|
ENDBR
|
|
pop %rcx
|
|
pop %r11
|
|
@@ -234,8 +234,8 @@ SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode)
|
|
*/
|
|
|
|
/* Normal 64-bit system call target */
|
|
-SYM_CODE_START(xen_syscall_target)
|
|
- UNWIND_HINT_EMPTY
|
|
+SYM_CODE_START(xen_entry_SYSCALL_64)
|
|
+ UNWIND_HINT_ENTRY
|
|
ENDBR
|
|
popq %rcx
|
|
popq %r11
|
|
@@ -249,13 +249,13 @@ SYM_CODE_START(xen_syscall_target)
|
|
movq $__USER_CS, 1*8(%rsp)
|
|
|
|
jmp entry_SYSCALL_64_after_hwframe
|
|
-SYM_CODE_END(xen_syscall_target)
|
|
+SYM_CODE_END(xen_entry_SYSCALL_64)
|
|
|
|
#ifdef CONFIG_IA32_EMULATION
|
|
|
|
/* 32-bit compat syscall target */
|
|
-SYM_CODE_START(xen_syscall32_target)
|
|
- UNWIND_HINT_EMPTY
|
|
+SYM_CODE_START(xen_entry_SYSCALL_compat)
|
|
+ UNWIND_HINT_ENTRY
|
|
ENDBR
|
|
popq %rcx
|
|
popq %r11
|
|
@@ -269,11 +269,11 @@ SYM_CODE_START(xen_syscall32_target)
|
|
movq $__USER32_CS, 1*8(%rsp)
|
|
|
|
jmp entry_SYSCALL_compat_after_hwframe
|
|
-SYM_CODE_END(xen_syscall32_target)
|
|
+SYM_CODE_END(xen_entry_SYSCALL_compat)
|
|
|
|
/* 32-bit compat sysenter target */
|
|
-SYM_CODE_START(xen_sysenter_target)
|
|
- UNWIND_HINT_EMPTY
|
|
+SYM_CODE_START(xen_entry_SYSENTER_compat)
|
|
+ UNWIND_HINT_ENTRY
|
|
ENDBR
|
|
/*
|
|
* NB: Xen is polite and clears TF from EFLAGS for us. This means
|
|
@@ -291,19 +291,19 @@ SYM_CODE_START(xen_sysenter_target)
|
|
movq $__USER32_CS, 1*8(%rsp)
|
|
|
|
jmp entry_SYSENTER_compat_after_hwframe
|
|
-SYM_CODE_END(xen_sysenter_target)
|
|
+SYM_CODE_END(xen_entry_SYSENTER_compat)
|
|
|
|
#else /* !CONFIG_IA32_EMULATION */
|
|
|
|
-SYM_CODE_START(xen_syscall32_target)
|
|
-SYM_CODE_START(xen_sysenter_target)
|
|
- UNWIND_HINT_EMPTY
|
|
+SYM_CODE_START(xen_entry_SYSCALL_compat)
|
|
+SYM_CODE_START(xen_entry_SYSENTER_compat)
|
|
+ UNWIND_HINT_ENTRY
|
|
ENDBR
|
|
lea 16(%rsp), %rsp /* strip %rcx, %r11 */
|
|
mov $-ENOSYS, %rax
|
|
pushq $0
|
|
jmp hypercall_iret
|
|
-SYM_CODE_END(xen_sysenter_target)
|
|
-SYM_CODE_END(xen_syscall32_target)
|
|
+SYM_CODE_END(xen_entry_SYSENTER_compat)
|
|
+SYM_CODE_END(xen_entry_SYSCALL_compat)
|
|
|
|
#endif /* CONFIG_IA32_EMULATION */
|
|
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
|
|
index 13af6fe453e3..ffaa62167f6e 100644
|
|
--- a/arch/x86/xen/xen-head.S
|
|
+++ b/arch/x86/xen/xen-head.S
|
|
@@ -26,6 +26,7 @@ SYM_CODE_START(hypercall_page)
|
|
.rept (PAGE_SIZE / 32)
|
|
UNWIND_HINT_FUNC
|
|
ANNOTATE_NOENDBR
|
|
+ ANNOTATE_UNRET_SAFE
|
|
ret
|
|
/*
|
|
* Xen will write the hypercall page, and sort out ENDBR.
|
|
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
|
|
index fd0fec6e92f4..9a8bb972193d 100644
|
|
--- a/arch/x86/xen/xen-ops.h
|
|
+++ b/arch/x86/xen/xen-ops.h
|
|
@@ -10,10 +10,10 @@
|
|
/* These are code, but not functions. Defined in entry.S */
|
|
extern const char xen_failsafe_callback[];
|
|
|
|
-void xen_sysenter_target(void);
|
|
+void xen_entry_SYSENTER_compat(void);
|
|
#ifdef CONFIG_X86_64
|
|
-void xen_syscall_target(void);
|
|
-void xen_syscall32_target(void);
|
|
+void xen_entry_SYSCALL_64(void);
|
|
+void xen_entry_SYSCALL_compat(void);
|
|
#endif
|
|
|
|
extern void *xen_initial_gdt;
|
|
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
|
|
index 6aef1ee5e1bd..8f146b1b4972 100644
|
|
--- a/drivers/acpi/apei/hest.c
|
|
+++ b/drivers/acpi/apei/hest.c
|
|
@@ -96,6 +96,14 @@ static int apei_hest_parse(apei_hest_func_t func, void *data)
|
|
if (hest_disable || !hest_tab)
|
|
return -EINVAL;
|
|
|
|
+#ifdef CONFIG_ARM64
|
|
+ /* Ignore broken firmware */
|
|
+ if (!strncmp(hest_tab->header.oem_id, "HPE ", 6) &&
|
|
+ !strncmp(hest_tab->header.oem_table_id, "ProLiant", 8) &&
|
|
+ MIDR_IMPLEMENTOR(read_cpuid_id()) == ARM_CPU_IMP_APM)
|
|
+ return -EINVAL;
|
|
+#endif
|
|
+
|
|
hest_hdr = (struct acpi_hest_header *)(hest_tab + 1);
|
|
for (i = 0; i < hest_tab->error_source_count; i++) {
|
|
len = hest_esrc_len(hest_hdr);
|
|
diff --git a/drivers/acpi/irq.c b/drivers/acpi/irq.c
|
|
index c68e694fca26..146cba5ae5bc 100644
|
|
--- a/drivers/acpi/irq.c
|
|
+++ b/drivers/acpi/irq.c
|
|
@@ -130,6 +130,7 @@ struct acpi_irq_parse_one_ctx {
|
|
unsigned int index;
|
|
unsigned long *res_flags;
|
|
struct irq_fwspec *fwspec;
|
|
+ bool skip_producer_check;
|
|
};
|
|
|
|
/**
|
|
@@ -201,7 +202,8 @@ static acpi_status acpi_irq_parse_one_cb(struct acpi_resource *ares,
|
|
return AE_CTRL_TERMINATE;
|
|
case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
|
|
eirq = &ares->data.extended_irq;
|
|
- if (eirq->producer_consumer == ACPI_PRODUCER)
|
|
+ if (!ctx->skip_producer_check &&
|
|
+ eirq->producer_consumer == ACPI_PRODUCER)
|
|
return AE_OK;
|
|
if (ctx->index >= eirq->interrupt_count) {
|
|
ctx->index -= eirq->interrupt_count;
|
|
@@ -236,8 +238,19 @@ static acpi_status acpi_irq_parse_one_cb(struct acpi_resource *ares,
|
|
static int acpi_irq_parse_one(acpi_handle handle, unsigned int index,
|
|
struct irq_fwspec *fwspec, unsigned long *flags)
|
|
{
|
|
- struct acpi_irq_parse_one_ctx ctx = { -EINVAL, index, flags, fwspec };
|
|
+ struct acpi_irq_parse_one_ctx ctx = { -EINVAL, index, flags, fwspec, false };
|
|
|
|
+ /*
|
|
+ * HPE m400 (arm64) firmware wrongly marks its UART interrupt as
|
|
+ * ACPI_PRODUCER; skip the producer check for that device. Use a
|
|
+ * non-refcounting lookup: adev is only inspected, never kept.
|
|
+ */
|
|
+ if (IS_ENABLED(CONFIG_ARM64)) {
|
|
+ struct acpi_device *adev = acpi_fetch_acpi_dev(handle);
|
|
+
|
|
+ if (adev && !strcmp(acpi_device_hid(adev), "APMC0D08"))
|
|
+ ctx.skip_producer_check = true;
|
|
+ }
|
|
acpi_walk_resources(handle, METHOD_NAME__CRS, acpi_irq_parse_one_cb, &ctx);
|
|
return ctx.rc;
|
|
}
|
|
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
|
|
index 762b61f67e6c..3581d4898dc4 100644
|
|
--- a/drivers/acpi/scan.c
|
|
+++ b/drivers/acpi/scan.c
|
|
@@ -1766,6 +1766,15 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device)
|
|
if (!acpi_match_device_ids(device, ignore_serial_bus_ids))
|
|
return false;
|
|
|
|
+ /*
|
|
+ * Firmware on some arm64 X-Gene platforms will make the UART
|
|
+ * device appear as both a UART and a slave of that UART. Just
|
|
+ * bail out here for X-Gene UARTs.
|
|
+ */
|
|
+ if (IS_ENABLED(CONFIG_ARM64) &&
|
|
+ !strcmp(acpi_device_hid(device), "APMC0D08"))
|
|
+ return false;
|
|
+
|
|
INIT_LIST_HEAD(&resource_list);
|
|
acpi_dev_get_resources(device, &resource_list,
|
|
acpi_check_serial_bus_slave,
|
|
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
|
|
index cf8c7fd59ada..28a8189be64f 100644
|
|
--- a/drivers/ata/libahci.c
|
|
+++ b/drivers/ata/libahci.c
|
|
@@ -690,6 +690,24 @@ int ahci_stop_engine(struct ata_port *ap)
|
|
tmp &= ~PORT_CMD_START;
|
|
writel(tmp, port_mmio + PORT_CMD);
|
|
|
|
+#ifdef CONFIG_ARM64
|
|
+ /* Rev Ax of Cavium CN99XX needs a hack for port stop */
|
|
+ if (dev_is_pci(ap->host->dev) &&
|
|
+ to_pci_dev(ap->host->dev)->vendor == 0x14e4 &&
|
|
+ to_pci_dev(ap->host->dev)->device == 0x9027 &&
|
|
+ midr_is_cpu_model_range(read_cpuid_id(),
|
|
+ MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN),
|
|
+ MIDR_CPU_VAR_REV(0, 0),
|
|
+ MIDR_CPU_VAR_REV(0, MIDR_REVISION_MASK))) {
|
|
+ tmp = readl(hpriv->mmio + 0x8000);
|
|
+ udelay(100);
|
|
+ writel(tmp | (1 << 26), hpriv->mmio + 0x8000);
|
|
+ udelay(100);
|
|
+ writel(tmp & ~(1 << 26), hpriv->mmio + 0x8000);
|
|
+ dev_warn(ap->host->dev, "CN99XX SATA reset workaround applied\n");
|
|
+ }
|
|
+#endif
|
|
+
|
|
/* wait for engine to stop. This could be as long as 500 msec */
|
|
tmp = ata_wait_register(ap, port_mmio + PORT_CMD,
|
|
PORT_CMD_LIST_ON, PORT_CMD_LIST_ON, 1, 500);
|
|
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
|
|
index a97776ea9d99..4c98849577d4 100644
|
|
--- a/drivers/base/cpu.c
|
|
+++ b/drivers/base/cpu.c
|
|
@@ -570,6 +570,12 @@ ssize_t __weak cpu_show_mmio_stale_data(struct device *dev,
|
|
return sysfs_emit(buf, "Not affected\n");
|
|
}
|
|
|
|
+ssize_t __weak cpu_show_retbleed(struct device *dev,
|
|
+ struct device_attribute *attr, char *buf)
|
|
+{
|
|
+ return sysfs_emit(buf, "Not affected\n");
|
|
+}
|
|
+
|
|
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
|
|
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
|
|
static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
|
|
@@ -580,6 +586,7 @@ static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
|
|
static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
|
|
static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL);
|
|
static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL);
|
|
+static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL);
|
|
|
|
static struct attribute *cpu_root_vulnerabilities_attrs[] = {
|
|
&dev_attr_meltdown.attr,
|
|
@@ -592,6 +599,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
|
|
&dev_attr_itlb_multihit.attr,
|
|
&dev_attr_srbds.attr,
|
|
&dev_attr_mmio_stale_data.attr,
|
|
+ &dev_attr_retbleed.attr,
|
|
NULL
|
|
};
|
|
|
|
diff --git a/drivers/char/ipmi/ipmi_dmi.c b/drivers/char/ipmi/ipmi_dmi.c
|
|
index bbf7029e224b..cf7faa970dd6 100644
|
|
--- a/drivers/char/ipmi/ipmi_dmi.c
|
|
+++ b/drivers/char/ipmi/ipmi_dmi.c
|
|
@@ -215,6 +215,21 @@ static int __init scan_for_dmi_ipmi(void)
|
|
{
|
|
const struct dmi_device *dev = NULL;
|
|
|
|
+#ifdef CONFIG_ARM64
|
|
+ /* RHEL-only
|
|
+ * If this is ARM-based HPE m400, return now, because that platform
|
|
+ * reports the host-side ipmi address as intel port-io space, which
|
|
+ * does not exist in the ARM architecture.
|
|
+ */
|
|
+ const char *dmistr = dmi_get_system_info(DMI_PRODUCT_NAME);
|
|
+
|
|
+ if (dmistr && (strcmp("ProLiant m400 Server", dmistr) == 0)) {
|
|
+ pr_debug("%s does not support host ipmi\n", dmistr);
|
|
+ return 0;
|
|
+ }
|
|
+ /* END RHEL-only */
|
|
+#endif
|
|
+
|
|
while ((dev = dmi_find_device(DMI_DEV_TYPE_IPMI, NULL, dev)))
|
|
dmi_decode_ipmi((const struct dmi_header *) dev->device_data);
|
|
|
|
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
|
|
index 2610e809c802..4f04a6bcf2e9 100644
|
|
--- a/drivers/char/ipmi/ipmi_msghandler.c
|
|
+++ b/drivers/char/ipmi/ipmi_msghandler.c
|
|
@@ -35,6 +35,7 @@
|
|
#include <linux/uuid.h>
|
|
#include <linux/nospec.h>
|
|
#include <linux/vmalloc.h>
|
|
+#include <linux/dmi.h>
|
|
#include <linux/delay.h>
|
|
|
|
#define IPMI_DRIVER_VERSION "39.2"
|
|
@@ -5427,8 +5428,21 @@ static int __init ipmi_init_msghandler_mod(void)
|
|
{
|
|
int rv;
|
|
|
|
- pr_info("version " IPMI_DRIVER_VERSION "\n");
|
|
+#ifdef CONFIG_ARM64
|
|
+ /* RHEL-only
|
|
+ * If this is ARM-based HPE m400, return now, because that platform
|
|
+ * reports the host-side ipmi address as intel port-io space, which
|
|
+ * does not exist in the ARM architecture.
|
|
+ */
|
|
+ const char *dmistr = dmi_get_system_info(DMI_PRODUCT_NAME);
|
|
|
|
+ if (dmistr && (strcmp("ProLiant m400 Server", dmistr) == 0)) {
|
|
+ pr_debug("%s does not support host ipmi\n", dmistr);
|
|
+ return -ENOSYS;
|
|
+ }
|
|
+ /* END RHEL-only */
|
|
+#endif
|
|
+ pr_info("version " IPMI_DRIVER_VERSION "\n");
|
|
mutex_lock(&ipmi_interfaces_mutex);
|
|
rv = ipmi_register_driver();
|
|
mutex_unlock(&ipmi_interfaces_mutex);
|
|
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
|
|
index 243882f5e5f9..4a0fa81a563d 100644
|
|
--- a/drivers/firmware/efi/Kconfig
|
|
+++ b/drivers/firmware/efi/Kconfig
|
|
@@ -91,6 +91,18 @@ config EFI_SOFT_RESERVE
|
|
|
|
If unsure, say Y.
|
|
|
|
+config EFI_DXE_MEM_ATTRIBUTES
|
|
+ bool "Adjust memory attributes in EFISTUB"
|
|
+ depends on EFI && EFI_STUB && X86
|
|
+ default y
|
|
+ help
|
|
+ UEFI specification does not guarantee all memory to be
|
|
+ accessible for both write and execute as the kernel expects
|
|
+ it to be.
|
|
+ Use DXE services to check and alter memory protection
|
|
+ attributes during boot via EFISTUB to ensure that memory
|
|
+ ranges used by the kernel are writable and executable.
|
|
+
|
|
config EFI_PARAMS_FROM_FDT
|
|
bool
|
|
help
|
|
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
|
|
index c02ff25dd477..d860f8eb9a81 100644
|
|
--- a/drivers/firmware/efi/Makefile
|
|
+++ b/drivers/firmware/efi/Makefile
|
|
@@ -28,6 +28,7 @@ obj-$(CONFIG_EFI_FAKE_MEMMAP) += fake_map.o
|
|
obj-$(CONFIG_EFI_BOOTLOADER_CONTROL) += efibc.o
|
|
obj-$(CONFIG_EFI_TEST) += test/
|
|
obj-$(CONFIG_EFI_DEV_PATH_PARSER) += dev-path-parser.o
|
|
+obj-$(CONFIG_EFI) += secureboot.o
|
|
obj-$(CONFIG_APPLE_PROPERTIES) += apple-properties.o
|
|
obj-$(CONFIG_EFI_RCI2_TABLE) += rci2-table.o
|
|
obj-$(CONFIG_EFI_EMBEDDED_FIRMWARE) += embedded-firmware.o
|
|
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
|
|
index ff57db8f8d05..d7dfde3260bf 100644
|
|
--- a/drivers/firmware/efi/efi.c
|
|
+++ b/drivers/firmware/efi/efi.c
|
|
@@ -31,6 +31,7 @@
|
|
#include <linux/ucs2_string.h>
|
|
#include <linux/memblock.h>
|
|
#include <linux/security.h>
|
|
+#include <linux/bsearch.h>
|
|
|
|
#include <asm/early_ioremap.h>
|
|
|
|
@@ -851,40 +852,101 @@ int efi_mem_type(unsigned long phys_addr)
|
|
}
|
|
#endif
|
|
|
|
+struct efi_error_code {
|
|
+ efi_status_t status;
|
|
+ int errno;
|
|
+ const char *description;
|
|
+};
|
|
+
|
|
+static const struct efi_error_code efi_error_codes[] = {
|
|
+ { EFI_SUCCESS, 0, "Success"},
|
|
+#if 0
|
|
+ { EFI_LOAD_ERROR, -EPICK_AN_ERRNO, "Load Error"},
|
|
+#endif
|
|
+ { EFI_INVALID_PARAMETER, -EINVAL, "Invalid Parameter"},
|
|
+ { EFI_UNSUPPORTED, -ENOSYS, "Unsupported"},
|
|
+ { EFI_BAD_BUFFER_SIZE, -ENOSPC, "Bad Buffer Size"},
|
|
+ { EFI_BUFFER_TOO_SMALL, -ENOSPC, "Buffer Too Small"},
|
|
+ { EFI_NOT_READY, -EAGAIN, "Not Ready"},
|
|
+ { EFI_DEVICE_ERROR, -EIO, "Device Error"},
|
|
+ { EFI_WRITE_PROTECTED, -EROFS, "Write Protected"},
|
|
+ { EFI_OUT_OF_RESOURCES, -ENOMEM, "Out of Resources"},
|
|
+#if 0
|
|
+ { EFI_VOLUME_CORRUPTED, -EPICK_AN_ERRNO, "Volume Corrupt"},
|
|
+ { EFI_VOLUME_FULL, -EPICK_AN_ERRNO, "Volume Full"},
|
|
+ { EFI_NO_MEDIA, -EPICK_AN_ERRNO, "No Media"},
|
|
+ { EFI_MEDIA_CHANGED, -EPICK_AN_ERRNO, "Media changed"},
|
|
+#endif
|
|
+ { EFI_NOT_FOUND, -ENOENT, "Not Found"},
|
|
+#if 0
|
|
+ { EFI_ACCESS_DENIED, -EPICK_AN_ERRNO, "Access Denied"},
|
|
+ { EFI_NO_RESPONSE, -EPICK_AN_ERRNO, "No Response"},
|
|
+ { EFI_NO_MAPPING, -EPICK_AN_ERRNO, "No mapping"},
|
|
+ { EFI_TIMEOUT, -EPICK_AN_ERRNO, "Time out"},
|
|
+ { EFI_NOT_STARTED, -EPICK_AN_ERRNO, "Not started"},
|
|
+ { EFI_ALREADY_STARTED, -EPICK_AN_ERRNO, "Already started"},
|
|
+#endif
|
|
+ { EFI_ABORTED, -EINTR, "Aborted"},
|
|
+#if 0
|
|
+ { EFI_ICMP_ERROR, -EPICK_AN_ERRNO, "ICMP Error"},
|
|
+ { EFI_TFTP_ERROR, -EPICK_AN_ERRNO, "TFTP Error"},
|
|
+ { EFI_PROTOCOL_ERROR, -EPICK_AN_ERRNO, "Protocol Error"},
|
|
+ { EFI_INCOMPATIBLE_VERSION, -EPICK_AN_ERRNO, "Incompatible Version"},
|
|
+#endif
|
|
+ { EFI_SECURITY_VIOLATION, -EACCES, "Security Policy Violation"},
|
|
+#if 0
|
|
+ { EFI_CRC_ERROR, -EPICK_AN_ERRNO, "CRC Error"},
|
|
+ { EFI_END_OF_MEDIA, -EPICK_AN_ERRNO, "End of Media"},
|
|
+ { EFI_END_OF_FILE, -EPICK_AN_ERRNO, "End of File"},
|
|
+ { EFI_INVALID_LANGUAGE, -EPICK_AN_ERRNO, "Invalid Languages"},
|
|
+ { EFI_COMPROMISED_DATA, -EPICK_AN_ERRNO, "Compromised Data"},
|
|
+
|
|
+ // warnings
|
|
+ { EFI_WARN_UNKOWN_GLYPH, -EPICK_AN_ERRNO, "Warning Unknown Glyph"},
|
|
+ { EFI_WARN_DELETE_FAILURE, -EPICK_AN_ERRNO, "Warning Delete Failure"},
|
|
+ { EFI_WARN_WRITE_FAILURE, -EPICK_AN_ERRNO, "Warning Write Failure"},
|
|
+ { EFI_WARN_BUFFER_TOO_SMALL, -EPICK_AN_ERRNO, "Warning Buffer Too Small"},
|
|
+#endif
|
|
+};
|
|
+
|
|
+static int
|
|
+efi_status_cmp_bsearch(const void *key, const void *item)
|
|
+{
|
|
+ u64 status = (u64)(uintptr_t)key; /* key is the status value itself */
|
|
+ const struct efi_error_code *code = item; /* entry of the const table */
|
|
+
|
|
+ if (status < code->status)
|
|
+ return -1;
|
|
+ if (status > code->status)
|
|
+ return 1;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
int efi_status_to_err(efi_status_t status)
|
|
{
|
|
- int err;
|
|
-
|
|
- switch (status) {
|
|
- case EFI_SUCCESS:
|
|
- err = 0;
|
|
- break;
|
|
- case EFI_INVALID_PARAMETER:
|
|
- err = -EINVAL;
|
|
- break;
|
|
- case EFI_OUT_OF_RESOURCES:
|
|
- err = -ENOSPC;
|
|
- break;
|
|
- case EFI_DEVICE_ERROR:
|
|
- err = -EIO;
|
|
- break;
|
|
- case EFI_WRITE_PROTECTED:
|
|
- err = -EROFS;
|
|
- break;
|
|
- case EFI_SECURITY_VIOLATION:
|
|
- err = -EACCES;
|
|
- break;
|
|
- case EFI_NOT_FOUND:
|
|
- err = -ENOENT;
|
|
- break;
|
|
- case EFI_ABORTED:
|
|
- err = -EINTR;
|
|
- break;
|
|
- default:
|
|
- err = -EINVAL;
|
|
- }
|
|
+ const struct efi_error_code *found;
|
|
+ size_t num = ARRAY_SIZE(efi_error_codes);
|
|
|
|
- return err;
|
|
+ /* Kernel bsearch() is (key, base, num, size, cmp): count precedes size. */
|
|
+ found = bsearch((void *)(uintptr_t)status, efi_error_codes, num,
|
|
+ sizeof(efi_error_codes[0]), efi_status_cmp_bsearch);
|
|
+ if (!found)
|
|
+ return -EINVAL; /* status has no entry in efi_error_codes */
|
|
+ return found->errno;
|
|
+}
|
|
+
|
|
+const char *
|
|
+efi_status_to_str(efi_status_t status)
|
|
+{
|
|
+ const struct efi_error_code *found;
|
|
+ size_t num = ARRAY_SIZE(efi_error_codes);
|
|
+
|
|
+ /* Kernel bsearch() is (key, base, num, size, cmp): count precedes size. */
|
|
+ found = bsearch((void *)(uintptr_t)status, efi_error_codes, num,
|
|
+ sizeof(efi_error_codes[0]), efi_status_cmp_bsearch);
|
|
+ if (!found)
|
|
+ return "Unknown error code";
|
|
+ return found->description;
|
|
}
|
|
|
|
static DEFINE_SPINLOCK(efi_mem_reserve_persistent_lock);
|
|
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
|
|
index edb77b0621ea..2dc24776899a 100644
|
|
--- a/drivers/firmware/efi/libstub/efistub.h
|
|
+++ b/drivers/firmware/efi/libstub/efistub.h
|
|
@@ -36,6 +36,9 @@ extern bool efi_novamap;
|
|
|
|
extern const efi_system_table_t *efi_system_table;
|
|
|
|
+typedef union efi_dxe_services_table efi_dxe_services_table_t;
|
|
+extern const efi_dxe_services_table_t *efi_dxe_table;
|
|
+
|
|
efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
|
|
efi_system_table_t *sys_table_arg);
|
|
|
|
@@ -44,6 +47,7 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
|
|
#define efi_is_native() (true)
|
|
#define efi_bs_call(func, ...) efi_system_table->boottime->func(__VA_ARGS__)
|
|
#define efi_rt_call(func, ...) efi_system_table->runtime->func(__VA_ARGS__)
|
|
+#define efi_dxe_call(func, ...) efi_dxe_table->func(__VA_ARGS__)
|
|
#define efi_table_attr(inst, attr) (inst->attr)
|
|
#define efi_call_proto(inst, func, ...) inst->func(inst, ##__VA_ARGS__)
|
|
|
|
@@ -329,6 +333,76 @@ union efi_boot_services {
|
|
} mixed_mode;
|
|
};
|
|
|
|
+typedef enum {
|
|
+ EfiGcdMemoryTypeNonExistent,
|
|
+ EfiGcdMemoryTypeReserved,
|
|
+ EfiGcdMemoryTypeSystemMemory,
|
|
+ EfiGcdMemoryTypeMemoryMappedIo,
|
|
+ EfiGcdMemoryTypePersistent,
|
|
+ EfiGcdMemoryTypeMoreReliable,
|
|
+ EfiGcdMemoryTypeMaximum
|
|
+} efi_gcd_memory_type_t;
|
|
+
|
|
+typedef struct {
|
|
+ efi_physical_addr_t base_address;
|
|
+ u64 length;
|
|
+ u64 capabilities;
|
|
+ u64 attributes;
|
|
+ efi_gcd_memory_type_t gcd_memory_type;
|
|
+ void *image_handle;
|
|
+ void *device_handle;
|
|
+} efi_gcd_memory_space_desc_t;
|
|
+
|
|
+/*
|
|
+ * EFI DXE Services table
|
|
+ */
|
|
+union efi_dxe_services_table {
|
|
+ struct {
|
|
+ efi_table_hdr_t hdr;
|
|
+ void *add_memory_space;
|
|
+ void *allocate_memory_space;
|
|
+ void *free_memory_space;
|
|
+ void *remove_memory_space;
|
|
+ efi_status_t (__efiapi *get_memory_space_descriptor)(efi_physical_addr_t,
|
|
+ efi_gcd_memory_space_desc_t *);
|
|
+ efi_status_t (__efiapi *set_memory_space_attributes)(efi_physical_addr_t,
|
|
+ u64, u64);
|
|
+ void *get_memory_space_map;
|
|
+ void *add_io_space;
|
|
+ void *allocate_io_space;
|
|
+ void *free_io_space;
|
|
+ void *remove_io_space;
|
|
+ void *get_io_space_descriptor;
|
|
+ void *get_io_space_map;
|
|
+ void *dispatch;
|
|
+ void *schedule;
|
|
+ void *trust;
|
|
+ void *process_firmware_volume;
|
|
+ void *set_memory_space_capabilities;
|
|
+ };
|
|
+ struct {
|
|
+ efi_table_hdr_t hdr;
|
|
+ u32 add_memory_space;
|
|
+ u32 allocate_memory_space;
|
|
+ u32 free_memory_space;
|
|
+ u32 remove_memory_space;
|
|
+ u32 get_memory_space_descriptor;
|
|
+ u32 set_memory_space_attributes;
|
|
+ u32 get_memory_space_map;
|
|
+ u32 add_io_space;
|
|
+ u32 allocate_io_space;
|
|
+ u32 free_io_space;
|
|
+ u32 remove_io_space;
|
|
+ u32 get_io_space_descriptor;
|
|
+ u32 get_io_space_map;
|
|
+ u32 dispatch;
|
|
+ u32 schedule;
|
|
+ u32 trust;
|
|
+ u32 process_firmware_volume;
|
|
+ u32 set_memory_space_capabilities;
|
|
+ } mixed_mode;
|
|
+};
|
|
+
|
|
typedef union efi_uga_draw_protocol efi_uga_draw_protocol_t;
|
|
|
|
union efi_uga_draw_protocol {
|
|
diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
|
|
index 01ddd4502e28..b14e88ccefca 100644
|
|
--- a/drivers/firmware/efi/libstub/x86-stub.c
|
|
+++ b/drivers/firmware/efi/libstub/x86-stub.c
|
|
@@ -22,6 +22,7 @@
|
|
#define MAXMEM_X86_64_4LEVEL (1ull << 46)
|
|
|
|
const efi_system_table_t *efi_system_table;
|
|
+const efi_dxe_services_table_t *efi_dxe_table;
|
|
extern u32 image_offset;
|
|
static efi_loaded_image_t *image = NULL;
|
|
|
|
@@ -211,9 +212,110 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
|
|
}
|
|
}
|
|
|
|
+static void
|
|
+adjust_memory_range_protection(unsigned long start, unsigned long size)
|
|
+{
|
|
+ efi_status_t status;
|
|
+ efi_gcd_memory_space_desc_t desc;
|
|
+ unsigned long end, next;
|
|
+ unsigned long rounded_start, rounded_end;
|
|
+ unsigned long unprotect_start, unprotect_size;
|
|
+ int has_system_memory = 0;
|
|
+
|
|
+ if (efi_dxe_table == NULL)
|
|
+ return;
|
|
+
|
|
+ rounded_start = rounddown(start, EFI_PAGE_SIZE);
|
|
+ rounded_end = roundup(start + size, EFI_PAGE_SIZE);
|
|
+
|
|
+ /*
|
|
+ * Don't modify memory region attributes if they are
|
|
+ * already suitable, to lower the possibility to
|
|
+ * encounter firmware bugs.
|
|
+ */
|
|
+
|
|
+ for (end = start + size; start < end; start = next) {
|
|
+
|
|
+ status = efi_dxe_call(get_memory_space_descriptor, start, &desc);
|
|
+
|
|
+ if (status != EFI_SUCCESS)
|
|
+ return;
|
|
+
|
|
+ next = desc.base_address + desc.length;
|
|
+
|
|
+ /*
|
|
+ * Only system memory is suitable for trampoline/kernel image placement,
|
|
+ * so only this type of memory needs its attributes to be modified.
|
|
+ */
|
|
+
|
|
+ if (desc.gcd_memory_type != EfiGcdMemoryTypeSystemMemory ||
|
|
+ (desc.attributes & (EFI_MEMORY_RO | EFI_MEMORY_XP)) == 0)
|
|
+ continue;
|
|
+
|
|
+ unprotect_start = max(rounded_start, (unsigned long)desc.base_address);
|
|
+ unprotect_size = min(rounded_end, next) - unprotect_start;
|
|
+
|
|
+ status = efi_dxe_call(set_memory_space_attributes,
|
|
+ unprotect_start, unprotect_size,
|
|
+ EFI_MEMORY_WB);
|
|
+
|
|
+ if (status != EFI_SUCCESS) {
|
|
+ efi_warn("Unable to unprotect memory range [%08lx,%08lx]: %d\n",
|
|
+ unprotect_start,
|
|
+ unprotect_start + unprotect_size,
|
|
+ (int)status);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Trampoline takes 2 pages and can be loaded in first megabyte of memory
|
|
+ * with its end placed between 128k and 640k where BIOS might start.
|
|
+ * (see arch/x86/boot/compressed/pgtable_64.c)
|
|
+ *
|
|
+ * We cannot find exact trampoline placement since memory map
|
|
+ * can be modified by UEFI, and it can alter the computed address.
|
|
+ */
|
|
+
|
|
+#define TRAMPOLINE_PLACEMENT_BASE ((128 - 8)*1024)
|
|
+#define TRAMPOLINE_PLACEMENT_SIZE (640*1024 - (128 - 8)*1024)
|
|
+
|
|
+void startup_32(struct boot_params *boot_params);
|
|
+
|
|
+static void
|
|
+setup_memory_protection(unsigned long image_base, unsigned long image_size)
|
|
+{
|
|
+ /*
|
|
+ * Allow execution of possible trampoline used
|
|
+ * for switching between 4- and 5-level page tables
|
|
+ * and relocated kernel image.
|
|
+ */
|
|
+
|
|
+ adjust_memory_range_protection(TRAMPOLINE_PLACEMENT_BASE,
|
|
+ TRAMPOLINE_PLACEMENT_SIZE);
|
|
+
|
|
+#ifdef CONFIG_64BIT
|
|
+ if (image_base != (unsigned long)startup_32)
|
|
+ adjust_memory_range_protection(image_base, image_size);
|
|
+#else
|
|
+ /*
|
|
+ * Clear protection flags on a whole range of possible
|
|
+ * addresses used for KASLR. We don't need to do that
|
|
+ * on x86_64, since KASLR/extraction is performed after
|
|
+ * dedicated identity page tables are built and we only
|
|
+ * need to remove possible protection on relocated image
|
|
+ * itself disregarding further relocations.
|
|
+ */
|
|
+ adjust_memory_range_protection(LOAD_PHYSICAL_ADDR,
|
|
+ KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR);
|
|
+#endif
|
|
+}
|
|
+
|
|
static const efi_char16_t apple[] = L"Apple";
|
|
|
|
-static void setup_quirks(struct boot_params *boot_params)
|
|
+static void setup_quirks(struct boot_params *boot_params,
|
|
+ unsigned long image_base,
|
|
+ unsigned long image_size)
|
|
{
|
|
efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long)
|
|
efi_table_attr(efi_system_table, fw_vendor);
|
|
@@ -222,6 +324,9 @@ static void setup_quirks(struct boot_params *boot_params)
|
|
if (IS_ENABLED(CONFIG_APPLE_PROPERTIES))
|
|
retrieve_apple_device_properties(boot_params);
|
|
}
|
|
+
|
|
+ if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES))
|
|
+ setup_memory_protection(image_base, image_size);
|
|
}
|
|
|
|
/*
|
|
@@ -341,8 +446,6 @@ static void __noreturn efi_exit(efi_handle_t handle, efi_status_t status)
|
|
asm("hlt");
|
|
}
|
|
|
|
-void startup_32(struct boot_params *boot_params);
|
|
-
|
|
void __noreturn efi_stub_entry(efi_handle_t handle,
|
|
efi_system_table_t *sys_table_arg,
|
|
struct boot_params *boot_params);
|
|
@@ -677,11 +780,17 @@ unsigned long efi_main(efi_handle_t handle,
|
|
efi_status_t status;
|
|
|
|
efi_system_table = sys_table_arg;
|
|
-
|
|
/* Check if we were booted by the EFI firmware */
|
|
if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
|
|
efi_exit(handle, EFI_INVALID_PARAMETER);
|
|
|
|
+ efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID);
|
|
+ if (efi_dxe_table &&
|
|
+ efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) {
|
|
+ efi_warn("Ignoring DXE services table: invalid signature\n");
|
|
+ efi_dxe_table = NULL;
|
|
+ }
|
|
+
|
|
/*
|
|
* If the kernel isn't already loaded at a suitable address,
|
|
* relocate it.
|
|
@@ -791,7 +900,7 @@ unsigned long efi_main(efi_handle_t handle,
|
|
|
|
setup_efi_pci(boot_params);
|
|
|
|
- setup_quirks(boot_params);
|
|
+ setup_quirks(boot_params, bzimage_addr, buffer_end - buffer_start);
|
|
|
|
status = exit_boot(boot_params, handle);
|
|
if (status != EFI_SUCCESS) {
|
|
diff --git a/drivers/firmware/efi/secureboot.c b/drivers/firmware/efi/secureboot.c
|
|
new file mode 100644
|
|
index 000000000000..de0a3714a5d4
|
|
--- /dev/null
|
|
+++ b/drivers/firmware/efi/secureboot.c
|
|
@@ -0,0 +1,38 @@
|
|
+/* Core kernel secure boot support.
|
|
+ *
|
|
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
|
|
+ * Written by David Howells (dhowells@redhat.com)
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public Licence
|
|
+ * as published by the Free Software Foundation; either version
|
|
+ * 2 of the Licence, or (at your option) any later version.
|
|
+ */
|
|
+
|
|
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
+
|
|
+#include <linux/efi.h>
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/printk.h>
|
|
+
|
|
+/*
|
|
+ * Decide what to do when UEFI secure boot mode is enabled.
|
|
+ */
|
|
+void __init efi_set_secure_boot(enum efi_secureboot_mode mode)
|
|
+{
|
|
+ if (efi_enabled(EFI_BOOT)) {
|
|
+ switch (mode) {
|
|
+ case efi_secureboot_mode_disabled:
|
|
+ pr_info("Secure boot disabled\n");
|
|
+ break;
|
|
+ case efi_secureboot_mode_enabled:
|
|
+ set_bit(EFI_SECURE_BOOT, &efi.flags);
|
|
+ pr_info("Secure boot enabled\n");
|
|
+ break;
|
|
+ default:
|
|
+ pr_warn("Secure boot could not be determined (mode %u)\n",
|
|
+ mode);
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+}
|
|
diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c
|
|
index 1f276f108cc9..7039ad9bdf7f 100644
|
|
--- a/drivers/firmware/sysfb.c
|
|
+++ b/drivers/firmware/sysfb.c
|
|
@@ -34,6 +34,22 @@
|
|
#include <linux/screen_info.h>
|
|
#include <linux/sysfb.h>
|
|
|
|
+static int skip_simpledrm;
|
|
+
|
|
+static int __init simpledrm_disable(char *opt)
|
|
+{
|
|
+ if (!opt)
|
|
+ return -EINVAL;
|
|
+
|
|
+ get_option(&opt, &skip_simpledrm);
|
|
+
|
|
+ if (skip_simpledrm)
|
|
+ pr_info("The simpledrm driver will not be probed\n");
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+early_param("nvidia-drm.modeset", simpledrm_disable);
|
|
+
|
|
static struct platform_device *pd;
|
|
static DEFINE_MUTEX(disable_lock);
|
|
static bool disabled;
|
|
@@ -83,7 +99,7 @@ static __init int sysfb_init(void)
|
|
|
|
/* try to create a simple-framebuffer device */
|
|
compatible = sysfb_parse_mode(si, &mode);
|
|
- if (compatible) {
|
|
+ if (compatible && !skip_simpledrm) {
|
|
pd = sysfb_create_simplefb(si, &mode);
|
|
if (!IS_ERR(pd))
|
|
goto unlock_mutex;
|
|
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
|
|
index 6c9e6e7f0afd..f0ff2f1f5fcb 100644
|
|
--- a/drivers/gpu/drm/i915/display/intel_psr.c
|
|
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
|
|
@@ -744,6 +744,15 @@ static bool intel_psr2_sel_fetch_config_valid(struct intel_dp *intel_dp,
|
|
return false;
|
|
}
|
|
|
|
+ /* Temporary workaround for Lenovo's issues with the X1 Carbon 10th Gen
|
|
+ * See: https://bugzilla.redhat.com/show_bug.cgi?id=2065794
|
|
+ */
|
|
+ if (IS_ALDERLAKE_P(dev_priv)) {
|
|
+ drm_dbg_kms(&dev_priv->drm,
|
|
+ "PSR2 sel fetch disabled on ADL-P, see rhbz2065794\n");
|
|
+ return false;
|
|
+ }
|
|
+
|
|
return crtc_state->enable_psr2_sel_fetch = true;
|
|
}
|
|
|
|
diff --git a/drivers/hid/hid-rmi.c b/drivers/hid/hid-rmi.c
|
|
index 311eee599ce9..2460c6bd46f8 100644
|
|
--- a/drivers/hid/hid-rmi.c
|
|
+++ b/drivers/hid/hid-rmi.c
|
|
@@ -322,19 +322,12 @@ static int rmi_input_event(struct hid_device *hdev, u8 *data, int size)
|
|
{
|
|
struct rmi_data *hdata = hid_get_drvdata(hdev);
|
|
struct rmi_device *rmi_dev = hdata->xport.rmi_dev;
|
|
- unsigned long flags;
|
|
|
|
if (!(test_bit(RMI_STARTED, &hdata->flags)))
|
|
return 0;
|
|
|
|
- local_irq_save(flags);
|
|
-
|
|
rmi_set_attn_data(rmi_dev, data[1], &data[2], size - 2);
|
|
|
|
- generic_handle_irq(hdata->rmi_irq);
|
|
-
|
|
- local_irq_restore(flags);
|
|
-
|
|
return 1;
|
|
}
|
|
|
|
@@ -591,56 +584,6 @@ static const struct rmi_transport_ops hid_rmi_ops = {
|
|
.reset = rmi_hid_reset,
|
|
};
|
|
|
|
-static void rmi_irq_teardown(void *data)
|
|
-{
|
|
- struct rmi_data *hdata = data;
|
|
- struct irq_domain *domain = hdata->domain;
|
|
-
|
|
- if (!domain)
|
|
- return;
|
|
-
|
|
- irq_dispose_mapping(irq_find_mapping(domain, 0));
|
|
-
|
|
- irq_domain_remove(domain);
|
|
- hdata->domain = NULL;
|
|
- hdata->rmi_irq = 0;
|
|
-}
|
|
-
|
|
-static int rmi_irq_map(struct irq_domain *h, unsigned int virq,
|
|
- irq_hw_number_t hw_irq_num)
|
|
-{
|
|
- irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq);
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-static const struct irq_domain_ops rmi_irq_ops = {
|
|
- .map = rmi_irq_map,
|
|
-};
|
|
-
|
|
-static int rmi_setup_irq_domain(struct hid_device *hdev)
|
|
-{
|
|
- struct rmi_data *hdata = hid_get_drvdata(hdev);
|
|
- int ret;
|
|
-
|
|
- hdata->domain = irq_domain_create_linear(hdev->dev.fwnode, 1,
|
|
- &rmi_irq_ops, hdata);
|
|
- if (!hdata->domain)
|
|
- return -ENOMEM;
|
|
-
|
|
- ret = devm_add_action_or_reset(&hdev->dev, &rmi_irq_teardown, hdata);
|
|
- if (ret)
|
|
- return ret;
|
|
-
|
|
- hdata->rmi_irq = irq_create_mapping(hdata->domain, 0);
|
|
- if (hdata->rmi_irq <= 0) {
|
|
- hid_err(hdev, "Can't allocate an IRQ\n");
|
|
- return hdata->rmi_irq < 0 ? hdata->rmi_irq : -ENXIO;
|
|
- }
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
static int rmi_probe(struct hid_device *hdev, const struct hid_device_id *id)
|
|
{
|
|
struct rmi_data *data = NULL;
|
|
@@ -713,18 +656,11 @@ static int rmi_probe(struct hid_device *hdev, const struct hid_device_id *id)
|
|
|
|
mutex_init(&data->page_mutex);
|
|
|
|
- ret = rmi_setup_irq_domain(hdev);
|
|
- if (ret) {
|
|
- hid_err(hdev, "failed to allocate IRQ domain\n");
|
|
- return ret;
|
|
- }
|
|
-
|
|
if (data->device_flags & RMI_DEVICE_HAS_PHYS_BUTTONS)
|
|
rmi_hid_pdata.gpio_data.disable = true;
|
|
|
|
data->xport.dev = hdev->dev.parent;
|
|
data->xport.pdata = rmi_hid_pdata;
|
|
- data->xport.pdata.irq = data->rmi_irq;
|
|
data->xport.proto_name = "hid";
|
|
data->xport.ops = &hid_rmi_ops;
|
|
|
|
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
|
|
index 7f416a12000e..68be4afaa58a 100644
|
|
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
|
|
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
|
|
@@ -9,6 +9,7 @@
|
|
#include <linux/init.h>
|
|
#include <linux/types.h>
|
|
#include <linux/device.h>
|
|
+#include <linux/dmi.h>
|
|
#include <linux/io.h>
|
|
#include <linux/err.h>
|
|
#include <linux/fs.h>
|
|
@@ -2171,6 +2172,16 @@ static const struct amba_id etm4_ids[] = {
|
|
{},
|
|
};
|
|
|
|
+static const struct dmi_system_id broken_coresight[] = {
|
|
+ {
|
|
+ .matches = {
|
|
+ DMI_MATCH(DMI_SYS_VENDOR, "HPE"),
|
|
+ DMI_MATCH(DMI_PRODUCT_NAME, "Apollo 70"),
|
|
+ },
|
|
+ },
|
|
+ { } /* terminating entry */
|
|
+};
|
|
+
|
|
MODULE_DEVICE_TABLE(amba, etm4_ids);
|
|
|
|
static struct amba_driver etm4x_amba_driver = {
|
|
@@ -2204,6 +2215,11 @@ static int __init etm4x_init(void)
|
|
{
|
|
int ret;
|
|
|
|
+ if (dmi_check_system(broken_coresight)) {
|
|
+ pr_info("ETM4 disabled due to firmware bug\n");
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
ret = etm4_pm_setup();
|
|
|
|
/* etm4_pm_setup() does its own cleanup - exit on error */
|
|
@@ -2230,6 +2246,9 @@ static int __init etm4x_init(void)
|
|
|
|
static void __exit etm4x_exit(void)
|
|
{
|
|
+ if (dmi_check_system(broken_coresight))
|
|
+ return;
|
|
+
|
|
amba_driver_unregister(&etm4x_amba_driver);
|
|
platform_driver_unregister(&etm4_platform_driver);
|
|
etm4_pm_clear();
|
|
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
|
|
index c5a019eab5ec..b463d85bfb35 100644
|
|
--- a/drivers/idle/intel_idle.c
|
|
+++ b/drivers/idle/intel_idle.c
|
|
@@ -47,11 +47,13 @@
|
|
#include <linux/tick.h>
|
|
#include <trace/events/power.h>
|
|
#include <linux/sched.h>
|
|
+#include <linux/sched/smt.h>
|
|
#include <linux/notifier.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/moduleparam.h>
|
|
#include <asm/cpu_device_id.h>
|
|
#include <asm/intel-family.h>
|
|
+#include <asm/nospec-branch.h>
|
|
#include <asm/mwait.h>
|
|
#include <asm/msr.h>
|
|
|
|
@@ -105,6 +107,12 @@ static unsigned int mwait_substates __initdata;
|
|
*/
|
|
#define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15)
|
|
|
|
+/*
|
|
+ * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
|
|
+ * above.
|
|
+ */
|
|
+#define CPUIDLE_FLAG_IBRS BIT(16)
|
|
+
|
|
/*
|
|
* MWAIT takes an 8-bit "hint" in EAX "suggesting"
|
|
* the C-state (top nibble) and sub-state (bottom nibble)
|
|
@@ -159,6 +167,24 @@ static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
|
|
return ret;
|
|
}
|
|
|
|
+static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
|
|
+ struct cpuidle_driver *drv, int index)
|
|
+{
|
|
+ bool smt_active = sched_smt_active();
|
|
+ u64 spec_ctrl = spec_ctrl_current();
|
|
+ int ret;
|
|
+
|
|
+ if (smt_active)
|
|
+ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
|
|
+
|
|
+ ret = __intel_idle(dev, drv, index);
|
|
+
|
|
+ if (smt_active)
|
|
+ wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
/**
|
|
* intel_idle_s2idle - Ask the processor to enter the given idle state.
|
|
* @dev: cpuidle device of the target CPU.
|
|
@@ -680,7 +706,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
|
|
{
|
|
.name = "C6",
|
|
.desc = "MWAIT 0x20",
|
|
- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
|
|
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
|
|
.exit_latency = 85,
|
|
.target_residency = 200,
|
|
.enter = &intel_idle,
|
|
@@ -688,7 +714,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
|
|
{
|
|
.name = "C7s",
|
|
.desc = "MWAIT 0x33",
|
|
- .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
|
|
+ .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
|
|
.exit_latency = 124,
|
|
.target_residency = 800,
|
|
.enter = &intel_idle,
|
|
@@ -696,7 +722,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
|
|
{
|
|
.name = "C8",
|
|
.desc = "MWAIT 0x40",
|
|
- .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
|
|
+ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
|
|
.exit_latency = 200,
|
|
.target_residency = 800,
|
|
.enter = &intel_idle,
|
|
@@ -704,7 +730,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
|
|
{
|
|
.name = "C9",
|
|
.desc = "MWAIT 0x50",
|
|
- .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
|
|
+ .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
|
|
.exit_latency = 480,
|
|
.target_residency = 5000,
|
|
.enter = &intel_idle,
|
|
@@ -712,7 +738,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
|
|
{
|
|
.name = "C10",
|
|
.desc = "MWAIT 0x60",
|
|
- .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
|
|
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
|
|
.exit_latency = 890,
|
|
.target_residency = 5000,
|
|
.enter = &intel_idle,
|
|
@@ -741,7 +767,7 @@ static struct cpuidle_state skx_cstates[] __initdata = {
|
|
{
|
|
.name = "C6",
|
|
.desc = "MWAIT 0x20",
|
|
- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
|
|
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
|
|
.exit_latency = 133,
|
|
.target_residency = 600,
|
|
.enter = &intel_idle,
|
|
@@ -1686,6 +1712,12 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
|
|
if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE)
|
|
drv->states[drv->state_count].enter = intel_idle_irq;
|
|
|
|
+ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
|
|
+ cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
|
|
+ WARN_ON_ONCE(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE);
|
|
+ drv->states[drv->state_count].enter = intel_idle_ibrs;
|
|
+ }
|
|
+
|
|
if ((disabled_states_mask & BIT(drv->state_count)) ||
|
|
((icpu->use_acpi || force_use_acpi) &&
|
|
intel_idle_off_by_default(mwait_hint) &&
|
|
diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c
|
|
index 258d5fe3d395..f7298e3dc8f3 100644
|
|
--- a/drivers/input/rmi4/rmi_driver.c
|
|
+++ b/drivers/input/rmi4/rmi_driver.c
|
|
@@ -182,34 +182,47 @@ void rmi_set_attn_data(struct rmi_device *rmi_dev, unsigned long irq_status,
|
|
attn_data.data = fifo_data;
|
|
|
|
kfifo_put(&drvdata->attn_fifo, attn_data);
|
|
+
|
|
+ schedule_work(&drvdata->attn_work);
|
|
}
|
|
EXPORT_SYMBOL_GPL(rmi_set_attn_data);
|
|
|
|
-static irqreturn_t rmi_irq_fn(int irq, void *dev_id)
|
|
+static void attn_callback(struct work_struct *work)
|
|
{
|
|
- struct rmi_device *rmi_dev = dev_id;
|
|
- struct rmi_driver_data *drvdata = dev_get_drvdata(&rmi_dev->dev);
|
|
+ struct rmi_driver_data *drvdata = container_of(work,
|
|
+ struct rmi_driver_data,
|
|
+ attn_work);
|
|
struct rmi4_attn_data attn_data = {0};
|
|
int ret, count;
|
|
|
|
count = kfifo_get(&drvdata->attn_fifo, &attn_data);
|
|
- if (count) {
|
|
- *(drvdata->irq_status) = attn_data.irq_status;
|
|
- drvdata->attn_data = attn_data;
|
|
- }
|
|
+ if (!count)
|
|
+ return;
|
|
|
|
- ret = rmi_process_interrupt_requests(rmi_dev);
|
|
+ *(drvdata->irq_status) = attn_data.irq_status;
|
|
+ drvdata->attn_data = attn_data;
|
|
+
|
|
+ ret = rmi_process_interrupt_requests(drvdata->rmi_dev);
|
|
if (ret)
|
|
- rmi_dbg(RMI_DEBUG_CORE, &rmi_dev->dev,
|
|
+ rmi_dbg(RMI_DEBUG_CORE, &drvdata->rmi_dev->dev,
|
|
"Failed to process interrupt request: %d\n", ret);
|
|
|
|
- if (count) {
|
|
- kfree(attn_data.data);
|
|
- drvdata->attn_data.data = NULL;
|
|
- }
|
|
+ kfree(attn_data.data);
|
|
+ drvdata->attn_data.data = NULL;
|
|
|
|
if (!kfifo_is_empty(&drvdata->attn_fifo))
|
|
- return rmi_irq_fn(irq, dev_id);
|
|
+ schedule_work(&drvdata->attn_work);
|
|
+}
|
|
+
|
|
+static irqreturn_t rmi_irq_fn(int irq, void *dev_id)
|
|
+{
|
|
+ struct rmi_device *rmi_dev = dev_id;
|
|
+ int ret;
|
|
+
|
|
+ ret = rmi_process_interrupt_requests(rmi_dev);
|
|
+ if (ret)
|
|
+ rmi_dbg(RMI_DEBUG_CORE, &rmi_dev->dev,
|
|
+ "Failed to process interrupt request: %d\n", ret);
|
|
|
|
return IRQ_HANDLED;
|
|
}
|
|
@@ -217,7 +230,6 @@ static irqreturn_t rmi_irq_fn(int irq, void *dev_id)
|
|
static int rmi_irq_init(struct rmi_device *rmi_dev)
|
|
{
|
|
struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev);
|
|
- struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev);
|
|
int irq_flags = irq_get_trigger_type(pdata->irq);
|
|
int ret;
|
|
|
|
@@ -235,8 +247,6 @@ static int rmi_irq_init(struct rmi_device *rmi_dev)
|
|
return ret;
|
|
}
|
|
|
|
- data->enabled = true;
|
|
-
|
|
return 0;
|
|
}
|
|
|
|
@@ -886,23 +896,27 @@ void rmi_enable_irq(struct rmi_device *rmi_dev, bool clear_wake)
|
|
if (data->enabled)
|
|
goto out;
|
|
|
|
- enable_irq(irq);
|
|
- data->enabled = true;
|
|
- if (clear_wake && device_may_wakeup(rmi_dev->xport->dev)) {
|
|
- retval = disable_irq_wake(irq);
|
|
- if (retval)
|
|
- dev_warn(&rmi_dev->dev,
|
|
- "Failed to disable irq for wake: %d\n",
|
|
- retval);
|
|
- }
|
|
+ if (irq) {
|
|
+ enable_irq(irq);
|
|
+ data->enabled = true;
|
|
+ if (clear_wake && device_may_wakeup(rmi_dev->xport->dev)) {
|
|
+ retval = disable_irq_wake(irq);
|
|
+ if (retval)
|
|
+ dev_warn(&rmi_dev->dev,
|
|
+ "Failed to disable irq for wake: %d\n",
|
|
+ retval);
|
|
+ }
|
|
|
|
- /*
|
|
- * Call rmi_process_interrupt_requests() after enabling irq,
|
|
- * otherwise we may lose interrupt on edge-triggered systems.
|
|
- */
|
|
- irq_flags = irq_get_trigger_type(pdata->irq);
|
|
- if (irq_flags & IRQ_TYPE_EDGE_BOTH)
|
|
- rmi_process_interrupt_requests(rmi_dev);
|
|
+ /*
|
|
+ * Call rmi_process_interrupt_requests() after enabling irq,
|
|
+ * otherwise we may lose interrupt on edge-triggered systems.
|
|
+ */
|
|
+ irq_flags = irq_get_trigger_type(pdata->irq);
|
|
+ if (irq_flags & IRQ_TYPE_EDGE_BOTH)
|
|
+ rmi_process_interrupt_requests(rmi_dev);
|
|
+ } else {
|
|
+ data->enabled = true;
|
|
+ }
|
|
|
|
out:
|
|
mutex_unlock(&data->enabled_mutex);
|
|
@@ -922,20 +936,22 @@ void rmi_disable_irq(struct rmi_device *rmi_dev, bool enable_wake)
|
|
goto out;
|
|
|
|
data->enabled = false;
|
|
- disable_irq(irq);
|
|
- if (enable_wake && device_may_wakeup(rmi_dev->xport->dev)) {
|
|
- retval = enable_irq_wake(irq);
|
|
- if (retval)
|
|
- dev_warn(&rmi_dev->dev,
|
|
- "Failed to enable irq for wake: %d\n",
|
|
- retval);
|
|
- }
|
|
-
|
|
- /* make sure the fifo is clean */
|
|
- while (!kfifo_is_empty(&data->attn_fifo)) {
|
|
- count = kfifo_get(&data->attn_fifo, &attn_data);
|
|
- if (count)
|
|
- kfree(attn_data.data);
|
|
+ if (irq) {
|
|
+ disable_irq(irq);
|
|
+ if (enable_wake && device_may_wakeup(rmi_dev->xport->dev)) {
|
|
+ retval = enable_irq_wake(irq);
|
|
+ if (retval)
|
|
+ dev_warn(&rmi_dev->dev,
|
|
+ "Failed to enable irq for wake: %d\n",
|
|
+ retval);
|
|
+ }
|
|
+ } else {
|
|
+ /* make sure the fifo is clean */
|
|
+ while (!kfifo_is_empty(&data->attn_fifo)) {
|
|
+ count = kfifo_get(&data->attn_fifo, &attn_data);
|
|
+ if (count)
|
|
+ kfree(attn_data.data);
|
|
+ }
|
|
}
|
|
|
|
out:
|
|
@@ -981,6 +997,8 @@ static int rmi_driver_remove(struct device *dev)
|
|
irq_domain_remove(data->irqdomain);
|
|
data->irqdomain = NULL;
|
|
|
|
+ cancel_work_sync(&data->attn_work);
|
|
+
|
|
rmi_f34_remove_sysfs(rmi_dev);
|
|
rmi_free_function_list(rmi_dev);
|
|
|
|
@@ -1219,9 +1237,15 @@ static int rmi_driver_probe(struct device *dev)
|
|
}
|
|
}
|
|
|
|
- retval = rmi_irq_init(rmi_dev);
|
|
- if (retval < 0)
|
|
- goto err_destroy_functions;
|
|
+ if (pdata->irq) {
|
|
+ retval = rmi_irq_init(rmi_dev);
|
|
+ if (retval < 0)
|
|
+ goto err_destroy_functions;
|
|
+ }
|
|
+
|
|
+ data->enabled = true;
|
|
+
|
|
+ INIT_WORK(&data->attn_work, attn_callback);
|
|
|
|
if (data->f01_container->dev.driver) {
|
|
/* Driver already bound, so enable ATTN now. */
|
|
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
|
|
index 857d4c2fd1a2..9353941f3a97 100644
|
|
--- a/drivers/iommu/iommu.c
|
|
+++ b/drivers/iommu/iommu.c
|
|
@@ -7,6 +7,7 @@
|
|
#define pr_fmt(fmt) "iommu: " fmt
|
|
|
|
#include <linux/device.h>
|
|
+#include <linux/dmi.h>
|
|
#include <linux/dma-iommu.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/bits.h>
|
|
@@ -2870,6 +2871,27 @@ u32 iommu_sva_get_pasid(struct iommu_sva *handle)
|
|
}
|
|
EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);
|
|
|
|
+#ifdef CONFIG_ARM64
|
|
+static int __init iommu_quirks(void)
|
|
+{
|
|
+ const char *vendor, *name;
|
|
+
|
|
+ vendor = dmi_get_system_info(DMI_SYS_VENDOR);
|
|
+ name = dmi_get_system_info(DMI_PRODUCT_NAME);
|
|
+
|
|
+ if (vendor &&
|
|
+ (strncmp(vendor, "GIGABYTE", 8) == 0 && name &&
|
|
+ (strncmp(name, "R120", 4) == 0 ||
|
|
+ strncmp(name, "R270", 4) == 0))) {
|
|
+ pr_warn("Gigabyte %s detected, force iommu passthrough mode\n", name);
|
|
+ iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+arch_initcall(iommu_quirks);
|
|
+#endif
|
|
+
|
|
/*
|
|
* Changes the default domain of an iommu group that has *only* one device
|
|
*
|
|
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
|
|
index c9831daafbc6..5c4bb1e9ba0a 100644
|
|
--- a/drivers/nvme/host/core.c
|
|
+++ b/drivers/nvme/host/core.c
|
|
@@ -240,6 +240,9 @@ static void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
|
|
|
|
static blk_status_t nvme_error_status(u16 status)
|
|
{
|
|
+ if (unlikely(status & NVME_SC_DNR))
|
|
+ return BLK_STS_TARGET;
|
|
+
|
|
switch (status & 0x7ff) {
|
|
case NVME_SC_SUCCESS:
|
|
return BLK_STS_OK;
|
|
@@ -330,6 +333,7 @@ enum nvme_disposition {
|
|
COMPLETE,
|
|
RETRY,
|
|
FAILOVER,
|
|
+ FAILUP,
|
|
};
|
|
|
|
static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
|
|
@@ -337,15 +341,16 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
|
|
if (likely(nvme_req(req)->status == 0))
|
|
return COMPLETE;
|
|
|
|
- if (blk_noretry_request(req) ||
|
|
+ if ((req->cmd_flags & (REQ_FAILFAST_DEV | REQ_FAILFAST_DRIVER)) ||
|
|
(nvme_req(req)->status & NVME_SC_DNR) ||
|
|
nvme_req(req)->retries >= nvme_max_retries)
|
|
return COMPLETE;
|
|
|
|
- if (req->cmd_flags & REQ_NVME_MPATH) {
|
|
+ if (req->cmd_flags & (REQ_NVME_MPATH | REQ_FAILFAST_TRANSPORT)) {
|
|
if (nvme_is_path_error(nvme_req(req)->status) ||
|
|
blk_queue_dying(req->q))
|
|
- return FAILOVER;
|
|
+ return (req->cmd_flags & REQ_NVME_MPATH) ?
|
|
+ FAILOVER : FAILUP;
|
|
} else {
|
|
if (blk_queue_dying(req->q))
|
|
return COMPLETE;
|
|
@@ -373,6 +378,14 @@ static inline void nvme_end_req(struct request *req)
|
|
blk_mq_end_request(req, status);
|
|
}
|
|
|
|
+static inline void nvme_failup_req(struct request *req)
|
|
+{
|
|
+ nvme_update_ana(req);
|
|
+
|
|
+ nvme_req(req)->status = NVME_SC_HOST_PATH_ERROR;
|
|
+ nvme_end_req(req);
|
|
+}
|
|
+
|
|
void nvme_complete_rq(struct request *req)
|
|
{
|
|
trace_nvme_complete_rq(req);
|
|
@@ -391,6 +404,9 @@ void nvme_complete_rq(struct request *req)
|
|
case FAILOVER:
|
|
nvme_failover_req(req);
|
|
return;
|
|
+ case FAILUP:
|
|
+ nvme_failup_req(req);
|
|
+ return;
|
|
}
|
|
}
|
|
EXPORT_SYMBOL_GPL(nvme_complete_rq);
|
|
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
|
|
index d464fdf978fb..acdaab3d7697 100644
|
|
--- a/drivers/nvme/host/multipath.c
|
|
+++ b/drivers/nvme/host/multipath.c
|
|
@@ -80,14 +80,10 @@ void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
|
|
blk_freeze_queue_start(h->disk->queue);
|
|
}
|
|
|
|
-void nvme_failover_req(struct request *req)
|
|
+void nvme_update_ana(struct request *req)
|
|
{
|
|
struct nvme_ns *ns = req->q->queuedata;
|
|
u16 status = nvme_req(req)->status & 0x7ff;
|
|
- unsigned long flags;
|
|
- struct bio *bio;
|
|
-
|
|
- nvme_mpath_clear_current_path(ns);
|
|
|
|
/*
|
|
* If we got back an ANA error, we know the controller is alive but not
|
|
@@ -98,6 +94,16 @@ void nvme_failover_req(struct request *req)
|
|
set_bit(NVME_NS_ANA_PENDING, &ns->flags);
|
|
queue_work(nvme_wq, &ns->ctrl->ana_work);
|
|
}
|
|
+}
|
|
+
|
|
+void nvme_failover_req(struct request *req)
|
|
+{
|
|
+ struct nvme_ns *ns = req->q->queuedata;
|
|
+ unsigned long flags;
|
|
+ struct bio *bio;
|
|
+
|
|
+ nvme_mpath_clear_current_path(ns);
|
|
+ nvme_update_ana(req);
|
|
|
|
spin_lock_irqsave(&ns->head->requeue_lock, flags);
|
|
for (bio = req->bio; bio; bio = bio->bi_next) {
|
|
@@ -869,8 +875,7 @@ int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
|
|
int error = 0;
|
|
|
|
/* check if multipath is enabled and we have the capability */
|
|
- if (!multipath || !ctrl->subsys ||
|
|
- !(ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA))
|
|
+ if (!ctrl->subsys || !(ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA))
|
|
return 0;
|
|
|
|
if (!ctrl->max_namespaces ||
|
|
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
|
|
index 337ae1e3ad25..727cc7fb542e 100644
|
|
--- a/drivers/nvme/host/nvme.h
|
|
+++ b/drivers/nvme/host/nvme.h
|
|
@@ -801,6 +801,7 @@ void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys);
|
|
void nvme_mpath_start_freeze(struct nvme_subsystem *subsys);
|
|
void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys);
|
|
void nvme_failover_req(struct request *req);
|
|
+void nvme_update_ana(struct request *req);
|
|
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
|
|
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
|
|
void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
|
|
@@ -837,6 +838,9 @@ static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
|
|
static inline void nvme_failover_req(struct request *req)
|
|
{
|
|
}
|
|
+static inline void nvme_update_ana(struct request *req)
|
|
+{
|
|
+}
|
|
static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
|
|
{
|
|
}
|
|
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
|
|
index 41aeaa235132..e8137d31cc93 100644
|
|
--- a/drivers/pci/quirks.c
|
|
+++ b/drivers/pci/quirks.c
|
|
@@ -4285,6 +4285,30 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9000,
|
|
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9084,
|
|
quirk_bridge_cavm_thrx2_pcie_root);
|
|
|
|
+/*
|
|
+ * PCI BAR 5 is not set up correctly for the on-board AHCI controller
|
|
+ * on Broadcom's Vulcan processor. Added a quirk to fix BAR 5 by
|
|
+ * using BAR 4's resources which are populated correctly and NOT
|
|
+ * actually used by the AHCI controller.
|
|
+ */
|
|
+static void quirk_fix_vulcan_ahci_bars(struct pci_dev *dev)
|
|
+{
|
|
+ struct resource *r = &dev->resource[4];
|
|
+
|
|
+ if (!(r->flags & IORESOURCE_MEM) || (r->start == 0))
|
|
+ return;
|
|
+
|
|
+ /* Set BAR5 resource to BAR4 */
|
|
+ dev->resource[5] = *r;
|
|
+
|
|
+ /* Update BAR5 in pci config space */
|
|
+ pci_write_config_dword(dev, PCI_BASE_ADDRESS_5, r->start);
|
|
+
|
|
+ /* Clear BAR4's resource */
|
|
+ memset(r, 0, sizeof(*r));
|
|
+}
|
|
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9027, quirk_fix_vulcan_ahci_bars);
|
|
+
|
|
/*
|
|
* Intersil/Techwell TW686[4589]-based video capture cards have an empty (zero)
|
|
* class code. Fix it.
|
|
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
|
|
index 1460857026e0..7e1964891089 100644
|
|
--- a/drivers/usb/core/hub.c
|
|
+++ b/drivers/usb/core/hub.c
|
|
@@ -5688,6 +5688,13 @@ static void hub_event(struct work_struct *work)
|
|
(u16) hub->change_bits[0],
|
|
(u16) hub->event_bits[0]);
|
|
|
|
+ /* Don't disconnect USB-SATA on TrimSlice */
|
|
+ if (strcmp(dev_name(hdev->bus->controller), "tegra-ehci.0") == 0) {
|
|
+ if ((hdev->state == 7) && (hub->change_bits[0] == 0) &&
|
|
+ (hub->event_bits[0] == 0x2))
|
|
+ hub->event_bits[0] = 0;
|
|
+ }
|
|
+
|
|
/* Lock the device, then check to see if we were
|
|
* disconnected while waiting for the lock to succeed. */
|
|
usb_lock_device(hdev);
|
|
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
|
|
index 2c7477354744..314802f98b9d 100644
|
|
--- a/include/linux/cpu.h
|
|
+++ b/include/linux/cpu.h
|
|
@@ -68,6 +68,8 @@ extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr,
|
|
extern ssize_t cpu_show_mmio_stale_data(struct device *dev,
|
|
struct device_attribute *attr,
|
|
char *buf);
|
|
+extern ssize_t cpu_show_retbleed(struct device *dev,
|
|
+ struct device_attribute *attr, char *buf);
|
|
|
|
extern __printf(4, 5)
|
|
struct device *cpu_device_create(struct device *parent, void *drvdata,
|
|
diff --git a/include/linux/efi.h b/include/linux/efi.h
|
|
index cc6d2be2ffd5..418d814d2eb7 100644
|
|
--- a/include/linux/efi.h
|
|
+++ b/include/linux/efi.h
|
|
@@ -43,6 +43,8 @@
|
|
#define EFI_ABORTED (21 | (1UL << (BITS_PER_LONG-1)))
|
|
#define EFI_SECURITY_VIOLATION (26 | (1UL << (BITS_PER_LONG-1)))
|
|
|
|
+#define EFI_IS_ERROR(x) ((x) & (1UL << (BITS_PER_LONG-1)))
|
|
+
|
|
typedef unsigned long efi_status_t;
|
|
typedef u8 efi_bool_t;
|
|
typedef u16 efi_char16_t; /* UNICODE character */
|
|
@@ -385,6 +387,7 @@ void efi_native_runtime_setup(void);
|
|
#define EFI_LOAD_FILE_PROTOCOL_GUID EFI_GUID(0x56ec3091, 0x954c, 0x11d2, 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b)
|
|
#define EFI_LOAD_FILE2_PROTOCOL_GUID EFI_GUID(0x4006c0c1, 0xfcb3, 0x403e, 0x99, 0x6d, 0x4a, 0x6c, 0x87, 0x24, 0xe0, 0x6d)
|
|
#define EFI_RT_PROPERTIES_TABLE_GUID EFI_GUID(0xeb66918a, 0x7eef, 0x402a, 0x84, 0x2e, 0x93, 0x1d, 0x21, 0xc3, 0x8a, 0xe9)
|
|
+#define EFI_DXE_SERVICES_TABLE_GUID EFI_GUID(0x05ad34ba, 0x6f02, 0x4214, 0x95, 0x2e, 0x4d, 0xa0, 0x39, 0x8e, 0x2b, 0xb9)
|
|
|
|
#define EFI_IMAGE_SECURITY_DATABASE_GUID EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596, 0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f)
|
|
#define EFI_SHIM_LOCK_GUID EFI_GUID(0x605dab50, 0xe046, 0x4300, 0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23)
|
|
@@ -437,6 +440,7 @@ typedef struct {
|
|
} efi_config_table_type_t;
|
|
|
|
#define EFI_SYSTEM_TABLE_SIGNATURE ((u64)0x5453595320494249ULL)
|
|
+#define EFI_DXE_SERVICES_TABLE_SIGNATURE ((u64)0x565245535f455844ULL)
|
|
|
|
#define EFI_2_30_SYSTEM_TABLE_REVISION ((2 << 16) | (30))
|
|
#define EFI_2_20_SYSTEM_TABLE_REVISION ((2 << 16) | (20))
|
|
@@ -831,6 +835,14 @@ extern int __init efi_setup_pcdp_console(char *);
|
|
#define EFI_MEM_ATTR 10 /* Did firmware publish an EFI_MEMORY_ATTRIBUTES table? */
|
|
#define EFI_MEM_NO_SOFT_RESERVE 11 /* Is the kernel configured to ignore soft reservations? */
|
|
#define EFI_PRESERVE_BS_REGIONS 12 /* Are EFI boot-services memory segments available? */
|
|
+#define EFI_SECURE_BOOT 13 /* Are we in Secure Boot mode? */
|
|
+
|
|
+enum efi_secureboot_mode {
|
|
+ efi_secureboot_mode_unset,
|
|
+ efi_secureboot_mode_unknown,
|
|
+ efi_secureboot_mode_disabled,
|
|
+ efi_secureboot_mode_enabled,
|
|
+};
|
|
|
|
#ifdef CONFIG_EFI
|
|
/*
|
|
@@ -842,6 +854,8 @@ static inline bool efi_enabled(int feature)
|
|
}
|
|
extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused);
|
|
|
|
+extern void __init efi_set_secure_boot(enum efi_secureboot_mode mode);
|
|
+
|
|
bool __pure __efi_soft_reserve_enabled(void);
|
|
|
|
static inline bool __pure efi_soft_reserve_enabled(void)
|
|
@@ -862,6 +876,8 @@ static inline bool efi_enabled(int feature)
|
|
static inline void
|
|
efi_reboot(enum reboot_mode reboot_mode, const char *__unused) {}
|
|
|
|
+static inline void efi_set_secure_boot(enum efi_secureboot_mode mode) {}
|
|
+
|
|
static inline bool efi_soft_reserve_enabled(void)
|
|
{
|
|
return false;
|
|
@@ -874,6 +890,7 @@ static inline bool efi_rt_services_supported(unsigned int mask)
|
|
#endif
|
|
|
|
extern int efi_status_to_err(efi_status_t status);
|
|
+extern const char *efi_status_to_str(efi_status_t status);
|
|
|
|
/*
|
|
* Variable Attributes
|
|
@@ -1126,13 +1143,6 @@ static inline bool efi_runtime_disabled(void) { return true; }
|
|
extern void efi_call_virt_check_flags(unsigned long flags, const char *call);
|
|
extern unsigned long efi_call_virt_save_flags(void);
|
|
|
|
-enum efi_secureboot_mode {
|
|
- efi_secureboot_mode_unset,
|
|
- efi_secureboot_mode_unknown,
|
|
- efi_secureboot_mode_disabled,
|
|
- efi_secureboot_mode_enabled,
|
|
-};
|
|
-
|
|
static inline
|
|
enum efi_secureboot_mode efi_get_secureboot_mode(efi_get_variable_t *get_var)
|
|
{
|
|
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
|
|
index 34eed5f85ed6..88d94cf515e1 100644
|
|
--- a/include/linux/kvm_host.h
|
|
+++ b/include/linux/kvm_host.h
|
|
@@ -1511,7 +1511,7 @@ static inline void kvm_arch_end_assignment(struct kvm *kvm)
|
|
{
|
|
}
|
|
|
|
-static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
|
|
+static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
|
|
{
|
|
return false;
|
|
}
|
|
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
|
|
index db924fe379c9..1169d78af2de 100644
|
|
--- a/include/linux/lsm_hook_defs.h
|
|
+++ b/include/linux/lsm_hook_defs.h
|
|
@@ -394,6 +394,8 @@ LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free_security, struct bpf_prog_aux *aux)
|
|
#endif /* CONFIG_BPF_SYSCALL */
|
|
|
|
LSM_HOOK(int, 0, locked_down, enum lockdown_reason what)
|
|
+LSM_HOOK(int, 0, lock_kernel_down, const char *where, enum lockdown_reason level)
|
|
+
|
|
|
|
#ifdef CONFIG_PERF_EVENTS
|
|
LSM_HOOK(int, 0, perf_event_open, struct perf_event_attr *attr, int type)
|
|
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
|
|
index 419b5febc3ca..491323dfe4e0 100644
|
|
--- a/include/linux/lsm_hooks.h
|
|
+++ b/include/linux/lsm_hooks.h
|
|
@@ -1549,6 +1549,12 @@
|
|
*
|
|
* @what: kernel feature being accessed
|
|
*
|
|
+ * @lock_kernel_down:
|
|
+ * Put the kernel into lock-down mode.
|
|
+ *
|
|
+ * @where: Where the lock-down is originating from (e.g. command line option)
|
|
+ * @level: The lock-down level (can only increase)
|
|
+ *
|
|
* Security hooks for perf events
|
|
*
|
|
* @perf_event_open:
|
|
diff --git a/include/linux/objtool.h b/include/linux/objtool.h
|
|
index c81ea2264ad8..376110ead758 100644
|
|
--- a/include/linux/objtool.h
|
|
+++ b/include/linux/objtool.h
|
|
@@ -32,11 +32,16 @@ struct unwind_hint {
|
|
*
|
|
* UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
|
|
* Useful for code which doesn't have an ELF function annotation.
|
|
+ *
|
|
+ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc.
|
|
*/
|
|
#define UNWIND_HINT_TYPE_CALL 0
|
|
#define UNWIND_HINT_TYPE_REGS 1
|
|
#define UNWIND_HINT_TYPE_REGS_PARTIAL 2
|
|
#define UNWIND_HINT_TYPE_FUNC 3
|
|
+#define UNWIND_HINT_TYPE_ENTRY 4
|
|
+#define UNWIND_HINT_TYPE_SAVE 5
|
|
+#define UNWIND_HINT_TYPE_RESTORE 6
|
|
|
|
#ifdef CONFIG_STACK_VALIDATION
|
|
|
|
@@ -122,7 +127,7 @@ struct unwind_hint {
|
|
* the debuginfo as necessary. It will also warn if it sees any
|
|
* inconsistencies.
|
|
*/
|
|
-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
|
|
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
|
|
.Lunwind_hint_ip_\@:
|
|
.pushsection .discard.unwind_hints
|
|
/* struct unwind_hint */
|
|
@@ -175,7 +180,7 @@ struct unwind_hint {
|
|
#define ASM_REACHABLE
|
|
#else
|
|
#define ANNOTATE_INTRA_FUNCTION_CALL
|
|
-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
|
|
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
|
|
.endm
|
|
.macro STACK_FRAME_NON_STANDARD func:req
|
|
.endm
|
|
diff --git a/include/linux/rmi.h b/include/linux/rmi.h
|
|
index ab7eea01ab42..fff7c5f737fc 100644
|
|
--- a/include/linux/rmi.h
|
|
+++ b/include/linux/rmi.h
|
|
@@ -364,6 +364,7 @@ struct rmi_driver_data {
|
|
|
|
struct rmi4_attn_data attn_data;
|
|
DECLARE_KFIFO(attn_fifo, struct rmi4_attn_data, 16);
|
|
+ struct work_struct attn_work;
|
|
};
|
|
|
|
int rmi_register_transport_device(struct rmi_transport_dev *xport);
|
|
diff --git a/include/linux/security.h b/include/linux/security.h
|
|
index 7fc4e9f49f54..6f0b0b2dc73d 100644
|
|
--- a/include/linux/security.h
|
|
+++ b/include/linux/security.h
|
|
@@ -473,6 +473,7 @@ int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen);
|
|
int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen);
|
|
int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen);
|
|
int security_locked_down(enum lockdown_reason what);
|
|
+int security_lock_kernel_down(const char *where, enum lockdown_reason level);
|
|
#else /* CONFIG_SECURITY */
|
|
|
|
static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data)
|
|
@@ -1349,6 +1350,10 @@ static inline int security_locked_down(enum lockdown_reason what)
|
|
{
|
|
return 0;
|
|
}
|
|
+static inline int security_lock_kernel_down(const char *where, enum lockdown_reason level)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
#endif /* CONFIG_SECURITY */
|
|
|
|
#if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE)
|
|
diff --git a/init/Kconfig b/init/Kconfig
|
|
index fa63cc019ebf..8643b5cef9f1 100644
|
|
--- a/init/Kconfig
|
|
+++ b/init/Kconfig
|
|
@@ -1663,7 +1663,7 @@ config AIO
|
|
this option saves about 7k.
|
|
|
|
config IO_URING
|
|
- bool "Enable IO uring support" if EXPERT
|
|
+ bool "Enable IO uring support"
|
|
select IO_WQ
|
|
default y
|
|
help
|
|
diff --git a/kernel/module_signing.c b/kernel/module_signing.c
|
|
index 8723ae70ea1f..fb2d773498c2 100644
|
|
--- a/kernel/module_signing.c
|
|
+++ b/kernel/module_signing.c
|
|
@@ -38,8 +38,15 @@ int mod_verify_sig(const void *mod, struct load_info *info)
|
|
modlen -= sig_len + sizeof(ms);
|
|
info->len = modlen;
|
|
|
|
- return verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len,
|
|
+ ret = verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len,
|
|
VERIFY_USE_SECONDARY_KEYRING,
|
|
VERIFYING_MODULE_SIGNATURE,
|
|
NULL, NULL);
|
|
+ if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING)) {
|
|
+ ret = verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len,
|
|
+ VERIFY_USE_PLATFORM_KEYRING,
|
|
+ VERIFYING_MODULE_SIGNATURE,
|
|
+ NULL, NULL);
|
|
+ }
|
|
+ return ret;
|
|
}
|
|
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
|
|
index 33c1ed581522..2a0521f77e5f 100644
|
|
--- a/scripts/Makefile.build
|
|
+++ b/scripts/Makefile.build
|
|
@@ -233,6 +233,7 @@ objtool_args = \
|
|
$(if $(CONFIG_FRAME_POINTER),, --no-fp) \
|
|
$(if $(CONFIG_GCOV_KERNEL), --no-unreachable) \
|
|
$(if $(CONFIG_RETPOLINE), --retpoline) \
|
|
+ $(if $(CONFIG_RETHUNK), --rethunk) \
|
|
$(if $(CONFIG_X86_SMAP), --uaccess) \
|
|
$(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount) \
|
|
$(if $(CONFIG_SLS), --sls)
|
|
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
|
|
index 9361a1ef02c9..d4d028595fb4 100755
|
|
--- a/scripts/link-vmlinux.sh
|
|
+++ b/scripts/link-vmlinux.sh
|
|
@@ -130,6 +130,9 @@ objtool_link()
|
|
|
|
if is_enabled CONFIG_VMLINUX_VALIDATION; then
|
|
objtoolopt="${objtoolopt} --noinstr"
|
|
+ if is_enabled CONFIG_CPU_UNRET_ENTRY; then
|
|
+ objtoolopt="${objtoolopt} --unret"
|
|
+ fi
|
|
fi
|
|
|
|
if [ -n "${objtoolopt}" ]; then
|
|
diff --git a/scripts/tags.sh b/scripts/tags.sh
|
|
index 16d475b3e203..4e333f14b84e 100755
|
|
--- a/scripts/tags.sh
|
|
+++ b/scripts/tags.sh
|
|
@@ -16,6 +16,8 @@ fi
|
|
ignore="$(echo "$RCS_FIND_IGNORE" | sed 's|\\||g' )"
|
|
# tags and cscope files should also ignore MODVERSION *.mod.c files
|
|
ignore="$ignore ( -name *.mod.c ) -prune -o"
|
|
+# RHEL tags and cscope should also ignore redhat/rpm
|
|
+ignore="$ignore ( -path redhat/rpm ) -prune -o"
|
|
|
|
# Use make KBUILD_ABS_SRCTREE=1 {tags|cscope}
|
|
# to force full paths for a non-O= build
|
|
diff --git a/security/Kconfig b/security/Kconfig
|
|
index 9b2c4925585a..34e2d7edd085 100644
|
|
--- a/security/Kconfig
|
|
+++ b/security/Kconfig
|
|
@@ -54,17 +54,6 @@ config SECURITY_NETWORK
|
|
implement socket and networking access controls.
|
|
If you are unsure how to answer this question, answer N.
|
|
|
|
-config PAGE_TABLE_ISOLATION
|
|
- bool "Remove the kernel mapping in user mode"
|
|
- default y
|
|
- depends on (X86_64 || X86_PAE) && !UML
|
|
- help
|
|
- This feature reduces the number of hardware side channels by
|
|
- ensuring that the majority of kernel addresses are not mapped
|
|
- into userspace.
|
|
-
|
|
- See Documentation/x86/pti.rst for more details.
|
|
-
|
|
config SECURITY_INFINIBAND
|
|
bool "Infiniband Security Hooks"
|
|
depends on SECURITY && INFINIBAND
|
|
diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c
|
|
index 093894a640dc..1c43a9462b4b 100644
|
|
--- a/security/integrity/platform_certs/load_uefi.c
|
|
+++ b/security/integrity/platform_certs/load_uefi.c
|
|
@@ -73,7 +73,8 @@ static __init void *get_cert_list(efi_char16_t *name, efi_guid_t *guid,
|
|
return NULL;
|
|
|
|
if (*status != EFI_BUFFER_TOO_SMALL) {
|
|
- pr_err("Couldn't get size: 0x%lx\n", *status);
|
|
+ pr_err("Couldn't get size: %s (0x%lx)\n",
|
|
+ efi_status_to_str(*status), *status);
|
|
return NULL;
|
|
}
|
|
|
|
@@ -84,7 +85,8 @@ static __init void *get_cert_list(efi_char16_t *name, efi_guid_t *guid,
|
|
*status = efi.get_variable(name, guid, NULL, &lsize, db);
|
|
if (*status != EFI_SUCCESS) {
|
|
kfree(db);
|
|
- pr_err("Error reading db var: 0x%lx\n", *status);
|
|
+ pr_err("Error reading db var: %s (0x%lx)\n",
|
|
+ efi_status_to_str(*status), *status);
|
|
return NULL;
|
|
}
|
|
|
|
diff --git a/security/lockdown/Kconfig b/security/lockdown/Kconfig
|
|
index e84ddf484010..d0501353a4b9 100644
|
|
--- a/security/lockdown/Kconfig
|
|
+++ b/security/lockdown/Kconfig
|
|
@@ -16,6 +16,19 @@ config SECURITY_LOCKDOWN_LSM_EARLY
|
|
subsystem is fully initialised. If enabled, lockdown will
|
|
unconditionally be called before any other LSMs.
|
|
|
|
+config LOCK_DOWN_IN_EFI_SECURE_BOOT
|
|
+ bool "Lock down the kernel in EFI Secure Boot mode"
|
|
+ default n
|
|
+ depends on EFI && SECURITY_LOCKDOWN_LSM_EARLY
|
|
+ help
|
|
+ UEFI Secure Boot provides a mechanism for ensuring that the firmware
|
|
+ will only load signed bootloaders and kernels. Secure boot mode may
|
|
+ be determined from EFI variables provided by the system firmware if
|
|
+ not indicated by the boot parameters.
|
|
+
|
|
+ Enabling this option results in kernel lockdown being triggered if
|
|
+ EFI Secure Boot is set.
|
|
+
|
|
choice
|
|
prompt "Kernel default lockdown mode"
|
|
default LOCK_DOWN_KERNEL_FORCE_NONE
|
|
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
|
|
index 87cbdc64d272..18555cf18da7 100644
|
|
--- a/security/lockdown/lockdown.c
|
|
+++ b/security/lockdown/lockdown.c
|
|
@@ -73,6 +73,7 @@ static int lockdown_is_locked_down(enum lockdown_reason what)
|
|
|
|
static struct security_hook_list lockdown_hooks[] __lsm_ro_after_init = {
|
|
LSM_HOOK_INIT(locked_down, lockdown_is_locked_down),
|
|
+ LSM_HOOK_INIT(lock_kernel_down, lock_kernel_down),
|
|
};
|
|
|
|
static int __init lockdown_lsm_init(void)
|
|
diff --git a/security/security.c b/security/security.c
|
|
index aaf6566deb9f..86926966c15d 100644
|
|
--- a/security/security.c
|
|
+++ b/security/security.c
|
|
@@ -2625,6 +2625,12 @@ int security_locked_down(enum lockdown_reason what)
|
|
}
|
|
EXPORT_SYMBOL(security_locked_down);
|
|
|
|
+int security_lock_kernel_down(const char *where, enum lockdown_reason level)
|
|
+{
|
|
+ return call_int_hook(lock_kernel_down, 0, where, level);
|
|
+}
|
|
+EXPORT_SYMBOL(security_lock_kernel_down);
|
|
+
|
|
#ifdef CONFIG_PERF_EVENTS
|
|
int security_perf_event_open(struct perf_event_attr *attr, int type)
|
|
{
|
|
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
|
|
index e17de69faa54..5d09ded0c491 100644
|
|
--- a/tools/arch/x86/include/asm/cpufeatures.h
|
|
+++ b/tools/arch/x86/include/asm/cpufeatures.h
|
|
@@ -203,8 +203,8 @@
|
|
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
|
|
/* FREE! ( 7*32+10) */
|
|
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
|
|
-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
|
|
-#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
|
|
+#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
|
|
+#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */
|
|
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
|
|
#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
|
|
#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
|
|
@@ -295,6 +295,12 @@
|
|
#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
|
|
#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */
|
|
#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
|
|
+#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */
|
|
+#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */
|
|
+#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
|
|
+#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
|
|
+#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
|
|
+#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
|
|
|
|
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
|
|
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
|
|
@@ -315,6 +321,7 @@
|
|
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
|
|
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
|
|
#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
|
|
+#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
|
|
|
|
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
|
|
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
|
|
@@ -444,5 +451,6 @@
|
|
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
|
|
#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
|
|
#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
|
|
+#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */
|
|
|
|
#endif /* _ASM_X86_CPUFEATURES_H */
|
|
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
|
|
index 1231d63f836d..f7be189e9723 100644
|
|
--- a/tools/arch/x86/include/asm/disabled-features.h
|
|
+++ b/tools/arch/x86/include/asm/disabled-features.h
|
|
@@ -56,6 +56,25 @@
|
|
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
|
|
#endif
|
|
|
|
+#ifdef CONFIG_RETPOLINE
|
|
+# define DISABLE_RETPOLINE 0
|
|
+#else
|
|
+# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
|
|
+ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
|
|
+#endif
|
|
+
|
|
+#ifdef CONFIG_RETHUNK
|
|
+# define DISABLE_RETHUNK 0
|
|
+#else
|
|
+# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31))
|
|
+#endif
|
|
+
|
|
+#ifdef CONFIG_CPU_UNRET_ENTRY
|
|
+# define DISABLE_UNRET 0
|
|
+#else
|
|
+# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31))
|
|
+#endif
|
|
+
|
|
#ifdef CONFIG_INTEL_IOMMU_SVM
|
|
# define DISABLE_ENQCMD 0
|
|
#else
|
|
@@ -82,7 +101,7 @@
|
|
#define DISABLED_MASK8 0
|
|
#define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX)
|
|
#define DISABLED_MASK10 0
|
|
-#define DISABLED_MASK11 0
|
|
+#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET)
|
|
#define DISABLED_MASK12 0
|
|
#define DISABLED_MASK13 0
|
|
#define DISABLED_MASK14 0
|
|
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
|
|
index 4425d6773183..ad084326f24c 100644
|
|
--- a/tools/arch/x86/include/asm/msr-index.h
|
|
+++ b/tools/arch/x86/include/asm/msr-index.h
|
|
@@ -51,6 +51,8 @@
|
|
#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
|
|
#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
|
|
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
|
|
+#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
|
|
+#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
|
|
|
|
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
|
|
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
|
|
@@ -91,6 +93,7 @@
|
|
#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
|
|
#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */
|
|
#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */
|
|
+#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */
|
|
#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */
|
|
#define ARCH_CAP_SSB_NO BIT(4) /*
|
|
* Not susceptible to Speculative Store Bypass
|
|
@@ -138,6 +141,13 @@
|
|
* bit available to control VERW
|
|
* behavior.
|
|
*/
|
|
+#define ARCH_CAP_RRSBA BIT(19) /*
|
|
+ * Indicates RET may use predictors
|
|
+ * other than the RSB. With eIBRS
|
|
+ * enabled predictions in kernel mode
|
|
+ * are restricted to targets in
|
|
+ * kernel.
|
|
+ */
|
|
|
|
#define MSR_IA32_FLUSH_CMD 0x0000010b
|
|
#define L1D_FLUSH BIT(0) /*
|
|
@@ -552,6 +562,9 @@
|
|
/* Fam 17h MSRs */
|
|
#define MSR_F17H_IRPERF 0xc00000e9
|
|
|
|
+#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3
|
|
+#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
|
|
+
|
|
/* Fam 16h MSRs */
|
|
#define MSR_F16H_L2I_PERF_CTL 0xc0010230
|
|
#define MSR_F16H_L2I_PERF_CTR 0xc0010231
|
|
diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h
|
|
index c81ea2264ad8..376110ead758 100644
|
|
--- a/tools/include/linux/objtool.h
|
|
+++ b/tools/include/linux/objtool.h
|
|
@@ -32,11 +32,16 @@ struct unwind_hint {
|
|
*
|
|
* UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
|
|
* Useful for code which doesn't have an ELF function annotation.
|
|
+ *
|
|
+ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc.
|
|
*/
|
|
#define UNWIND_HINT_TYPE_CALL 0
|
|
#define UNWIND_HINT_TYPE_REGS 1
|
|
#define UNWIND_HINT_TYPE_REGS_PARTIAL 2
|
|
#define UNWIND_HINT_TYPE_FUNC 3
|
|
+#define UNWIND_HINT_TYPE_ENTRY 4
|
|
+#define UNWIND_HINT_TYPE_SAVE 5
|
|
+#define UNWIND_HINT_TYPE_RESTORE 6
|
|
|
|
#ifdef CONFIG_STACK_VALIDATION
|
|
|
|
@@ -122,7 +127,7 @@ struct unwind_hint {
|
|
* the debuginfo as necessary. It will also warn if it sees any
|
|
* inconsistencies.
|
|
*/
|
|
-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
|
|
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
|
|
.Lunwind_hint_ip_\@:
|
|
.pushsection .discard.unwind_hints
|
|
/* struct unwind_hint */
|
|
@@ -175,7 +180,7 @@ struct unwind_hint {
|
|
#define ASM_REACHABLE
|
|
#else
|
|
#define ANNOTATE_INTRA_FUNCTION_CALL
|
|
-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
|
|
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
|
|
.endm
|
|
.macro STACK_FRAME_NON_STANDARD func:req
|
|
.endm
|
|
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
|
|
index 943cb41cddf7..1ecf50bbd554 100644
|
|
--- a/tools/objtool/arch/x86/decode.c
|
|
+++ b/tools/objtool/arch/x86/decode.c
|
|
@@ -787,3 +787,8 @@ bool arch_is_retpoline(struct symbol *sym)
|
|
{
|
|
return !strncmp(sym->name, "__x86_indirect_", 15);
|
|
}
|
|
+
|
|
+bool arch_is_rethunk(struct symbol *sym)
|
|
+{
|
|
+ return !strcmp(sym->name, "__x86_return_thunk");
|
|
+}
|
|
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
|
|
index fc6975ab8b06..cd4bbc98f8c1 100644
|
|
--- a/tools/objtool/builtin-check.c
|
|
+++ b/tools/objtool/builtin-check.c
|
|
@@ -21,7 +21,7 @@
|
|
|
|
bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
|
|
lto, vmlinux, mcount, noinstr, backup, sls, dryrun,
|
|
- ibt;
|
|
+ ibt, unret, rethunk;
|
|
|
|
static const char * const check_usage[] = {
|
|
"objtool check [<options>] file.o",
|
|
@@ -37,6 +37,8 @@ const struct option check_options[] = {
|
|
OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
|
|
OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
|
|
OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"),
|
|
+ OPT_BOOLEAN(0, "rethunk", &rethunk, "validate and annotate rethunk usage"),
|
|
+ OPT_BOOLEAN(0, "unret", &unret, "validate entry unret placement"),
|
|
OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"),
|
|
OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"),
|
|
OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"),
|
|
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
|
|
index f66e4ac0af94..57b7a68d3b66 100644
|
|
--- a/tools/objtool/check.c
|
|
+++ b/tools/objtool/check.c
|
|
@@ -374,7 +374,8 @@ static int decode_instructions(struct objtool_file *file)
|
|
sec->text = true;
|
|
|
|
if (!strcmp(sec->name, ".noinstr.text") ||
|
|
- !strcmp(sec->name, ".entry.text"))
|
|
+ !strcmp(sec->name, ".entry.text") ||
|
|
+ !strncmp(sec->name, ".text.__x86.", 12))
|
|
sec->noinstr = true;
|
|
|
|
for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) {
|
|
@@ -747,6 +748,52 @@ static int create_retpoline_sites_sections(struct objtool_file *file)
|
|
return 0;
|
|
}
|
|
|
|
+static int create_return_sites_sections(struct objtool_file *file)
|
|
+{
|
|
+ struct instruction *insn;
|
|
+ struct section *sec;
|
|
+ int idx;
|
|
+
|
|
+ sec = find_section_by_name(file->elf, ".return_sites");
|
|
+ if (sec) {
|
|
+ WARN("file already has .return_sites, skipping");
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ idx = 0;
|
|
+ list_for_each_entry(insn, &file->return_thunk_list, call_node)
|
|
+ idx++;
|
|
+
|
|
+ if (!idx)
|
|
+ return 0;
|
|
+
|
|
+ sec = elf_create_section(file->elf, ".return_sites", 0,
|
|
+ sizeof(int), idx);
|
|
+ if (!sec) {
|
|
+ WARN("elf_create_section: .return_sites");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ idx = 0;
|
|
+ list_for_each_entry(insn, &file->return_thunk_list, call_node) {
|
|
+
|
|
+ int *site = (int *)sec->data->d_buf + idx;
|
|
+ *site = 0;
|
|
+
|
|
+ if (elf_add_reloc_to_insn(file->elf, sec,
|
|
+ idx * sizeof(int),
|
|
+ R_X86_64_PC32,
|
|
+ insn->sec, insn->offset)) {
|
|
+ WARN("elf_add_reloc_to_insn: .return_sites");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ idx++;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
static int create_ibt_endbr_seal_sections(struct objtool_file *file)
|
|
{
|
|
struct instruction *insn;
|
|
@@ -1081,6 +1128,11 @@ __weak bool arch_is_retpoline(struct symbol *sym)
|
|
return false;
|
|
}
|
|
|
|
+__weak bool arch_is_rethunk(struct symbol *sym)
|
|
+{
|
|
+ return false;
|
|
+}
|
|
+
|
|
#define NEGATIVE_RELOC ((void *)-1L)
|
|
|
|
static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
|
|
@@ -1248,6 +1300,20 @@ static void add_retpoline_call(struct objtool_file *file, struct instruction *in
|
|
annotate_call_site(file, insn, false);
|
|
}
|
|
|
|
+static void add_return_call(struct objtool_file *file, struct instruction *insn, bool add)
|
|
+{
|
|
+ /*
|
|
+ * Return thunk tail calls are really just returns in disguise,
|
|
+ * so convert them accordingly.
|
|
+ */
|
|
+ insn->type = INSN_RETURN;
|
|
+ insn->retpoline_safe = true;
|
|
+
|
|
+ /* Skip the non-text sections, specially .discard ones */
|
|
+ if (add && insn->sec->text)
|
|
+ list_add_tail(&insn->call_node, &file->return_thunk_list);
|
|
+}
|
|
+
|
|
static bool same_function(struct instruction *insn1, struct instruction *insn2)
|
|
{
|
|
return insn1->func->pfunc == insn2->func->pfunc;
|
|
@@ -1300,6 +1366,9 @@ static int add_jump_destinations(struct objtool_file *file)
|
|
} else if (reloc->sym->retpoline_thunk) {
|
|
add_retpoline_call(file, insn);
|
|
continue;
|
|
+ } else if (reloc->sym->return_thunk) {
|
|
+ add_return_call(file, insn, true);
|
|
+ continue;
|
|
} else if (insn->func) {
|
|
/*
|
|
* External sibling call or internal sibling call with
|
|
@@ -1318,6 +1387,21 @@ static int add_jump_destinations(struct objtool_file *file)
|
|
|
|
jump_dest = find_insn(file, dest_sec, dest_off);
|
|
if (!jump_dest) {
|
|
+ struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off);
|
|
+
|
|
+ /*
|
|
+ * This is a special case for zen_untrain_ret().
|
|
+ * It jumps to __x86_return_thunk(), but objtool
|
|
+ * can't find the thunk's starting RET
|
|
+ * instruction, because the RET is also in the
|
|
+ * middle of another instruction. Objtool only
|
|
+ * knows about the outer instruction.
|
|
+ */
|
|
+ if (sym && sym->return_thunk) {
|
|
+ add_return_call(file, insn, false);
|
|
+ continue;
|
|
+ }
|
|
+
|
|
WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
|
|
insn->sec, insn->offset, dest_sec->name,
|
|
dest_off);
|
|
@@ -1947,16 +2031,35 @@ static int read_unwind_hints(struct objtool_file *file)
|
|
|
|
insn->hint = true;
|
|
|
|
- if (ibt && hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
|
|
+ if (hint->type == UNWIND_HINT_TYPE_SAVE) {
|
|
+ insn->hint = false;
|
|
+ insn->save = true;
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ if (hint->type == UNWIND_HINT_TYPE_RESTORE) {
|
|
+ insn->restore = true;
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
|
|
struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset);
|
|
|
|
- if (sym && sym->bind == STB_GLOBAL &&
|
|
- insn->type != INSN_ENDBR && !insn->noendbr) {
|
|
- WARN_FUNC("UNWIND_HINT_IRET_REGS without ENDBR",
|
|
- insn->sec, insn->offset);
|
|
+ if (sym && sym->bind == STB_GLOBAL) {
|
|
+ if (ibt && insn->type != INSN_ENDBR && !insn->noendbr) {
|
|
+ WARN_FUNC("UNWIND_HINT_IRET_REGS without ENDBR",
|
|
+ insn->sec, insn->offset);
|
|
+ }
|
|
+
|
|
+ insn->entry = 1;
|
|
}
|
|
}
|
|
|
|
+ if (hint->type == UNWIND_HINT_TYPE_ENTRY) {
|
|
+ hint->type = UNWIND_HINT_TYPE_CALL;
|
|
+ insn->entry = 1;
|
|
+ }
|
|
+
|
|
if (hint->type == UNWIND_HINT_TYPE_FUNC) {
|
|
insn->cfi = &func_cfi;
|
|
continue;
|
|
@@ -2030,8 +2133,10 @@ static int read_retpoline_hints(struct objtool_file *file)
|
|
}
|
|
|
|
if (insn->type != INSN_JUMP_DYNAMIC &&
|
|
- insn->type != INSN_CALL_DYNAMIC) {
|
|
- WARN_FUNC("retpoline_safe hint not an indirect jump/call",
|
|
+ insn->type != INSN_CALL_DYNAMIC &&
|
|
+ insn->type != INSN_RETURN &&
|
|
+ insn->type != INSN_NOP) {
|
|
+ WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret/nop",
|
|
insn->sec, insn->offset);
|
|
return -1;
|
|
}
|
|
@@ -2182,6 +2287,9 @@ static int classify_symbols(struct objtool_file *file)
|
|
if (arch_is_retpoline(func))
|
|
func->retpoline_thunk = true;
|
|
|
|
+ if (arch_is_rethunk(func))
|
|
+ func->return_thunk = true;
|
|
+
|
|
if (!strcmp(func->name, "__fentry__"))
|
|
func->fentry = true;
|
|
|
|
@@ -3324,8 +3432,8 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
|
|
return 1;
|
|
}
|
|
|
|
- visited = 1 << state.uaccess;
|
|
- if (insn->visited) {
|
|
+ visited = VISITED_BRANCH << state.uaccess;
|
|
+ if (insn->visited & VISITED_BRANCH_MASK) {
|
|
if (!insn->hint && !insn_cfi_match(insn, &state.cfi))
|
|
return 1;
|
|
|
|
@@ -3339,6 +3447,35 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
|
|
state.instr += insn->instr;
|
|
|
|
if (insn->hint) {
|
|
+ if (insn->restore) {
|
|
+ struct instruction *save_insn, *i;
|
|
+
|
|
+ i = insn;
|
|
+ save_insn = NULL;
|
|
+
|
|
+ sym_for_each_insn_continue_reverse(file, func, i) {
|
|
+ if (i->save) {
|
|
+ save_insn = i;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (!save_insn) {
|
|
+ WARN_FUNC("no corresponding CFI save for CFI restore",
|
|
+ sec, insn->offset);
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
+ if (!save_insn->visited) {
|
|
+ WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo",
|
|
+ sec, insn->offset);
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
+ insn->cfi = save_insn->cfi;
|
|
+ nr_cfi_reused++;
|
|
+ }
|
|
+
|
|
state.cfi = *insn->cfi;
|
|
} else {
|
|
/* XXX track if we actually changed state.cfi */
|
|
@@ -3554,6 +3691,145 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec)
|
|
return warnings;
|
|
}
|
|
|
|
+/*
|
|
+ * Validate rethunk entry constraint: must untrain RET before the first RET.
|
|
+ *
|
|
+ * Follow every branch (intra-function) and ensure ANNOTATE_UNRET_END comes
|
|
+ * before an actual RET instruction.
|
|
+ */
|
|
+static int validate_entry(struct objtool_file *file, struct instruction *insn)
|
|
+{
|
|
+ struct instruction *next, *dest;
|
|
+ int ret, warnings = 0;
|
|
+
|
|
+ for (;;) {
|
|
+ next = next_insn_to_validate(file, insn);
|
|
+
|
|
+ if (insn->visited & VISITED_ENTRY)
|
|
+ return 0;
|
|
+
|
|
+ insn->visited |= VISITED_ENTRY;
|
|
+
|
|
+ if (!insn->ignore_alts && !list_empty(&insn->alts)) {
|
|
+ struct alternative *alt;
|
|
+ bool skip_orig = false;
|
|
+
|
|
+ list_for_each_entry(alt, &insn->alts, list) {
|
|
+ if (alt->skip_orig)
|
|
+ skip_orig = true;
|
|
+
|
|
+ ret = validate_entry(file, alt->insn);
|
|
+ if (ret) {
|
|
+ if (backtrace)
|
|
+ BT_FUNC("(alt)", insn);
|
|
+ return ret;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (skip_orig)
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ switch (insn->type) {
|
|
+
|
|
+ case INSN_CALL_DYNAMIC:
|
|
+ case INSN_JUMP_DYNAMIC:
|
|
+ case INSN_JUMP_DYNAMIC_CONDITIONAL:
|
|
+ WARN_FUNC("early indirect call", insn->sec, insn->offset);
|
|
+ return 1;
|
|
+
|
|
+ case INSN_JUMP_UNCONDITIONAL:
|
|
+ case INSN_JUMP_CONDITIONAL:
|
|
+ if (!is_sibling_call(insn)) {
|
|
+ if (!insn->jump_dest) {
|
|
+ WARN_FUNC("unresolved jump target after linking?!?",
|
|
+ insn->sec, insn->offset);
|
|
+ return -1;
|
|
+ }
|
|
+ ret = validate_entry(file, insn->jump_dest);
|
|
+ if (ret) {
|
|
+ if (backtrace) {
|
|
+ BT_FUNC("(branch%s)", insn,
|
|
+ insn->type == INSN_JUMP_CONDITIONAL ? "-cond" : "");
|
|
+ }
|
|
+ return ret;
|
|
+ }
|
|
+
|
|
+ if (insn->type == INSN_JUMP_UNCONDITIONAL)
|
|
+ return 0;
|
|
+
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ /* fallthrough */
|
|
+ case INSN_CALL:
|
|
+ dest = find_insn(file, insn->call_dest->sec,
|
|
+ insn->call_dest->offset);
|
|
+ if (!dest) {
|
|
+ WARN("Unresolved function after linking!?: %s",
|
|
+ insn->call_dest->name);
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ ret = validate_entry(file, dest);
|
|
+ if (ret) {
|
|
+ if (backtrace)
|
|
+ BT_FUNC("(call)", insn);
|
|
+ return ret;
|
|
+ }
|
|
+ /*
|
|
+ * If a call returns without error, it must have seen UNTRAIN_RET.
|
|
+ * Therefore any non-error return is a success.
|
|
+ */
|
|
+ return 0;
|
|
+
|
|
+ case INSN_RETURN:
|
|
+ WARN_FUNC("RET before UNTRAIN", insn->sec, insn->offset);
|
|
+ return 1;
|
|
+
|
|
+ case INSN_NOP:
|
|
+ if (insn->retpoline_safe)
|
|
+ return 0;
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ if (!next) {
|
|
+ WARN_FUNC("teh end!", insn->sec, insn->offset);
|
|
+ return -1;
|
|
+ }
|
|
+ insn = next;
|
|
+ }
|
|
+
|
|
+ return warnings;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Validate that all branches starting at 'insn->entry' encounter UNRET_END
|
|
+ * before RET.
|
|
+ */
|
|
+static int validate_unret(struct objtool_file *file)
|
|
+{
|
|
+ struct instruction *insn;
|
|
+ int ret, warnings = 0;
|
|
+
|
|
+ for_each_insn(file, insn) {
|
|
+ if (!insn->entry)
|
|
+ continue;
|
|
+
|
|
+ ret = validate_entry(file, insn);
|
|
+ if (ret < 0) {
|
|
+ WARN_FUNC("Failed UNRET validation", insn->sec, insn->offset);
|
|
+ return ret;
|
|
+ }
|
|
+ warnings += ret;
|
|
+ }
|
|
+
|
|
+ return warnings;
|
|
+}
|
|
+
|
|
static int validate_retpoline(struct objtool_file *file)
|
|
{
|
|
struct instruction *insn;
|
|
@@ -3561,7 +3837,8 @@ static int validate_retpoline(struct objtool_file *file)
|
|
|
|
for_each_insn(file, insn) {
|
|
if (insn->type != INSN_JUMP_DYNAMIC &&
|
|
- insn->type != INSN_CALL_DYNAMIC)
|
|
+ insn->type != INSN_CALL_DYNAMIC &&
|
|
+ insn->type != INSN_RETURN)
|
|
continue;
|
|
|
|
if (insn->retpoline_safe)
|
|
@@ -3576,9 +3853,17 @@ static int validate_retpoline(struct objtool_file *file)
|
|
if (!strcmp(insn->sec->name, ".init.text") && !module)
|
|
continue;
|
|
|
|
- WARN_FUNC("indirect %s found in RETPOLINE build",
|
|
- insn->sec, insn->offset,
|
|
- insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
|
|
+ if (insn->type == INSN_RETURN) {
|
|
+ if (rethunk) {
|
|
+ WARN_FUNC("'naked' return found in RETHUNK build",
|
|
+ insn->sec, insn->offset);
|
|
+ } else
|
|
+ continue;
|
|
+ } else {
|
|
+ WARN_FUNC("indirect %s found in RETPOLINE build",
|
|
+ insn->sec, insn->offset,
|
|
+ insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
|
|
+ }
|
|
|
|
warnings++;
|
|
}
|
|
@@ -3911,6 +4196,17 @@ int check(struct objtool_file *file)
|
|
goto out;
|
|
warnings += ret;
|
|
|
|
+ if (unret) {
|
|
+ /*
|
|
+ * Must be after validate_branch() and friends, it plays
|
|
+ * further games with insn->visited.
|
|
+ */
|
|
+ ret = validate_unret(file);
|
|
+ if (ret < 0)
|
|
+ return ret;
|
|
+ warnings += ret;
|
|
+ }
|
|
+
|
|
if (ibt) {
|
|
ret = validate_ibt(file);
|
|
if (ret < 0)
|
|
@@ -3937,6 +4233,13 @@ int check(struct objtool_file *file)
|
|
warnings += ret;
|
|
}
|
|
|
|
+ if (rethunk) {
|
|
+ ret = create_return_sites_sections(file);
|
|
+ if (ret < 0)
|
|
+ goto out;
|
|
+ warnings += ret;
|
|
+ }
|
|
+
|
|
if (mcount) {
|
|
ret = create_mcount_loc_sections(file);
|
|
if (ret < 0)
|
|
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
|
|
index 9b19cc304195..beb2f3aa94ff 100644
|
|
--- a/tools/objtool/include/objtool/arch.h
|
|
+++ b/tools/objtool/include/objtool/arch.h
|
|
@@ -89,6 +89,7 @@ const char *arch_ret_insn(int len);
|
|
int arch_decode_hint_reg(u8 sp_reg, int *base);
|
|
|
|
bool arch_is_retpoline(struct symbol *sym);
|
|
+bool arch_is_rethunk(struct symbol *sym);
|
|
|
|
int arch_rewrite_retpolines(struct objtool_file *file);
|
|
|
|
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
|
|
index c39dbfaef6dc..b6bb605faf3f 100644
|
|
--- a/tools/objtool/include/objtool/builtin.h
|
|
+++ b/tools/objtool/include/objtool/builtin.h
|
|
@@ -10,7 +10,7 @@
|
|
extern const struct option check_options[];
|
|
extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
|
|
lto, vmlinux, mcount, noinstr, backup, sls, dryrun,
|
|
- ibt;
|
|
+ ibt, unret, rethunk;
|
|
|
|
extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]);
|
|
|
|
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
|
|
index f10d7374f388..036129cebeee 100644
|
|
--- a/tools/objtool/include/objtool/check.h
|
|
+++ b/tools/objtool/include/objtool/check.h
|
|
@@ -46,16 +46,19 @@ struct instruction {
|
|
enum insn_type type;
|
|
unsigned long immediate;
|
|
|
|
- u8 dead_end : 1,
|
|
- ignore : 1,
|
|
- ignore_alts : 1,
|
|
- hint : 1,
|
|
- retpoline_safe : 1,
|
|
- noendbr : 1;
|
|
- /* 2 bit hole */
|
|
+ u16 dead_end : 1,
|
|
+ ignore : 1,
|
|
+ ignore_alts : 1,
|
|
+ hint : 1,
|
|
+ save : 1,
|
|
+ restore : 1,
|
|
+ retpoline_safe : 1,
|
|
+ noendbr : 1,
|
|
+ entry : 1;
|
|
+ /* 7 bit hole */
|
|
+
|
|
s8 instr;
|
|
u8 visited;
|
|
- /* u8 hole */
|
|
|
|
struct alt_group *alt_group;
|
|
struct symbol *call_dest;
|
|
@@ -69,6 +72,11 @@ struct instruction {
|
|
struct cfi_state *cfi;
|
|
};
|
|
|
|
+#define VISITED_BRANCH 0x01
|
|
+#define VISITED_BRANCH_UACCESS 0x02
|
|
+#define VISITED_BRANCH_MASK 0x03
|
|
+#define VISITED_ENTRY 0x04
|
|
+
|
|
static inline bool is_static_jump(struct instruction *insn)
|
|
{
|
|
return insn->type == INSN_JUMP_CONDITIONAL ||
|
|
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
|
|
index 82e57eb4b4c5..94a618e2a79e 100644
|
|
--- a/tools/objtool/include/objtool/elf.h
|
|
+++ b/tools/objtool/include/objtool/elf.h
|
|
@@ -57,6 +57,7 @@ struct symbol {
|
|
u8 uaccess_safe : 1;
|
|
u8 static_call_tramp : 1;
|
|
u8 retpoline_thunk : 1;
|
|
+ u8 return_thunk : 1;
|
|
u8 fentry : 1;
|
|
u8 profiling_func : 1;
|
|
struct list_head pv_target;
|
|
diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h
|
|
index a6e72d916807..7f2d1b095333 100644
|
|
--- a/tools/objtool/include/objtool/objtool.h
|
|
+++ b/tools/objtool/include/objtool/objtool.h
|
|
@@ -24,6 +24,7 @@ struct objtool_file {
|
|
struct list_head insn_list;
|
|
DECLARE_HASHTABLE(insn_hash, 20);
|
|
struct list_head retpoline_call_list;
|
|
+ struct list_head return_thunk_list;
|
|
struct list_head static_call_list;
|
|
struct list_head mcount_loc_list;
|
|
struct list_head endbr_list;
|
|
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
|
|
index 843ff3c2f28e..983687345d35 100644
|
|
--- a/tools/objtool/objtool.c
|
|
+++ b/tools/objtool/objtool.c
|
|
@@ -126,6 +126,7 @@ struct objtool_file *objtool_open_read(const char *_objname)
|
|
INIT_LIST_HEAD(&file.insn_list);
|
|
hash_init(file.insn_hash);
|
|
INIT_LIST_HEAD(&file.retpoline_call_list);
|
|
+ INIT_LIST_HEAD(&file.return_thunk_list);
|
|
INIT_LIST_HEAD(&file.static_call_list);
|
|
INIT_LIST_HEAD(&file.mcount_loc_list);
|
|
INIT_LIST_HEAD(&file.endbr_list);
|