From 8feaa64a9a69652fdff87205f8a8cfe1bfd5b522 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 18 Dec 2016 17:44:11 +0100 Subject: [PATCH 01/24] x86/microcode/AMD: Make find_proper_container() sane again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixup signature and retvals, return the container struct through the passed in pointer, not as a function return value. Signed-off-by: Borislav Petkov Cc: Jürgen Gross Cc: Boris Ostrovsky Link: http://lkml.kernel.org/r/20161218164414.9649-2-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/amd.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 6f353bdb3a25..31f4e3f94d46 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -116,10 +116,11 @@ static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table, /* * This scans the ucode blob for the proper container as we can have multiple - * containers glued together. + * containers glued together. Returns the equivalence ID from the equivalence + * table or 0 if none found. 
*/ -static struct container -find_proper_container(u8 *ucode, size_t size, u16 *ret_id) +static u16 +find_proper_container(u8 *ucode, size_t size, struct container *ret_cont) { struct container ret = { NULL, 0 }; u32 eax, ebx, ecx, edx; @@ -138,7 +139,7 @@ find_proper_container(u8 *ucode, size_t size, u16 *ret_id) if (header[0] != UCODE_MAGIC || header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */ header[2] == 0) /* size */ - return ret; + return eq_id; eax = 0x00000001; ecx = 0; @@ -163,8 +164,9 @@ find_proper_container(u8 *ucode, size_t size, u16 *ret_id) * ucode update loop below */ left = ret.size - offset; - *ret_id = eq_id; - return ret; + + *ret_cont = ret; + return eq_id; } /* @@ -189,7 +191,7 @@ find_proper_container(u8 *ucode, size_t size, u16 *ret_id) ucode = data; } - return ret; + return eq_id; } static int __apply_microcode_amd(struct microcode_amd *mc_amd) @@ -237,7 +239,7 @@ apply_microcode_early_amd(void *ucode, size_t size, bool save_patch) if (check_current_patch_level(&rev, true)) return (struct container){ NULL, 0 }; - ret = find_proper_container(ucode, size, &eq_id); + eq_id = find_proper_container(ucode, size, &ret); if (!eq_id) return (struct container){ NULL, 0 }; @@ -443,7 +445,7 @@ int __init save_microcode_in_initrd_amd(unsigned int fam) return -EINVAL; } - cont = find_proper_container(cp.data, cp.size, &eq_id); + eq_id = find_proper_container(cp.data, cp.size, &cont); if (!eq_id) { cont.size = -1; return -EINVAL; From 200d3553163f6065a0f1f142f92d1cf716d586c2 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 18 Dec 2016 17:44:12 +0100 Subject: [PATCH 02/24] x86/microcode/AMD: Sanitize apply_microcode_early_amd() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make it simply return bool to denote whether it found a container or not and return the pointer to the container and its size in the handed-in container pointer instead, as returning a struct was just silly. 
Signed-off-by: Borislav Petkov Cc: Jürgen Gross Cc: Boris Ostrovsky Link: http://lkml.kernel.org/r/20161218164414.9649-3-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/amd.c | 30 ++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 31f4e3f94d46..dc80acfa922d 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -216,17 +216,18 @@ static int __apply_microcode_amd(struct microcode_amd *mc_amd) * and on 32-bit during save_microcode_in_initrd_amd() -- we can call * load_microcode_amd() to save equivalent cpu table and microcode patches in * kernel heap memory. + * + * Returns true if container found (sets @ret_cont), false otherwise. */ -static struct container -apply_microcode_early_amd(void *ucode, size_t size, bool save_patch) +static bool apply_microcode_early_amd(void *ucode, size_t size, bool save_patch, + struct container *ret_cont) { - struct container ret = { NULL, 0 }; u8 (*patch)[PATCH_MAX_SIZE]; + u32 rev, *header, *new_rev; + struct container ret; int offset, left; - u32 rev, *header; - u8 *data; u16 eq_id = 0; - u32 *new_rev; + u8 *data; #ifdef CONFIG_X86_32 new_rev = (u32 *)__pa_nodebug(&ucode_new_rev); @@ -237,11 +238,11 @@ apply_microcode_early_amd(void *ucode, size_t size, bool save_patch) #endif if (check_current_patch_level(&rev, true)) - return (struct container){ NULL, 0 }; + return false; eq_id = find_proper_container(ucode, size, &ret); if (!eq_id) - return (struct container){ NULL, 0 }; + return false; this_equiv_id = eq_id; header = (u32 *)ret.data; @@ -275,7 +276,11 @@ apply_microcode_early_amd(void *ucode, size_t size, bool save_patch) data += offset; left -= offset; } - return ret; + + if (ret_cont) + *ret_cont = ret; + + return true; } static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family) @@ -319,7 +324,7 @@ void __init 
load_ucode_amd_bsp(unsigned int family) /* Get BSP's CPUID.EAX(1), needed in load_microcode_amd() */ uci->cpu_sig.sig = cpuid_eax(1); - apply_microcode_early_amd(cp.data, cp.size, true); + apply_microcode_early_amd(cp.data, cp.size, true, NULL); } #ifdef CONFIG_X86_32 @@ -351,7 +356,7 @@ void load_ucode_amd_ap(unsigned int family) * This would set amd_ucode_patch above so that the following APs can * use it directly instead of going down this path again. */ - apply_microcode_early_amd(cp.data, cp.size, true); + apply_microcode_early_amd(cp.data, cp.size, true, NULL); } #else void load_ucode_amd_ap(unsigned int family) @@ -389,8 +394,7 @@ reget: } } - cont = apply_microcode_early_amd(cp.data, cp.size, false); - if (!(cont.data && cont.size)) { + if (!apply_microcode_early_amd(cp.data, cp.size, false, &cont)) { cont.size = -1; return; } From a15a753539eca8ba243d576f02e7ca9c4b7d7042 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 18 Dec 2016 17:44:13 +0100 Subject: [PATCH 03/24] x86/microcode/AMD: Do not load when running on a hypervisor Doing so is completely void of sense for multiple reasons so prevent it. Set dis_ucode_ldr to true and thus disable the microcode loader by default to address xen pv guests which execute the AP path but not the BSP path. By having it turned off by default, the APs won't run into the loader either. Also, check CPUID(1).ECX[31] which hypervisors set. Well almost, not the xen pv one. That one gets the aforementioned "fix". Also, improve the detection method by caching the final decision whether to continue loading in dis_ucode_ldr and do it once on the BSP. The APs then simply test that value. 
Signed-off-by: Borislav Petkov Tested-by: Juergen Gross Tested-by: Boris Ostrovsky Acked-by: Juergen Gross Link: http://lkml.kernel.org/r/20161218164414.9649-4-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/core.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 6996413c78c3..c4bb2f7169f6 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -44,7 +44,7 @@ #define DRIVER_VERSION "2.2" static struct microcode_ops *microcode_ops; -static bool dis_ucode_ldr; +static bool dis_ucode_ldr = true; LIST_HEAD(microcode_cache); @@ -76,6 +76,7 @@ struct cpu_info_ctx { static bool __init check_loader_disabled_bsp(void) { static const char *__dis_opt_str = "dis_ucode_ldr"; + u32 a, b, c, d; #ifdef CONFIG_X86_32 const char *cmdline = (const char *)__pa_nodebug(boot_command_line); @@ -88,8 +89,23 @@ static bool __init check_loader_disabled_bsp(void) bool *res = &dis_ucode_ldr; #endif - if (cmdline_find_option_bool(cmdline, option)) - *res = true; + if (!have_cpuid_p()) + return *res; + + a = 1; + c = 0; + native_cpuid(&a, &b, &c, &d); + + /* + * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not + * completely accurate as xen pv guests don't see that CPUID bit set but + * that's good enough as they don't land on the BSP path anyway. 
+ */ + if (c & BIT(31)) + return *res; + + if (cmdline_find_option_bool(cmdline, option) <= 0) + *res = false; return *res; } @@ -121,9 +137,6 @@ void __init load_ucode_bsp(void) if (check_loader_disabled_bsp()) return; - if (!have_cpuid_p()) - return; - vendor = x86_cpuid_vendor(); family = x86_cpuid_family(); @@ -157,9 +170,6 @@ void load_ucode_ap(void) if (check_loader_disabled_ap()) return; - if (!have_cpuid_p()) - return; - vendor = x86_cpuid_vendor(); family = x86_cpuid_family(); From 2b4c91569a40c4512ea1b413e0c817d179ce9868 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Sun, 18 Dec 2016 17:44:14 +0100 Subject: [PATCH 04/24] x86/microcode/AMD: Use native_cpuid() in load_ucode_amd_bsp() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_PARAVIRT is selected, cpuid() becomes a call. Since for 32-bit kernels load_ucode_amd_bsp() is executed before paging is enabled the call cannot be completed (as kernel virtual addresses are not reachable yet). Use native_cpuid() instead which is an asm wrapper for the CPUID instruction. 
Signed-off-by: Boris Ostrovsky Signed-off-by: Borislav Petkov Cc: Jürgen Gross Link: http://lkml.kernel.org/r/1481906392-3847-1-git-send-email-boris.ostrovsky@oracle.com Link: http://lkml.kernel.org/r/20161218164414.9649-5-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/amd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index dc80acfa922d..6a31e2691f3a 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -301,6 +301,7 @@ static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family) void __init load_ucode_amd_bsp(unsigned int family) { struct ucode_cpu_info *uci; + u32 eax, ebx, ecx, edx; struct cpio_data cp; const char *path; bool use_pa; @@ -322,7 +323,10 @@ void __init load_ucode_amd_bsp(unsigned int family) return; /* Get BSP's CPUID.EAX(1), needed in load_microcode_amd() */ - uci->cpu_sig.sig = cpuid_eax(1); + eax = 1; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + uci->cpu_sig.sig = eax; apply_microcode_early_amd(cp.data, cp.size, true, NULL); } From 93ffa9a479ffb65d045e74e141346e7f107fcde1 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 9 Dec 2016 12:57:38 -0800 Subject: [PATCH 05/24] x86/init: Add i8042 state to the platform data Add i8042 state to the platform data to help i8042 driver make decision whether to probe for i8042 or not. We recognize 3 states: platform/subarch cannot possibly have i8042 (as is the case with Intel MID platform), firmware (such as ACPI) reports that i8042 is absent from the device, or i8042 may be present and the driver should probe for it. The intent is to allow the i8042 driver to abort initialization on x86 if PNP data (absence of both keyboard and mouse PNP devices) agrees with firmware data. It will also allow us to remove i8042_detect later.
Signed-off-by: Dmitry Torokhov Tested-by: Takashi Iwai Acked-by: Marcos Paulo de Souza Cc: linux-input@vger.kernel.org Link: http://lkml.kernel.org/r/1481317061-31486-2-git-send-email-dmitry.torokhov@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/x86_init.h | 18 ++++++++++++++++++ arch/x86/kernel/acpi/boot.c | 7 +++++++ arch/x86/kernel/platform-quirks.c | 5 +++++ 3 files changed, 30 insertions(+) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 6ba793178441..c4d09c797cf7 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -164,9 +164,26 @@ struct x86_legacy_devices { int pnpbios; }; +/** + * enum x86_legacy_i8042_state - i8042 keyboard controller state + * @X86_LEGACY_I8042_PLATFORM_ABSENT: the controller is always absent on + * given platform/subarch. + * @X86_LEGACY_I8042_FIRMWARE_ABSENT: firmware reports that the controller + * is absent. + * @X86_LEGACY_i8042_EXPECTED_PRESENT: the controller is likely to be + * present, the i8042 driver should probe for controller existence. + */ +enum x86_legacy_i8042_state { + X86_LEGACY_I8042_PLATFORM_ABSENT, + X86_LEGACY_I8042_FIRMWARE_ABSENT, + X86_LEGACY_I8042_EXPECTED_PRESENT, +}; + /** * struct x86_legacy_features - legacy x86 features * + * @i8042: indicated if we expect the device to have i8042 controller + * present. * @rtc: this device has a CMOS real-time clock present * @reserve_bios_regions: boot code will search for the EBDA address and the * start of the 640k - 1M BIOS region. If false, the platform must @@ -175,6 +192,7 @@ struct x86_legacy_devices { * documentation for further details. 
*/ struct x86_legacy_features { + enum x86_legacy_i8042_state i8042; int rtc; int reserve_bios_regions; struct x86_legacy_devices devices; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 6f65b0eed384..64422f850e95 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -930,6 +930,13 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table) x86_platform.legacy.devices.pnpbios = 0; } + if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && + !(acpi_gbl_FADT.boot_flags & ACPI_FADT_8042) && + x86_platform.legacy.i8042 != X86_LEGACY_I8042_PLATFORM_ABSENT) { + pr_debug("ACPI: i8042 controller is absent\n"); + x86_platform.legacy.i8042 = X86_LEGACY_I8042_FIRMWARE_ABSENT; + } + if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) { pr_debug("ACPI: not registering RTC platform device\n"); x86_platform.legacy.rtc = 0; diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c index 24a50301f150..91271122f0df 100644 --- a/arch/x86/kernel/platform-quirks.c +++ b/arch/x86/kernel/platform-quirks.c @@ -6,6 +6,7 @@ void __init x86_early_init_platform_quirks(void) { + x86_platform.legacy.i8042 = X86_LEGACY_I8042_EXPECTED_PRESENT; x86_platform.legacy.rtc = 1; x86_platform.legacy.reserve_bios_regions = 0; x86_platform.legacy.devices.pnpbios = 1; @@ -16,10 +17,14 @@ void __init x86_early_init_platform_quirks(void) break; case X86_SUBARCH_XEN: case X86_SUBARCH_LGUEST: + x86_platform.legacy.devices.pnpbios = 0; + x86_platform.legacy.rtc = 0; + break; case X86_SUBARCH_INTEL_MID: case X86_SUBARCH_CE4100: x86_platform.legacy.devices.pnpbios = 0; x86_platform.legacy.rtc = 0; + x86_platform.legacy.i8042 = X86_LEGACY_I8042_PLATFORM_ABSENT; break; } From d79e141c1c6ea7cb70c169971d522b88c8d5b419 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 9 Dec 2016 12:57:39 -0800 Subject: [PATCH 06/24] Input: i8042 - Trust firmware a bit more when probing on X86 The error message "Can't read CTR while 
initializing i8042" appears on Cherry Trail-based devices at each boot time: i8042: PNP: No PS/2 controller found. Probing ports directly. i8042: Can't read CTR while initializing i8042 i8042: probe of i8042 failed with error -5 This happens because we historically do not trust firmware on X86 and, while noting that PNP does not show keyboard or mouse devices, we still charge ahead and try to probe the controller. Let's relax this a bit and if results of PNP probe agree with the results of platform initialization/quirks conclude that there is, in fact, no i8042. While at it, let's avoid using x86_platform.i8042_detect() and instead abort execution early if platform indicates that it can not possibly have i8042 (x86_platform.legacy.i8042 equals X86_LEGACY_I8042_PLATFORM_ABSENT). Reported-and-tested-by: Takashi Iwai Signed-off-by: Dmitry Torokhov Acked-by: Marcos Paulo de Souza Cc: linux-input@vger.kernel.org Link: http://lkml.kernel.org/r/1481317061-31486-3-git-send-email-dmitry.torokhov@gmail.com Signed-off-by: Thomas Gleixner --- drivers/input/serio/i8042-x86ia64io.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 73a4e68448fc..77551f522202 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -983,7 +983,11 @@ static int __init i8042_pnp_init(void) #if defined(__ia64__) return -ENODEV; #else - pr_info("PNP: No PS/2 controller found. 
Probing ports directly.\n"); + pr_info("PNP: No PS/2 controller found.\n"); + if (x86_platform.legacy.i8042 != + X86_LEGACY_I8042_EXPECTED_PRESENT) + return -ENODEV; + pr_info("Probing ports directly.\n"); return 0; #endif } @@ -1070,8 +1074,8 @@ static int __init i8042_platform_init(void) #ifdef CONFIG_X86 u8 a20_on = 0xdf; - /* Just return if pre-detection shows no i8042 controller exist */ - if (!x86_platform.i8042_detect()) + /* Just return if platform does not have i8042 controller */ + if (x86_platform.legacy.i8042 == X86_LEGACY_I8042_PLATFORM_ABSENT) return -ENODEV; #endif From 32786fdc9506aeba98278c1844d4bfb766863832 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 9 Dec 2016 12:57:40 -0800 Subject: [PATCH 07/24] x86/init: Remove i8042_detect() from platform ops Now that i8042 uses flag in legacy platform data, i8042_detect() is no longer used and can be removed. Signed-off-by: Dmitry Torokhov Tested-by: Takashi Iwai Acked-by: Marcos Paulo de Souza Cc: linux-input@vger.kernel.org Link: http://lkml.kernel.org/r/1481317061-31486-4-git-send-email-dmitry.torokhov@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/x86_init.h | 2 -- arch/x86/kernel/x86_init.c | 2 -- arch/x86/platform/ce4100/ce4100.c | 6 ------ arch/x86/platform/intel-mid/intel-mid.c | 7 ------- 4 files changed, 17 deletions(-) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index c4d09c797cf7..85b2ae534179 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -206,7 +206,6 @@ struct x86_legacy_features { * @set_wallclock: set time back to HW clock * @is_untracked_pat_range exclude from PAT logic * @nmi_init enable NMI on cpus - * @i8042_detect pre-detect if i8042 controller exists * @save_sched_clock_state: save state for sched_clock() on suspend * @restore_sched_clock_state: restore state for sched_clock() on resume * @apic_post_init: adjust apic if neeeded @@ -228,7 +227,6 @@ struct x86_platform_ops { bool 
(*is_untracked_pat_range)(u64 start, u64 end); void (*nmi_init)(void); unsigned char (*get_nmi_reason)(void); - int (*i8042_detect)(void); void (*save_sched_clock_state)(void); void (*restore_sched_clock_state)(void); void (*apic_post_init)(void); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 0bd9f1287f39..11a93f005268 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -89,7 +89,6 @@ struct x86_cpuinit_ops x86_cpuinit = { }; static void default_nmi_init(void) { }; -static int default_i8042_detect(void) { return 1; }; struct x86_platform_ops x86_platform __ro_after_init = { .calibrate_cpu = native_calibrate_cpu, @@ -100,7 +99,6 @@ struct x86_platform_ops x86_platform __ro_after_init = { .is_untracked_pat_range = is_ISA_range, .nmi_init = default_nmi_init, .get_nmi_reason = default_get_nmi_reason, - .i8042_detect = default_i8042_detect, .save_sched_clock_state = tsc_save_sched_clock_state, .restore_sched_clock_state = tsc_restore_sched_clock_state, }; diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c index 821cb41f00e6..ce4b06733c09 100644 --- a/arch/x86/platform/ce4100/ce4100.c +++ b/arch/x86/platform/ce4100/ce4100.c @@ -23,11 +23,6 @@ #include #include -static int ce4100_i8042_detect(void) -{ - return 0; -} - /* * The CE4100 platform has an internal 8051 Microcontroller which is * responsible for signaling to the external Power Management Unit the @@ -145,7 +140,6 @@ static void sdv_pci_init(void) void __init x86_ce4100_early_setup(void) { x86_init.oem.arch_setup = sdv_arch_setup; - x86_platform.i8042_detect = ce4100_i8042_detect; x86_init.resources.probe_roms = x86_init_noop; x86_init.mpparse.get_smp_config = x86_init_uint_noop; x86_init.mpparse.find_smp_config = x86_init_noop; diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index 7850128f0026..12a272582cdc 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ 
b/arch/x86/platform/intel-mid/intel-mid.c @@ -161,12 +161,6 @@ out: regulator_has_full_constraints(); } -/* MID systems don't have i8042 controller */ -static int intel_mid_i8042_detect(void) -{ - return 0; -} - /* * Moorestown does not have external NMI source nor port 0x61 to report * NMI status. The possible NMI sources are from pmu as a result of NMI @@ -197,7 +191,6 @@ void __init x86_intel_mid_early_setup(void) x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock; x86_platform.calibrate_tsc = intel_mid_calibrate_tsc; - x86_platform.i8042_detect = intel_mid_i8042_detect; x86_init.timers.wallclock_init = intel_mid_rtc_init; x86_platform.get_nmi_reason = intel_mid_get_nmi_reason; From 22d3c0d63b1108af0b4ef1cfdad1f6ef0710da30 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 9 Dec 2016 12:57:41 -0800 Subject: [PATCH 08/24] x86/init: Fix a couple of comment typos Signed-off-by: Dmitry Torokhov Acked-by: Marcos Paulo de Souza Cc: linux-input@vger.kernel.org Link: http://lkml.kernel.org/r/1481317061-31486-5-git-send-email-dmitry.torokhov@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/x86_init.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 85b2ae534179..7ba7e90a9ad6 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -59,7 +59,7 @@ struct x86_init_irqs { /** * struct x86_init_oem - oem platform specific customizing functions - * @arch_setup: platform specific architecure setup + * @arch_setup: platform specific architecture setup * @banner: print a platform specific banner */ struct x86_init_oem { @@ -208,12 +208,12 @@ struct x86_legacy_features { * @nmi_init enable NMI on cpus * @save_sched_clock_state: save state for sched_clock() on suspend * @restore_sched_clock_state: restore state for sched_clock() on resume - * @apic_post_init: adjust apic if neeeded + * @apic_post_init: adjust apic if needed * 
@legacy: legacy features * @set_legacy_features: override legacy features. Use of this callback * is highly discouraged. You should only need * this if your hardware platform requires further - * custom fine tuning far beyong what may be + * custom fine tuning far beyond what may be * possible in x86_early_init_platform_quirks() by * only using the current x86_hardware_subarch * semantics. From 8023e0e2a48d45e8d5363081fad9f7ed4402f953 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 16 Dec 2016 10:05:05 -0600 Subject: [PATCH 09/24] x86/unwind: Adjust last frame check for aligned function stacks Somehow, CONFIG_PARAVIRT=n convinces gcc to change the x86_64_start_kernel() prologue from: 0000000000000129 : 129: 55 push %rbp 12a: 48 89 e5 mov %rsp,%rbp to: 0000000000000124 : 124: 4c 8d 54 24 08 lea 0x8(%rsp),%r10 129: 48 83 e4 f0 and $0xfffffffffffffff0,%rsp 12d: 41 ff 72 f8 pushq -0x8(%r10) 131: 55 push %rbp 132: 48 89 e5 mov %rsp,%rbp This is an unusual pattern which aligns rsp (though in this case it's already aligned) and saves the start_cpu() return address again on the stack before storing the frame pointer. The unwinder assumes the last stack frame header is at a certain offset, but the above code breaks that assumption, resulting in the following warning: WARNING: kernel stack frame pointer at ffffffff82e03f40 in swapper:0 has bad value (null) Fix it by checking for the last task stack frame at the aligned offset in addition to the normal unaligned offset. 
Fixes: acb4608ad186 ("x86/unwind: Create stack frames for saved syscall registers") Reported-by: Borislav Petkov Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Link: http://lkml.kernel.org/r/9d7b4eb8cf55a7d6002cb738f25c23e7429c99a0.1481904011.git.jpoimboe@redhat.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/unwind_frame.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index ea7b7f9a3b9e..33aeaae961aa 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -46,7 +46,14 @@ static bool is_last_task_frame(struct unwind_state *state) unsigned long bp = (unsigned long)state->bp; unsigned long regs = (unsigned long)task_pt_regs(state->task); - return bp == regs - FRAME_HEADER_SIZE; + /* + * We have to check for the last task frame at two different locations + * because gcc can occasionally decide to realign the stack pointer and + * change the offset of the stack frame by a word in the prologue of a + * function called by head/entry code. + */ + return bp == regs - FRAME_HEADER_SIZE || + bp == regs - FRAME_HEADER_SIZE - sizeof(long); } /* From 8b5e99f02264130782a10ba5c0c759797fb064ee Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 16 Dec 2016 10:05:06 -0600 Subject: [PATCH 10/24] x86/unwind: Dump stack data on warnings The unwinder warnings are good at finding unexpected unwinder issues, but they often don't give enough data to be able to fully diagnose them. Print a one-time stack dump when a warning is detected. 
Signed-off-by: Josh Poimboeuf Cc: Borislav Petkov Cc: Andy Lutomirski Link: http://lkml.kernel.org/r/15607370e3ddb1732b6a73d5c65937864df16ac8.1481904011.git.jpoimboe@redhat.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/unwind.h | 2 +- arch/x86/kernel/unwind_frame.c | 38 ++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index c5a7f3a930dd..6fa75b17aec3 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h @@ -12,7 +12,7 @@ struct unwind_state { struct task_struct *task; int graph_idx; #ifdef CONFIG_FRAME_POINTER - unsigned long *bp; + unsigned long *bp, *orig_sp; struct pt_regs *regs; #else unsigned long *sp; diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 33aeaae961aa..20d4b4e0800c 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -6,6 +6,37 @@ #define FRAME_HEADER_SIZE (sizeof(long) * 2) +static void unwind_dump(struct unwind_state *state, unsigned long *sp) +{ + static bool dumped_before = false; + bool prev_zero, zero = false; + unsigned long word; + + if (dumped_before) + return; + + dumped_before = true; + + printk_deferred("unwind stack type:%d next_sp:%p mask:%lx graph_idx:%d\n", + state->stack_info.type, state->stack_info.next_sp, + state->stack_mask, state->graph_idx); + + for (sp = state->orig_sp; sp < state->stack_info.end; sp++) { + word = READ_ONCE_NOCHECK(*sp); + + prev_zero = zero; + zero = word == 0; + + if (zero) { + if (!prev_zero) + printk_deferred("%p: %016x ...\n", sp, 0); + continue; + } + + printk_deferred("%p: %016lx (%pB)\n", sp, word, (void *)word); + } +} + unsigned long unwind_get_return_address(struct unwind_state *state) { unsigned long addr; @@ -25,6 +56,7 @@ unsigned long unwind_get_return_address(struct unwind_state *state) "WARNING: unrecognized kernel stack return address %p at %p in %s:%d\n", (void *)addr, addr_p, 
state->task->comm, state->task->pid); + unwind_dump(state, addr_p); return 0; } @@ -74,6 +106,7 @@ static bool update_stack_state(struct unwind_state *state, void *addr, size_t len) { struct stack_info *info = &state->stack_info; + enum stack_type orig_type = info->type; /* * If addr isn't on the current stack, switch to the next one. @@ -87,6 +120,9 @@ static bool update_stack_state(struct unwind_state *state, void *addr, &state->stack_mask)) return false; + if (!state->orig_sp || info->type != orig_type) + state->orig_sp = addr; + return true; } @@ -185,11 +221,13 @@ bad_address: "WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n", state->regs, state->task->comm, state->task->pid, next_frame); + unwind_dump(state, (unsigned long *)state->regs); } else { printk_deferred_once(KERN_WARNING "WARNING: kernel stack frame pointer at %p in %s:%d has bad value %p\n", state->bp, state->task->comm, state->task->pid, next_frame); + unwind_dump(state, state->bp); } the_end: state->stack_info.type = STACK_TYPE_UNKNOWN; From 7ebb916782949621ff6819acf373a06902df7679 Mon Sep 17 00:00:00 2001 From: Markus Trippelsdorf Date: Thu, 15 Dec 2016 13:45:13 +0100 Subject: [PATCH 11/24] x86/tools: Fix gcc-7 warning in relocs.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc-7 warns: In file included from arch/x86/tools/relocs_64.c:17:0: arch/x86/tools/relocs.c: In function ‘process_64’: arch/x86/tools/relocs.c:953:2: warning: argument 1 null where non-null expected [-Wnonnull] qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from arch/x86/tools/relocs.h:6:0, from arch/x86/tools/relocs_64.c:1: /usr/include/stdlib.h:741:13: note: in a call to function ‘qsort’ declared here extern void qsort This happens because relocs16 is not used for ELF_BITS == 64, so there is no point in trying to sort it. Make the sort_relocs(&relocs16) call 32bit only. 
Signed-off-by: Markus Trippelsdorf Link: http://lkml.kernel.org/r/20161215124513.GA289@x4 Signed-off-by: Thomas Gleixner --- arch/x86/tools/relocs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 0c2fae8d929d..73eb7fd4aec4 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -992,11 +992,12 @@ static void emit_relocs(int as_text, int use_real_mode) die("Segment relocations found but --realmode not specified\n"); /* Order the relocations for more efficient processing */ - sort_relocs(&relocs16); sort_relocs(&relocs32); #if ELF_BITS == 64 sort_relocs(&relocs32neg); sort_relocs(&relocs64); +#else + sort_relocs(&relocs16); #endif /* Print the relocations */ From 3df8d9208569ef0b2313e516566222d745f3b94b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 15 Dec 2016 10:14:42 -0800 Subject: [PATCH 12/24] x86/cpu: Probe CPUID leaf 6 even when cpuid_level == 6 A typo (or mis-merge?) resulted in leaf 6 only being probed if cpuid_level >= 7. 
Fixes: 2ccd71f1b278 ("x86/cpufeature: Move some of the scattered feature bits to x86_capability") Signed-off-by: Andy Lutomirski Acked-by: Borislav Petkov Cc: Brian Gerst Link: http://lkml.kernel.org/r/6ea30c0e9daec21e488b54761881a6dfcf3e04d0.1481825597.git.luto@kernel.org Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/common.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 1f6b50a449ab..dc1697ca5191 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -667,13 +667,14 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_1_EDX] = edx; } + /* Thermal and Power Management Leaf: level 0x00000006 (eax) */ + if (c->cpuid_level >= 0x00000006) + c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006); + /* Additional Intel-defined flags: level 0x00000007 */ if (c->cpuid_level >= 0x00000007) { cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); - c->x86_capability[CPUID_7_0_EBX] = ebx; - - c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006); c->x86_capability[CPUID_7_ECX] = ecx; } From 1c52d859cb2d417e7216d3e56bb7fea88444cec9 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 9 Dec 2016 10:24:05 -0800 Subject: [PATCH 13/24] x86/asm/32: Make sync_core() handle missing CPUID on all 32-bit kernels We support various non-Intel CPUs that don't have the CPUID instruction, so the M486 test was wrong. For now, fix it with a big hammer: handle missing CPUID on all 32-bit CPUs. 
Reported-by: One Thousand Gnomes Signed-off-by: Andy Lutomirski Cc: Juergen Gross Cc: Peter Zijlstra Cc: Brian Gerst Cc: Matthew Whitehead Cc: Borislav Petkov Cc: Henrique de Moraes Holschuh Cc: Andrew Cooper Cc: Boris Ostrovsky Cc: xen-devel Link: http://lkml.kernel.org/r/685bd083a7c036f7769510b6846315b17d6ba71f.1481307769.git.luto@kernel.org Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6aa741fbe1df..b934871ae226 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -607,7 +607,7 @@ static inline void sync_core(void) { int tmp; -#ifdef CONFIG_M486 +#ifdef CONFIG_X86_32 /* * Do a CPUID if available, otherwise do a jump. The jump * can conveniently enough be the jump around CPUID. From 426d1aff3138cf38da14e912df3c75e312f96e9e Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 9 Dec 2016 10:24:06 -0800 Subject: [PATCH 14/24] Revert "x86/boot: Fail the boot if !M486 and CPUID is missing" This reverts commit ed68d7e9b9cfb64f3045ffbcb108df03c09a0f98. The patch wasn't quite correct -- there are non-Intel (and hence non-486) CPUs that we support that don't have CPUID. Since we no longer require CPUID for sync_core(), just revert the patch. I think the relevant CPUs are Geode and Elan, but I'm not sure. In principle, we should try to do better at identifying CPUID-less CPUs in early boot, but that's more complicated. 
Reported-by: One Thousand Gnomes Signed-off-by: Andy Lutomirski Cc: Juergen Gross Cc: Denys Vlasenko Cc: Peter Zijlstra Cc: Brian Gerst Cc: Josh Poimboeuf Cc: Matthew Whitehead Cc: Borislav Petkov Cc: Henrique de Moraes Holschuh Cc: Andrew Cooper Cc: Boris Ostrovsky Cc: xen-devel Cc: Linus Torvalds Link: http://lkml.kernel.org/r/82acde18a108b8e353180dd6febcc2876df33f24.1481307769.git.luto@kernel.org Signed-off-by: Thomas Gleixner --- arch/x86/boot/cpu.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c index 4224ede43b4e..26240dde081e 100644 --- a/arch/x86/boot/cpu.c +++ b/arch/x86/boot/cpu.c @@ -87,12 +87,6 @@ int validate_cpu(void) return -1; } - if (CONFIG_X86_MINIMUM_CPU_FAMILY <= 4 && !IS_ENABLED(CONFIG_M486) && - !has_eflag(X86_EFLAGS_ID)) { - printf("This kernel requires a CPU with the CPUID instruction. Build with CONFIG_M486=y to run on this CPU.\n"); - return -1; - } - if (err_flags) { puts("This kernel requires the following features " "not present on the CPU:\n"); From 484d0e5c7943644cc46e7308a8f9d83be598f2b9 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 9 Dec 2016 10:24:07 -0800 Subject: [PATCH 15/24] x86/microcode/intel: Replace sync_core() with native_cpuid() The Intel microcode driver is using sync_core() to mean "do CPUID with EAX=1". I want to rework sync_core(), but first the Intel microcode driver needs to stop depending on its current behavior. 
Reported-by: Henrique de Moraes Holschuh Signed-off-by: Andy Lutomirski Acked-by: Borislav Petkov Cc: Juergen Gross Cc: One Thousand Gnomes Cc: Peter Zijlstra Cc: Brian Gerst Cc: Matthew Whitehead Cc: Andrew Cooper Cc: Boris Ostrovsky Cc: xen-devel Link: http://lkml.kernel.org/r/535a025bb91fed1a019c5412b036337ad239e5bb.1481307769.git.luto@kernel.org Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/intel.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 54d50c3694d8..b624b54912e1 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -368,6 +368,26 @@ next: return patch; } +static void cpuid_1(void) +{ + /* + * According to the Intel SDM, Volume 3, 9.11.7: + * + * CPUID returns a value in a model specific register in + * addition to its usual register return values. The + * semantics of CPUID cause it to deposit an update ID value + * in the 64-bit model-specific register at address 08BH + * (IA32_BIOS_SIGN_ID). If no update is present in the + * processor, the value in the MSR remains unmodified. + * + * Use native_cpuid -- this code runs very early and we don't + * want to mess with paravirt. 
+ */ + unsigned int eax = 1, ebx, ecx = 0, edx; + + native_cpuid(&eax, &ebx, &ecx, &edx); +} + static int collect_cpu_info_early(struct ucode_cpu_info *uci) { unsigned int val[2]; @@ -393,7 +413,7 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci) native_wrmsrl(MSR_IA32_UCODE_REV, 0); /* As documented in the SDM: Do a CPUID 1 here */ - sync_core(); + cpuid_1(); /* get the current revision from MSR 0x8B */ native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); @@ -593,7 +613,7 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) native_wrmsrl(MSR_IA32_UCODE_REV, 0); /* As documented in the SDM: Do a CPUID 1 here */ - sync_core(); + cpuid_1(); /* get the current revision from MSR 0x8B */ native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); @@ -805,7 +825,7 @@ static int apply_microcode_intel(int cpu) wrmsrl(MSR_IA32_UCODE_REV, 0); /* As documented in the SDM: Do a CPUID 1 here */ - sync_core(); + cpuid_1(); /* get the current revision from MSR 0x8B */ rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); From c198b121b1a1d7a7171770c634cd49191bac4477 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 9 Dec 2016 10:24:08 -0800 Subject: [PATCH 16/24] x86/asm: Rewrite sync_core() to use IRET-to-self Aside from being excessively slow, CPUID is problematic: Linux runs on a handful of CPUs that don't have CPUID. Use IRET-to-self instead. IRET-to-self works everywhere, so it makes testing easy. For reference, On my laptop, IRET-to-self is ~110ns, CPUID(eax=1, ecx=0) is ~83ns on native and very very slow under KVM, and MOV-to-CR2 is ~42ns. While we're at it: sync_core() serves a very specific purpose. Document it. 
Signed-off-by: Andy Lutomirski Cc: Juergen Gross Cc: One Thousand Gnomes Cc: Peter Zijlstra Cc: Brian Gerst Cc: Matthew Whitehead Cc: Borislav Petkov Cc: Henrique de Moraes Holschuh Cc: Andrew Cooper Cc: Boris Ostrovsky Cc: xen-devel Link: http://lkml.kernel.org/r/5c79f0225f68bc8c40335612bf624511abb78941.1481307769.git.luto@kernel.org Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/processor.h | 80 +++++++++++++++++++++++--------- 1 file changed, 58 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b934871ae226..eaf100508c36 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -602,33 +602,69 @@ static __always_inline void cpu_relax(void) rep_nop(); } -/* Stop speculative execution and prefetching of modified code. */ +/* + * This function forces the icache and prefetched instruction stream to + * catch up with reality in two very specific cases: + * + * a) Text was modified using one virtual address and is about to be executed + * from the same physical page at a different virtual address. + * + * b) Text was modified on a different CPU, may subsequently be + * executed on this CPU, and you want to make sure the new version + * gets executed. This generally means you're calling this in an IPI. + * + * If you're calling this for a different reason, you're probably doing + * it wrong. + */ static inline void sync_core(void) { - int tmp; + /* + * There are quite a few ways to do this. IRET-to-self is nice + * because it works on every CPU, at any CPL (so it's compatible + * with paravirtualization), and it never exits to a hypervisor. + * The only down sides are that it's a bit slow (it seems to be + * a bit more than 2x slower than the fastest options) and that + * it unmasks NMIs. The "push %cs" is needed because, in + * paravirtual environments, __KERNEL_CS may not be a valid CS + * value when we do IRET directly. 
+ * + * In case NMI unmasking or performance ever becomes a problem, + * the next best option appears to be MOV-to-CR2 and an + * unconditional jump. That sequence also works on all CPUs, + * but it will fault at CPL3 (i.e. Xen PV and lguest). + * + * CPUID is the conventional way, but it's nasty: it doesn't + * exist on some 486-like CPUs, and it usually exits to a + * hypervisor. + * + * Like all of Linux's memory ordering operations, this is a + * compiler barrier as well. + */ + register void *__sp asm(_ASM_SP); #ifdef CONFIG_X86_32 - /* - * Do a CPUID if available, otherwise do a jump. The jump - * can conveniently enough be the jump around CPUID. - */ - asm volatile("cmpl %2,%1\n\t" - "jl 1f\n\t" - "cpuid\n" - "1:" - : "=a" (tmp) - : "rm" (boot_cpu_data.cpuid_level), "ri" (0), "0" (1) - : "ebx", "ecx", "edx", "memory"); + asm volatile ( + "pushfl\n\t" + "pushl %%cs\n\t" + "pushl $1f\n\t" + "iret\n\t" + "1:" + : "+r" (__sp) : : "memory"); #else - /* - * CPUID is a barrier to speculative execution. - * Prefetched instructions are automatically - * invalidated when modified. - */ - asm volatile("cpuid" - : "=a" (tmp) - : "0" (1) - : "ebx", "ecx", "edx", "memory"); + unsigned int tmp; + + asm volatile ( + "mov %%ss, %0\n\t" + "pushq %q0\n\t" + "pushq %%rsp\n\t" + "addq $8, (%%rsp)\n\t" + "pushfq\n\t" + "mov %%cs, %0\n\t" + "pushq %q0\n\t" + "pushq $1f\n\t" + "iretq\n\t" + "1:" + : "=&r" (tmp), "+r" (__sp) : : "cc", "memory"); #endif } From 59107e2f48831daedc46973ce4988605ab066de3 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 2 Dec 2016 11:07:20 +0100 Subject: [PATCH 17/24] x86/hyperv: Handle unknown NMIs on one CPU when unknown_nmi_panic There is a feature in Hyper-V ('Debug-VM --InjectNonMaskableInterrupt') which injects NMI to the guest. We may want to crash the guest and do kdump on this NMI by enabling unknown_nmi_panic. 
To make kdump succeed we need to allow the kdump kernel to re-establish VMBus connection so it will see VMBus devices (storage, network,..). To properly unload VMBus making it possible to start over during kdump we need to do the following: - Send an 'unload' message to the hypervisor. This can be done on any CPU so we do this on the crashing CPU. - Receive the 'unload finished' reply message. WS2012R2 delivers this message to the CPU which was used to establish VMBus connection during module load and this CPU may differ from the CPU sending 'unload'. Receiving a VMBus message means the following: - There is a per-CPU slot in memory for one message. This slot can in theory be accessed by any CPU. - We get an interrupt on the CPU when a message was placed into the slot. - When we read the message we need to clear the slot and signal the fact to the hypervisor. In case there are more messages to this CPU pending the hypervisor will deliver the next message. The signaling is done by writing to an MSR so this can only be done on the appropriate CPU. To avoid doing cross-CPU work on crash we have vmbus_wait_for_unload() function which checks message slots for all CPUs in a loop waiting for the 'unload finished' messages. However, there is an issue which arises when these conditions are met: - We're crashing on a CPU which is different from the one which was used to initially contact the hypervisor. - The CPU which was used for the initial contact is blocked with interrupts disabled and there is a message pending in the message slot. In this case we won't be able to read the 'unload finished' message on the crashing CPU. This is reproducible when we receive unknown NMIs on all CPUs simultaneously: the first CPU entering panic() will proceed to crash and all other CPUs will stop themselves with interrupts disabled. The suggested solution is to handle unknown NMIs for Hyper-V guests on the first CPU which gets them only. 
This will allow us to rely on VMBus interrupt handler being able to receive the 'unload finish' message in case it is delivered to a different CPU. The issue is not reproducible on WS2016 as Debug-VM delivers NMI to the boot CPU only, WS2012R2 and earlier Hyper-V versions are affected. Signed-off-by: Vitaly Kuznetsov Acked-by: K. Y. Srinivasan Cc: devel@linuxdriverproject.org Cc: Haiyang Zhang Link: http://lkml.kernel.org/r/20161202100720.28121-1-vkuznets@redhat.com Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mshyperv.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 6c044543545e..f37e02e41a77 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -30,6 +30,7 @@ #include #include #include +#include struct ms_hyperv_info ms_hyperv; EXPORT_SYMBOL_GPL(ms_hyperv); @@ -157,6 +158,26 @@ static unsigned char hv_get_nmi_reason(void) return 0; } +#ifdef CONFIG_X86_LOCAL_APIC +/* + * Prior to WS2016 Debug-VM sends NMIs to all CPUs which makes + * it difficult to process CHANNELMSG_UNLOAD in case of crash. Handle + * unknown NMI on the first CPU which gets it. 
+ */ +static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs) +{ + static atomic_t nmi_cpu = ATOMIC_INIT(-1); + + if (!unknown_nmi_panic) + return NMI_DONE; + + if (atomic_cmpxchg(&nmi_cpu, -1, raw_smp_processor_id()) != -1) + return NMI_HANDLED; + + return NMI_DONE; +} +#endif + static void __init ms_hyperv_init_platform(void) { /* @@ -182,6 +203,9 @@ static void __init ms_hyperv_init_platform(void) pr_info("HyperV: LAPIC Timer Frequency: %#x\n", lapic_timer_frequency); } + + register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST, + "hv_nmi_unknown"); #endif if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) From a268b5f1d6e4639fa6d78fc8bdddaebaa032ab24 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 17 Nov 2016 10:45:57 +0100 Subject: [PATCH 18/24] x86/topology: Document cpu_llc_id It means different things on Intel and AMD so write it down so that there's no confusion. Signed-off-by: Borislav Petkov Cc: Peter Zijlstra Cc: Yazen Ghannam Link: http://lkml.kernel.org/r/20161117094557.jm6hwzdd52h7iwnj@pd.tnic Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- Documentation/x86/topology.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Documentation/x86/topology.txt b/Documentation/x86/topology.txt index 06afac252f5b..f3e9d7e9ed6c 100644 --- a/Documentation/x86/topology.txt +++ b/Documentation/x86/topology.txt @@ -63,6 +63,15 @@ The topology of a system is described in the units of: The maximum possible number of packages in the system. Helpful for per package facilities to preallocate per package information. + - cpu_llc_id: + + A per-CPU variable containing: + - On Intel, the first APIC ID of the list of CPUs sharing the Last Level + Cache + + - On AMD, the Node ID or Core Complex ID containing the Last Level + Cache. In general, it is a number identifying an LLC uniquely on the + system. 
* Cores: From 34bfab0eaf0fb5c6fb14c6b4013b06cdc7984466 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 3 Dec 2016 16:02:58 +0100 Subject: [PATCH 19/24] x86/alternatives: Do not use sync_core() to serialize I$ We use sync_core() in the alternatives code to stop speculative execution of prefetched instructions because we are potentially changing them and don't want to execute stale bytes. What it does on most machines is call CPUID which is a serializing instruction. And that's expensive. However, the instruction cache is serialized when we're on the local CPU and are changing the data through the same virtual address. So then, we don't need the serializing CPUID but a simple control flow change. Last being accomplished with a CALL/RET which the noinline causes. Suggested-by: Linus Torvalds Signed-off-by: Borislav Petkov Reviewed-by: Andy Lutomirski Cc: Andrew Cooper Cc: Andy Lutomirski Cc: Brian Gerst Cc: Henrique de Moraes Holschuh Cc: Matthew Whitehead Cc: One Thousand Gnomes Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20161203150258.vwr5zzco7ctgc4pe@pd.tnic Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 5cb272a7a5a3..c5b8f760473c 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -337,7 +337,11 @@ done: n_dspl, (unsigned long)orig_insn + n_dspl + repl_len); } -static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr) +/* + * "noinline" to cause control flow change and thus invalidate I$ and + * cause refetch after modification. 
+ */ +static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr) { unsigned long flags; @@ -346,7 +350,6 @@ static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr) local_irq_save(flags); add_nops(instr + (a->instrlen - a->padlen), a->padlen); - sync_core(); local_irq_restore(flags); DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ", @@ -359,9 +362,12 @@ static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr) * This implies that asymmetric systems where APs have less capabilities than * the boot processor are not handled. Tough. Make sure you disable such * features by hand. + * + * Marked "noinline" to cause control flow change and thus insn cache + * to refetch changed I$ lines. */ -void __init_or_module apply_alternatives(struct alt_instr *start, - struct alt_instr *end) +void __init_or_module noinline apply_alternatives(struct alt_instr *start, + struct alt_instr *end) { struct alt_instr *a; u8 *instr, *replacement; @@ -667,7 +673,6 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode, unsigned long flags; local_irq_save(flags); memcpy(addr, opcode, len); - sync_core(); local_irq_restore(flags); /* Could also do a CLFLUSH here to speed up CPU recovery; but that causes hangs on some VIA CPUs. */ From 634b847b6d232f861abd5a03a1f75677f541b156 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 14 Dec 2016 14:39:54 +0100 Subject: [PATCH 20/24] x86/platform/intel-mid: Switch MPU3050 driver to IIO The Intel Mid goes in and creates a I2C device for the MPU3050 if the input driver for MPU-3050 is activated. As of commit: 3904b28efb2c ("iio: gyro: Add driver for the MPU-3050 gyroscope") .. there is a proper and fully featured IIO driver for this device, so deprecate the use of the incomplete input driver by augmenting the device population code to react to the presence of the IIO driver's Kconfig symbol instead. 
Signed-off-by: Linus Walleij Acked-by: Andy Shevchenko Cc: Dmitry Torokhov Cc: Jonathan Cameron Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1481722794-4348-1-git-send-email-linus.walleij@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/device_libs/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index dd6cfa4ad3ac..61b5ed2b7d40 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -19,7 +19,7 @@ obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_spidev.o # I2C Devices obj-$(subst m,y,$(CONFIG_SENSORS_EMC1403)) += platform_emc1403.o obj-$(subst m,y,$(CONFIG_SENSORS_LIS3LV02D)) += platform_lis331.o -obj-$(subst m,y,$(CONFIG_INPUT_MPU3050)) += platform_mpu3050.o +obj-$(subst m,y,$(CONFIG_MPU3050_I2C)) += platform_mpu3050.o obj-$(subst m,y,$(CONFIG_INPUT_BMA150)) += platform_bma023.o obj-$(subst m,y,$(CONFIG_DRM_MEDFIELD)) += platform_tc35876x.o # I2C GPIO Expanders From 9120cf4fd9ae77245ce9137869bcbd16575cc633 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Mon, 19 Dec 2016 14:21:44 +0100 Subject: [PATCH 21/24] x86/platform/intel/quark: Add printf attribute to imr_self_test_result() __printf() attributes help detecting issues in printf() format strings at compile time. Even though imr_selftest.c is only compiled with CONFIG_DEBUG_IMR_SELFTEST=y, GCC complains about a missing format attribute when compiling allmodconfig with -Wmissing-format-attribute. Silence this warning by adding the attribute. 
Signed-off-by: Nicolas Iooss Acked-by: Bryan O'Donoghue Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20161219132144.4108-1-nicolas.iooss_linux@m4x.org Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-quark/imr_selftest.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c index f5bad40936ac..b8f562049cad 100644 --- a/arch/x86/platform/intel-quark/imr_selftest.c +++ b/arch/x86/platform/intel-quark/imr_selftest.c @@ -25,7 +25,8 @@ * @fmt: format string. * ... variadic argument list. */ -static void __init imr_self_test_result(int res, const char *fmt, ...) +static __printf(2, 3) +void __init imr_self_test_result(int res, const char *fmt, ...) { va_list vlist; From 8877ebdd3f9a3ffc84c4b67562d257c5f553bc49 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 20 Dec 2016 11:54:30 +0100 Subject: [PATCH 22/24] x86/microcode/AMD: Reload proper initrd start address When we switch to virtual addresses and, especially after reserve_initrd()->relocate_initrd() have run, we have the updated initrd address in initrd_start. Use initrd_start then instead of the address which has been passed to us through boot params. (That still gets used when we're running the very early routines on the BSP). Reported-and-tested-by: Boris Ostrovsky Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20161220144012.lc4cwrg6dphqbyqu@pd.tnic Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/core.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index c4bb2f7169f6..2af69d27da62 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -243,14 +243,12 @@ struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa) # endif /* - * Did we relocate the ramdisk? 
- * - * So we possibly relocate the ramdisk *after* applying microcode on the - * BSP so we rely on use_pa (use physical addresses) - even if it is not - * absolutely correct - to determine whether we've done the ramdisk - * relocation already. + * Fixup the start address: after reserve_initrd() runs, initrd_start + * has the virtual address of the beginning of the initrd. It also + * possibly relocates the ramdisk. In either case, initrd_start contains + * the updated address so use that instead. */ - if (!use_pa && relocated_ramdisk) + if (!use_pa && initrd_start) start = initrd_start; return find_cpio_data(path, (void *)start, size, NULL); From cef4402d7627f14a08571e7c816b199edf8cc24b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 16 Dec 2016 10:51:50 +0100 Subject: [PATCH 23/24] x86/paravirt: Mark unused patch_default label A bugfix commit: 45dbea5f55c0 ("x86/paravirt: Fix native_patch()") ... introduced a harmless warning: arch/x86/kernel/paravirt_patch_32.c: In function 'native_patch': arch/x86/kernel/paravirt_patch_32.c:71:1: error: label 'patch_default' defined but not used [-Werror=unused-label] Fix it by annotating the label as __maybe_unused. Reported-by: Arnd Bergmann Reported-by: Piotr Gregor Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 45dbea5f55c0 ("x86/paravirt: Fix native_patch()") Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt_patch_32.c | 2 +- arch/x86/kernel/paravirt_patch_64.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index d33ef165b1f8..553acbbb4d32 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -68,7 +68,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, #endif default: -patch_default: +patch_default: __maybe_unused ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); break; diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index f4fcf26c9fce..11aaf1eaa0e4 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -80,7 +80,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, #endif default: -patch_default: +patch_default: __maybe_unused ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); break; From c280f7736ab26a601932b1ce017a3840dbedcfdc Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 22 Dec 2016 09:02:49 -0600 Subject: [PATCH 24/24] Revert "x86/unwind: Detect bad stack return address" Revert the following commit: b6959a362177 ("x86/unwind: Detect bad stack return address") ... because Andrey Konovalov reported an unwinder warning: WARNING: unrecognized kernel stack return address ffffffffa0000001 at ffff88006377fa18 in a.out:4467 The unwind was initiated from an interrupt which occurred while running in the generated code for a kprobe. The unwinder printed the warning because it expected regs->ip to point to a valid text address, but instead it pointed to the generated code. Eventually we may want to come up with a way to identify generated kprobe code so the unwinder can know that it's a valid return address. 
Until then, just remove the warning. Reported-by: Andrey Konovalov Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/02f296848fbf49fb72dfeea706413ecbd9d4caf6.1482418739.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/unwind_frame.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 20d4b4e0800c..4443e499f279 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -51,16 +51,7 @@ unsigned long unwind_get_return_address(struct unwind_state *state) addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p, addr_p); - if (!__kernel_text_address(addr)) { - printk_deferred_once(KERN_WARNING - "WARNING: unrecognized kernel stack return address %p at %p in %s:%d\n", - (void *)addr, addr_p, state->task->comm, - state->task->pid); - unwind_dump(state, addr_p); - return 0; - } - - return addr; + return __kernel_text_address(addr) ? addr : 0; } EXPORT_SYMBOL_GPL(unwind_get_return_address);