297 lines
11 KiB
Diff
297 lines
11 KiB
Diff
|
From patchwork Mon Nov 6 12:31:12 2017
|
||
|
Content-Type: text/plain; charset="utf-8"
|
||
|
MIME-Version: 1.0
|
||
|
Content-Transfer-Encoding: 7bit
|
||
|
Subject: [1/2] kvm: vmx: Reinstate support for CPUs without virtual NMI
|
||
|
From: Paolo Bonzini <pbonzini@redhat.com>
|
||
|
X-Patchwork-Id: 10043403
|
||
|
Message-Id: <1509971473-74491-2-git-send-email-pbonzini@redhat.com>
|
||
|
To: linux-kernel@vger.kernel.org, kvm@vger.kernel.org
|
||
|
Cc: rkrcmar@redhat.com, stable@vger.kernel.org
|
||
|
Date: Mon, 6 Nov 2017 13:31:12 +0100
|
||
|
|
||
|
This is more or less a revert of commit 2c82878b0cb3 ("KVM: VMX: require
|
||
|
virtual NMI support", 2017-03-27); it turns out that Core 2 Duo machines
|
||
|
only had virtual NMIs in some SKUs.
|
||
|
|
||
|
The revert is not trivial because in the meanwhile there have been several
|
||
|
fixes to nested NMI injection. Therefore, the entire vNMI state is moved
|
||
|
to struct loaded_vmcs.
|
||
|
|
||
|
Another change compared to before the patch is a simplification here:
|
||
|
|
||
|
if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked &&
|
||
|
!(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
|
||
|
get_vmcs12(vcpu))))) {
|
||
|
|
||
|
The final condition here is always true (because nested_cpu_has_virtual_nmis
|
||
|
is always false) and is removed.
|
||
|
|
||
|
Fixes: 2c82878b0cb38fd516fd612c67852a6bbf282003
|
||
|
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1490803
|
||
|
Cc: stable@vger.kernel.org
|
||
|
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||
|
---
|
||
|
arch/x86/kvm/vmx.c | 150 +++++++++++++++++++++++++++++++++++++----------------
|
||
|
1 file changed, 106 insertions(+), 44 deletions(-)
|
||
|
|
||
|
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
|
||
|
index e6c8ffa84968..d6b3b12ae1e2 100644
|
||
|
--- a/arch/x86/kvm/vmx.c
|
||
|
+++ b/arch/x86/kvm/vmx.c
|
||
|
@@ -202,6 +202,10 @@ struct loaded_vmcs {
|
||
|
bool nmi_known_unmasked;
|
||
|
unsigned long vmcs_host_cr3; /* May not match real cr3 */
|
||
|
unsigned long vmcs_host_cr4; /* May not match real cr4 */
|
||
|
+ /* Support for vnmi-less CPUs */
|
||
|
+ int soft_vnmi_blocked;
|
||
|
+ ktime_t entry_time;
|
||
|
+ s64 vnmi_blocked_time;
|
||
|
struct list_head loaded_vmcss_on_cpu_link;
|
||
|
};
|
||
|
|
||
|
@@ -1291,6 +1295,11 @@ static inline bool cpu_has_vmx_invpcid(void)
|
||
|
SECONDARY_EXEC_ENABLE_INVPCID;
|
||
|
}
|
||
|
|
||
|
+static inline bool cpu_has_virtual_nmis(void)
|
||
|
+{
|
||
|
+ return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
|
||
|
+}
|
||
|
+
|
||
|
static inline bool cpu_has_vmx_wbinvd_exit(void)
|
||
|
{
|
||
|
return vmcs_config.cpu_based_2nd_exec_ctrl &
|
||
|
@@ -1348,11 +1357,6 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
|
||
|
(vmcs12->secondary_vm_exec_control & bit);
|
||
|
}
|
||
|
|
||
|
-static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
|
||
|
-{
|
||
|
- return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
|
||
|
-}
|
||
|
-
|
||
|
static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
|
||
|
{
|
||
|
return vmcs12->pin_based_vm_exec_control &
|
||
|
@@ -3712,9 +3716,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
|
||
|
&_vmexit_control) < 0)
|
||
|
return -EIO;
|
||
|
|
||
|
- min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
|
||
|
- PIN_BASED_VIRTUAL_NMIS;
|
||
|
- opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER;
|
||
|
+ min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
|
||
|
+ opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
|
||
|
+ PIN_BASED_VMX_PREEMPTION_TIMER;
|
||
|
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
|
||
|
&_pin_based_exec_control) < 0)
|
||
|
return -EIO;
|
||
|
@@ -5669,7 +5673,8 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
|
||
|
|
||
|
static void enable_nmi_window(struct kvm_vcpu *vcpu)
|
||
|
{
|
||
|
- if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
|
||
|
+ if (!cpu_has_virtual_nmis() ||
|
||
|
+ vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
|
||
|
enable_irq_window(vcpu);
|
||
|
return;
|
||
|
}
|
||
|
@@ -5709,6 +5714,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
|
||
|
{
|
||
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||
|
|
||
|
+ if (!cpu_has_virtual_nmis()) {
|
||
|
+ /*
|
||
|
+ * Tracking the NMI-blocked state in software is built upon
|
||
|
+ * finding the next open IRQ window. This, in turn, depends on
|
||
|
+ * well-behaving guests: They have to keep IRQs disabled at
|
||
|
+ * least as long as the NMI handler runs. Otherwise we may
|
||
|
+ * cause NMI nesting, maybe breaking the guest. But as this is
|
||
|
+ * highly unlikely, we can live with the residual risk.
|
||
|
+ */
|
||
|
+ vmx->loaded_vmcs->soft_vnmi_blocked = 1;
|
||
|
+ vmx->loaded_vmcs->vnmi_blocked_time = 0;
|
||
|
+ }
|
||
|
+
|
||
|
++vcpu->stat.nmi_injections;
|
||
|
vmx->loaded_vmcs->nmi_known_unmasked = false;
|
||
|
|
||
|
@@ -5727,6 +5745,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
|
||
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||
|
bool masked;
|
||
|
|
||
|
+ if (!cpu_has_virtual_nmis())
|
||
|
+ return vmx->loaded_vmcs->soft_vnmi_blocked;
|
||
|
if (vmx->loaded_vmcs->nmi_known_unmasked)
|
||
|
return false;
|
||
|
masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
|
||
|
@@ -5738,13 +5758,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
|
||
|
{
|
||
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||
|
|
||
|
- vmx->loaded_vmcs->nmi_known_unmasked = !masked;
|
||
|
- if (masked)
|
||
|
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
|
||
|
- GUEST_INTR_STATE_NMI);
|
||
|
- else
|
||
|
- vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
|
||
|
- GUEST_INTR_STATE_NMI);
|
||
|
+ if (!cpu_has_virtual_nmis()) {
|
||
|
+ if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
|
||
|
+ vmx->loaded_vmcs->soft_vnmi_blocked = masked;
|
||
|
+ vmx->loaded_vmcs->vnmi_blocked_time = 0;
|
||
|
+ }
|
||
|
+ } else {
|
||
|
+ vmx->loaded_vmcs->nmi_known_unmasked = !masked;
|
||
|
+ if (masked)
|
||
|
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
|
||
|
+ GUEST_INTR_STATE_NMI);
|
||
|
+ else
|
||
|
+ vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
|
||
|
+ GUEST_INTR_STATE_NMI);
|
||
|
+ }
|
||
|
}
|
||
|
|
||
|
static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
|
||
|
@@ -5752,6 +5779,10 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
|
||
|
if (to_vmx(vcpu)->nested.nested_run_pending)
|
||
|
return 0;
|
||
|
|
||
|
+ if (!cpu_has_virtual_nmis() &&
|
||
|
+ to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
|
||
|
+ return 0;
|
||
|
+
|
||
|
return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
|
||
|
(GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
|
||
|
| GUEST_INTR_STATE_NMI));
|
||
|
@@ -6479,6 +6510,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
|
||
|
* AAK134, BY25.
|
||
|
*/
|
||
|
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
|
||
|
+ cpu_has_virtual_nmis() &&
|
||
|
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
|
||
|
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
|
||
|
|
||
|
@@ -6965,7 +6997,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
|
||
|
}
|
||
|
|
||
|
/* Create a new VMCS */
|
||
|
- item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
|
||
|
+ item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
|
||
|
if (!item)
|
||
|
return NULL;
|
||
|
item->vmcs02.vmcs = alloc_vmcs();
|
||
|
@@ -7982,6 +8014,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
|
||
|
* "blocked by NMI" bit has to be set before next VM entry.
|
||
|
*/
|
||
|
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
|
||
|
+ cpu_has_virtual_nmis() &&
|
||
|
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
|
||
|
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
|
||
|
GUEST_INTR_STATE_NMI);
|
||
|
@@ -8826,6 +8859,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
+ if (unlikely(!cpu_has_virtual_nmis() &&
|
||
|
+ vmx->loaded_vmcs->soft_vnmi_blocked)) {
|
||
|
+ if (vmx_interrupt_allowed(vcpu)) {
|
||
|
+ vmx->loaded_vmcs->soft_vnmi_blocked = 0;
|
||
|
+ } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
|
||
|
+ vcpu->arch.nmi_pending) {
|
||
|
+ /*
|
||
|
+ * This CPU don't support us in finding the end of an
|
||
|
+ * NMI-blocked window if the guest runs with IRQs
|
||
|
+ * disabled. So we pull the trigger after 1 s of
|
||
|
+ * futile waiting, but inform the user about this.
|
||
|
+ */
|
||
|
+ printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
|
||
|
+ "state on VCPU %d after 1 s timeout\n",
|
||
|
+ __func__, vcpu->vcpu_id);
|
||
|
+ vmx->loaded_vmcs->soft_vnmi_blocked = 0;
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
if (exit_reason < kvm_vmx_max_exit_handlers
|
||
|
&& kvm_vmx_exit_handlers[exit_reason])
|
||
|
return kvm_vmx_exit_handlers[exit_reason](vcpu);
|
||
|
@@ -9108,33 +9160,38 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
|
||
|
|
||
|
idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
|
||
|
|
||
|
- if (vmx->loaded_vmcs->nmi_known_unmasked)
|
||
|
- return;
|
||
|
- /*
|
||
|
- * Can't use vmx->exit_intr_info since we're not sure what
|
||
|
- * the exit reason is.
|
||
|
- */
|
||
|
- exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
|
||
|
- unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
|
||
|
- vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
|
||
|
- /*
|
||
|
- * SDM 3: 27.7.1.2 (September 2008)
|
||
|
- * Re-set bit "block by NMI" before VM entry if vmexit caused by
|
||
|
- * a guest IRET fault.
|
||
|
- * SDM 3: 23.2.2 (September 2008)
|
||
|
- * Bit 12 is undefined in any of the following cases:
|
||
|
- * If the VM exit sets the valid bit in the IDT-vectoring
|
||
|
- * information field.
|
||
|
- * If the VM exit is due to a double fault.
|
||
|
- */
|
||
|
- if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
|
||
|
- vector != DF_VECTOR && !idtv_info_valid)
|
||
|
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
|
||
|
- GUEST_INTR_STATE_NMI);
|
||
|
- else
|
||
|
- vmx->loaded_vmcs->nmi_known_unmasked =
|
||
|
- !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
|
||
|
- & GUEST_INTR_STATE_NMI);
|
||
|
+ if (cpu_has_virtual_nmis()) {
|
||
|
+ if (vmx->loaded_vmcs->nmi_known_unmasked)
|
||
|
+ return;
|
||
|
+ /*
|
||
|
+ * Can't use vmx->exit_intr_info since we're not sure what
|
||
|
+ * the exit reason is.
|
||
|
+ */
|
||
|
+ exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
|
||
|
+ unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
|
||
|
+ vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
|
||
|
+ /*
|
||
|
+ * SDM 3: 27.7.1.2 (September 2008)
|
||
|
+ * Re-set bit "block by NMI" before VM entry if vmexit caused by
|
||
|
+ * a guest IRET fault.
|
||
|
+ * SDM 3: 23.2.2 (September 2008)
|
||
|
+ * Bit 12 is undefined in any of the following cases:
|
||
|
+ * If the VM exit sets the valid bit in the IDT-vectoring
|
||
|
+ * information field.
|
||
|
+ * If the VM exit is due to a double fault.
|
||
|
+ */
|
||
|
+ if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
|
||
|
+ vector != DF_VECTOR && !idtv_info_valid)
|
||
|
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
|
||
|
+ GUEST_INTR_STATE_NMI);
|
||
|
+ else
|
||
|
+ vmx->loaded_vmcs->nmi_known_unmasked =
|
||
|
+ !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
|
||
|
+ & GUEST_INTR_STATE_NMI);
|
||
|
+ } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
|
||
|
+ vmx->loaded_vmcs->vnmi_blocked_time +=
|
||
|
+ ktime_to_ns(ktime_sub(ktime_get(),
|
||
|
+ vmx->loaded_vmcs->entry_time));
|
||
|
}
|
||
|
|
||
|
static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
|
||
|
@@ -9251,6 +9308,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
||
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||
|
unsigned long debugctlmsr, cr3, cr4;
|
||
|
|
||
|
+ /* Record the guest's net vcpu time for enforced NMI injections. */
|
||
|
+ if (unlikely(!cpu_has_virtual_nmis() &&
|
||
|
+ vmx->loaded_vmcs->soft_vnmi_blocked))
|
||
|
+ vmx->loaded_vmcs->entry_time = ktime_get();
|
||
|
+
|
||
|
/* Don't enter VMX if guest state is invalid, let the exit handler
|
||
|
start emulation until we arrive back to a valid state */
|
||
|
if (vmx->emulation_required)
|