Linux v3.14.24

Justin M. Forbes 2014-11-14 13:45:35 -06:00
parent c071a5748c
commit 0b279ec12b
13 changed files with 5 additions and 1240 deletions

View File

@@ -1,145 +0,0 @@
From: Nadav Amit <namit@cs.technion.ac.il>
Date: Fri, 24 Oct 2014 17:07:12 +0200
Subject: [PATCH] KVM: x86: Check non-canonical addresses upon WRMSR
Upon WRMSR, the CPU should inject #GP if a non-canonical value (address) is
written to certain MSRs. The behavior is "almost" identical for AMD and Intel
(ignoring MSRs that are not implemented in either architecture, since they
would #GP anyhow). However, IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP
if a non-canonical address is written on Intel but not on AMD (which ignores
the top 32 bits).
Accordingly, this patch injects a #GP for the MSRs that behave identically on
Intel and AMD. To eliminate the differences between the architectures, the
value written to IA32_SYSENTER_ESP and IA32_SYSENTER_EIP is made canonical
before writing, instead of injecting a #GP.
Some references from Intel and AMD manuals:
According to the Intel SDM description of the WRMSR instruction, #GP is expected on
WRMSR "If the source register contains a non-canonical address and ECX
specifies one of the following MSRs: IA32_DS_AREA, IA32_FS_BASE, IA32_GS_BASE,
IA32_KERNEL_GS_BASE, IA32_LSTAR, IA32_SYSENTER_EIP, IA32_SYSENTER_ESP."
According to the AMD instruction manual:
LSTAR/CSTAR (SYSCALL): "The WRMSR instruction loads the target RIP into the
LSTAR and CSTAR registers. If an RIP written by WRMSR is not in canonical
form, a general-protection exception (#GP) occurs."
IA32_GS_BASE and IA32_FS_BASE (WRFSBASE/WRGSBASE): "The address written to the
base field must be in canonical form or a #GP fault will occur."
IA32_KERNEL_GS_BASE (SWAPGS): "The address stored in the KernelGSbase MSR must
be in canonical form."
This patch fixes CVE-2014-3610.
Cc: stable@vger.kernel.org
Signed-off-by: Nadav Amit <namit@cs.technion.ac.il>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/include/asm/kvm_host.h | 14 ++++++++++++++
arch/x86/kvm/svm.c | 2 +-
arch/x86/kvm/vmx.c | 2 +-
arch/x86/kvm/x86.c | 27 ++++++++++++++++++++++++++-
4 files changed, 42 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7c492ed9087b..78d014c83ae3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -990,6 +990,20 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
}
+static inline u64 get_canonical(u64 la)
+{
+ return ((int64_t)la << 16) >> 16;
+}
+
+static inline bool is_noncanonical_address(u64 la)
+{
+#ifdef CONFIG_X86_64
+ return get_canonical(la) != la;
+#else
+ return false;
+#endif
+}
+
#define TSS_IOPB_BASE_OFFSET 0x66
#define TSS_BASE_SIZE 0x68
#define TSS_IOPB_SIZE (65536 / 8)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ddf742768ecf..e2de97daa03c 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3234,7 +3234,7 @@ static int wrmsr_interception(struct vcpu_svm *svm)
msr.host_initiated = false;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
- if (svm_set_msr(&svm->vcpu, &msr)) {
+ if (kvm_set_msr(&svm->vcpu, &msr)) {
trace_kvm_msr_write_ex(ecx, data);
kvm_inject_gp(&svm->vcpu, 0);
} else {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6a118fa378b5..3a3e419780df 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5263,7 +5263,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
msr.data = data;
msr.index = ecx;
msr.host_initiated = false;
- if (vmx_set_msr(vcpu, &msr) != 0) {
+ if (kvm_set_msr(vcpu, &msr) != 0) {
trace_kvm_msr_write_ex(ecx, data);
kvm_inject_gp(vcpu, 0);
return 1;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8f1e22d3b286..1f9a233d8624 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -984,7 +984,6 @@ void kvm_enable_efer_bits(u64 mask)
}
EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
-
/*
* Writes msr value into the appropriate "register".
* Returns 0 on success, non-0 otherwise.
@@ -992,8 +991,34 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
*/
int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
+ switch (msr->index) {
+ case MSR_FS_BASE:
+ case MSR_GS_BASE:
+ case MSR_KERNEL_GS_BASE:
+ case MSR_CSTAR:
+ case MSR_LSTAR:
+ if (is_noncanonical_address(msr->data))
+ return 1;
+ break;
+ case MSR_IA32_SYSENTER_EIP:
+ case MSR_IA32_SYSENTER_ESP:
+ /*
+ * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if
+ * non-canonical address is written on Intel but not on
+ * AMD (which ignores the top 32-bits, because it does
+ * not implement 64-bit SYSENTER).
+ *
+ * 64-bit code should hence be able to write a non-canonical
+ * value on AMD. Making the address canonical ensures that
+ * vmentry does not fail on Intel after writing a non-canonical
+ * value, and that something deterministic happens if the guest
+ * invokes 64-bit SYSENTER.
+ */
+ msr->data = get_canonical(msr->data);
+ }
return kvm_x86_ops->set_msr(vcpu, msr);
}
+EXPORT_SYMBOL_GPL(kvm_set_msr);
/*
* Adapt set_msr() to msr_io()'s calling convention
--
1.9.3
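
For illustration of the canonicality test above: on x86-64 with 48-bit virtual
addresses, a canonical address simply sign-extends bit 47 into bits 48..63. A
minimal user-space sketch of the same shift pair used by get_canonical()
(stand-alone names, not part of the patch):

#include <stdint.h>
#include <stdio.h>

/* Sign-extend bit 47 into bits 48..63, the same shift pair as
 * get_canonical() in the patch (48-bit virtual addresses assumed). */
static uint64_t canonical(uint64_t la)
{
	return (uint64_t)(((int64_t)la << 16) >> 16);
}

static int noncanonical(uint64_t la)
{
	return canonical(la) != la;
}

int main(void)
{
	uint64_t bad  = 0x0000800000000000ULL; /* bit 47 set, bits 48..63 clear */
	uint64_t good = 0xffff800000000000ULL; /* the canonical form of 'bad' */

	printf("%d %d\n", noncanonical(bad), noncanonical(good));
	return 0;
}

Run on x86-64 this prints "1 0": sign-extending changes the first value, so a
WRMSR of it to, say, MSR_LSTAR must #GP; the second is already canonical.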

View File

@@ -1,234 +0,0 @@
From: Nadav Amit <namit@cs.technion.ac.il>
Date: Fri, 24 Oct 2014 17:07:16 +0200
Subject: [PATCH] KVM: x86: Emulator fixes for eip canonical checks on near
branches
Before changing rip (during jmp, call, ret, etc.), the target should be
asserted to be canonical, as real CPUs do. During sysret, both the target rsp
and rip should be canonical. If any of these values is non-canonical, a #GP
exception should occur. The exceptions to this rule are the syscall and
sysenter instructions, for which the assigned rip is checked when it is
written to the relevant MSRs.
This patch fixes the emulator to behave as real CPUs do for near branches.
Far branches are handled by the next patch.
This fixes CVE-2014-3647.
Cc: stable@vger.kernel.org
Signed-off-by: Nadav Amit <namit@cs.technion.ac.il>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/kvm/emulate.c | 78 ++++++++++++++++++++++++++++++++++----------------
1 file changed, 54 insertions(+), 24 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a85f438b6a47..e52e74feedb8 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -563,7 +563,8 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
return emulate_exception(ctxt, NM_VECTOR, 0, false);
}
-static inline void assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
+ int cs_l)
{
switch (ctxt->op_bytes) {
case 2:
@@ -573,16 +574,25 @@ static inline void assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
ctxt->_eip = (u32)dst;
break;
case 8:
+ if ((cs_l && is_noncanonical_address(dst)) ||
+ (!cs_l && (dst & ~(u32)-1)))
+ return emulate_gp(ctxt, 0);
ctxt->_eip = dst;
break;
default:
WARN(1, "unsupported eip assignment size\n");
}
+ return X86EMUL_CONTINUE;
+}
+
+static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+{
+ return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
}
-static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
{
- assign_eip_near(ctxt, ctxt->_eip + rel);
+ return assign_eip_near(ctxt, ctxt->_eip + rel);
}
static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
@@ -1989,13 +1999,15 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
case 2: /* call near abs */ {
long int old_eip;
old_eip = ctxt->_eip;
- ctxt->_eip = ctxt->src.val;
+ rc = assign_eip_near(ctxt, ctxt->src.val);
+ if (rc != X86EMUL_CONTINUE)
+ break;
ctxt->src.val = old_eip;
rc = em_push(ctxt);
break;
}
case 4: /* jmp abs */
- ctxt->_eip = ctxt->src.val;
+ rc = assign_eip_near(ctxt, ctxt->src.val);
break;
case 5: /* jmp far */
rc = em_jmp_far(ctxt);
@@ -2030,10 +2042,14 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
static int em_ret(struct x86_emulate_ctxt *ctxt)
{
- ctxt->dst.type = OP_REG;
- ctxt->dst.addr.reg = &ctxt->_eip;
- ctxt->dst.bytes = ctxt->op_bytes;
- return em_pop(ctxt);
+ int rc;
+ unsigned long eip;
+
+ rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ return assign_eip_near(ctxt, eip);
}
static int em_ret_far(struct x86_emulate_ctxt *ctxt)
@@ -2314,7 +2330,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
{
const struct x86_emulate_ops *ops = ctxt->ops;
struct desc_struct cs, ss;
- u64 msr_data;
+ u64 msr_data, rcx, rdx;
int usermode;
u16 cs_sel = 0, ss_sel = 0;
@@ -2330,6 +2346,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
else
usermode = X86EMUL_MODE_PROT32;
+ rcx = reg_read(ctxt, VCPU_REGS_RCX);
+ rdx = reg_read(ctxt, VCPU_REGS_RDX);
+
cs.dpl = 3;
ss.dpl = 3;
ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
@@ -2347,6 +2366,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
ss_sel = cs_sel + 8;
cs.d = 0;
cs.l = 1;
+ if (is_noncanonical_address(rcx) ||
+ is_noncanonical_address(rdx))
+ return emulate_gp(ctxt, 0);
break;
}
cs_sel |= SELECTOR_RPL_MASK;
@@ -2355,8 +2377,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
- ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX);
- *reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX);
+ ctxt->_eip = rdx;
+ *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
return X86EMUL_CONTINUE;
}
@@ -2897,10 +2919,13 @@ static int em_aad(struct x86_emulate_ctxt *ctxt)
static int em_call(struct x86_emulate_ctxt *ctxt)
{
+ int rc;
long rel = ctxt->src.val;
ctxt->src.val = (unsigned long)ctxt->_eip;
- jmp_rel(ctxt, rel);
+ rc = jmp_rel(ctxt, rel);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
return em_push(ctxt);
}
@@ -2932,11 +2957,12 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
{
int rc;
+ unsigned long eip;
- ctxt->dst.type = OP_REG;
- ctxt->dst.addr.reg = &ctxt->_eip;
- ctxt->dst.bytes = ctxt->op_bytes;
- rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
+ rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+ rc = assign_eip_near(ctxt, eip);
if (rc != X86EMUL_CONTINUE)
return rc;
rsp_increment(ctxt, ctxt->src.val);
@@ -3267,20 +3293,24 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt)
static int em_loop(struct x86_emulate_ctxt *ctxt)
{
+ int rc = X86EMUL_CONTINUE;
+
register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
(ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
- jmp_rel(ctxt, ctxt->src.val);
+ rc = jmp_rel(ctxt, ctxt->src.val);
- return X86EMUL_CONTINUE;
+ return rc;
}
static int em_jcxz(struct x86_emulate_ctxt *ctxt)
{
+ int rc = X86EMUL_CONTINUE;
+
if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
- jmp_rel(ctxt, ctxt->src.val);
+ rc = jmp_rel(ctxt, ctxt->src.val);
- return X86EMUL_CONTINUE;
+ return rc;
}
static int em_in(struct x86_emulate_ctxt *ctxt)
@@ -4728,7 +4758,7 @@ special_insn:
break;
case 0x70 ... 0x7f: /* jcc (short) */
if (test_cc(ctxt->b, ctxt->eflags))
- jmp_rel(ctxt, ctxt->src.val);
+ rc = jmp_rel(ctxt, ctxt->src.val);
break;
case 0x8d: /* lea r16/r32, m */
ctxt->dst.val = ctxt->src.addr.mem.ea;
@@ -4758,7 +4788,7 @@ special_insn:
break;
case 0xe9: /* jmp rel */
case 0xeb: /* jmp rel short */
- jmp_rel(ctxt, ctxt->src.val);
+ rc = jmp_rel(ctxt, ctxt->src.val);
ctxt->dst.type = OP_NONE; /* Disable writeback. */
break;
case 0xf4: /* hlt */
@@ -4881,7 +4911,7 @@ twobyte_insn:
break;
case 0x80 ... 0x8f: /* jnz rel, etc*/
if (test_cc(ctxt->b, ctxt->eflags))
- jmp_rel(ctxt, ctxt->src.val);
+ rc = jmp_rel(ctxt, ctxt->src.val);
break;
case 0x90 ... 0x9f: /* setcc r/m8 */
ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
--
1.9.3
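
The essence of the new check is visible in a condensed user-space sketch of
the assign_eip_far() logic (hypothetical names, not the kernel code; assumes
the same 48-bit sign-extension rule as the previous patch):

#include <stdint.h>
#include <stdio.h>

#define EM_CONTINUE 0
#define EM_GP_FAULT 1	/* the caller would inject #GP(0) */

static int noncanonical(uint64_t la)
{
	return (uint64_t)(((int64_t)la << 16) >> 16) != la;
}

/* Truncate the target by operand size; in 64-bit code (cs_l set) reject
 * non-canonical targets instead of loading them into RIP. */
static int assign_eip(uint64_t *eip, uint64_t dst, int op_bytes, int cs_l)
{
	switch (op_bytes) {
	case 2: *eip = (uint16_t)dst; break;
	case 4: *eip = (uint32_t)dst; break;
	case 8:
		if ((cs_l && noncanonical(dst)) || (!cs_l && (dst >> 32)))
			return EM_GP_FAULT;
		*eip = dst;
		break;
	}
	return EM_CONTINUE;
}

int main(void)
{
	uint64_t eip = 0;

	printf("%d\n", assign_eip(&eip, 0x0000800000000000ULL, 8, 1)); /* 1 */
	printf("%d\n", assign_eip(&eip, 0x00007fffffffffffULL, 8, 1)); /* 0 */
	return 0;
}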

View File

@@ -1,64 +0,0 @@
From: Nadav Amit <namit@cs.technion.ac.il>
Date: Fri, 24 Oct 2014 17:07:15 +0200
Subject: [PATCH] KVM: x86: Fix wrong masking on relative jump/call
Relative jumps and calls mask the target according to the operand size, not
according to the address size as the KVM emulator does today.
This patch fixes KVM behavior.
Cc: stable@vger.kernel.org
Signed-off-by: Nadav Amit <namit@cs.technion.ac.il>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/kvm/emulate.c | 27 ++++++++++++++++++++++-----
1 file changed, 22 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 03954f7900f5..a85f438b6a47 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -504,11 +504,6 @@ static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
}
-static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
-{
- register_address_increment(ctxt, &ctxt->_eip, rel);
-}
-
static u32 desc_limit_scaled(struct desc_struct *desc)
{
u32 limit = get_desc_limit(desc);
@@ -568,6 +563,28 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
return emulate_exception(ctxt, NM_VECTOR, 0, false);
}
+static inline void assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+{
+ switch (ctxt->op_bytes) {
+ case 2:
+ ctxt->_eip = (u16)dst;
+ break;
+ case 4:
+ ctxt->_eip = (u32)dst;
+ break;
+ case 8:
+ ctxt->_eip = dst;
+ break;
+ default:
+ WARN(1, "unsupported eip assignment size\n");
+ }
+}
+
+static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+{
+ assign_eip_near(ctxt, ctxt->_eip + rel);
+}
+
static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
{
u16 selector;
--
1.9.3
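
A small stand-alone sketch (hypothetical helper, not the kernel code) shows
the behavior the patch is after — truncation follows the operand size:

#include <stdint.h>
#include <stdio.h>

/* A relative jump computes eip + rel and then truncates to the *operand*
 * size, mirroring the switch in assign_eip_near(). */
static uint64_t jmp_rel(uint64_t eip, int32_t rel, int op_bytes)
{
	uint64_t dst = eip + rel;

	switch (op_bytes) {
	case 2: return (uint16_t)dst;
	case 4: return (uint32_t)dst;
	default: return dst;
	}
}

int main(void)
{
	/* 16-bit operand size: 0xfff0 + 0x20 wraps to 0x0010. */
	printf("0x%llx\n", (unsigned long long)jmp_rel(0xfff0, 0x20, 2));
	return 0;
}

With a 16-bit operand size the target wraps within 64 KiB even when the
address size is larger, which is what real CPUs do for near branches.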

View File

@@ -1,287 +0,0 @@
From 1985aa0a5af1888329b9db477a00c3598880fb2b Mon Sep 17 00:00:00 2001
From: Nadav Amit <namit@cs.technion.ac.il>
Date: Fri, 24 Oct 2014 17:07:17 +0200
Subject: [PATCH] KVM: x86: Handle errors when RIP is set during far jumps
Far jmp/call/ret may fault while loading a new RIP. Currently KVM does not
handle this case, which may result in a failed vm-entry once the assignment is
done. The tricky part is that loading the new CS affects the VMCS/VMCB state,
so if we fail while loading the new RIP, we are left in an inconsistent state.
Therefore, on 64-bit, this patch saves the old CS descriptor and restores it
if loading the RIP failed.
This fixes CVE-2014-3647.
Cc: stable@vger.kernel.org
Signed-off-by: Nadav Amit <namit@cs.technion.ac.il>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/kvm/emulate.c | 113 +++++++++++++++++++++++++++++++++----------------
1 file changed, 77 insertions(+), 36 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 38d3751472e4..dd2fdb29a7d4 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1437,7 +1437,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
/* Does not support long mode */
static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
- u16 selector, int seg)
+ u16 selector, int seg, struct desc_struct *desc)
{
struct desc_struct seg_desc, old_desc;
u8 dpl, rpl, cpl;
@@ -1564,6 +1564,8 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
}
load:
ctxt->ops->set_segment(ctxt, selector, &seg_desc, 0, seg);
+ if (desc)
+ *desc = seg_desc;
return X86EMUL_CONTINUE;
exception:
emulate_exception(ctxt, err_vec, err_code, true);
@@ -1770,7 +1772,7 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
- rc = load_segment_descriptor(ctxt, (u16)selector, seg);
+ rc = load_segment_descriptor(ctxt, (u16)selector, seg, NULL);
return rc;
}
@@ -1859,7 +1861,7 @@ static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
if (rc != X86EMUL_CONTINUE)
return rc;
- rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
+ rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS, NULL);
if (rc != X86EMUL_CONTINUE)
return rc;
@@ -1925,7 +1927,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
- rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
+ rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, NULL);
if (rc != X86EMUL_CONTINUE)
return rc;
@@ -1964,17 +1966,30 @@ static int em_iret(struct x86_emulate_ctxt *ctxt)
static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
{
int rc;
- unsigned short sel;
+ unsigned short sel, old_sel;
+ struct desc_struct old_desc, new_desc;
+ const struct x86_emulate_ops *ops = ctxt->ops;
+ u8 cpl = ctxt->ops->cpl(ctxt);
+
+ /* Assignment of RIP may only fail in 64-bit mode */
+ if (ctxt->mode == X86EMUL_MODE_PROT64)
+ ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
+ VCPU_SREG_CS);
memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
- rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS);
+ rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, &new_desc);
if (rc != X86EMUL_CONTINUE)
return rc;
- ctxt->_eip = 0;
- memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
- return X86EMUL_CONTINUE;
+ rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+ if (rc != X86EMUL_CONTINUE) {
+ WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
+ /* assigning eip failed; restore the old cs */
+ ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
+ return rc;
+ }
+ return rc;
}
static int em_grp45(struct x86_emulate_ctxt *ctxt)
@@ -2038,21 +2053,33 @@ static int em_ret(struct x86_emulate_ctxt *ctxt)
static int em_ret_far(struct x86_emulate_ctxt *ctxt)
{
int rc;
- unsigned long cs;
+ unsigned long eip, cs;
+ u16 old_cs;
int cpl = ctxt->ops->cpl(ctxt);
+ struct desc_struct old_desc, new_desc;
+ const struct x86_emulate_ops *ops = ctxt->ops;
+
+ if (ctxt->mode == X86EMUL_MODE_PROT64)
+ ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
+ VCPU_SREG_CS);
- rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
+ rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
if (rc != X86EMUL_CONTINUE)
return rc;
- if (ctxt->op_bytes == 4)
- ctxt->_eip = (u32)ctxt->_eip;
rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
if (rc != X86EMUL_CONTINUE)
return rc;
/* Outer-privilege level return is not implemented */
if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
return X86EMUL_UNHANDLEABLE;
- rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
+ rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, &new_desc);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+ rc = assign_eip_far(ctxt, eip, new_desc.l);
+ if (rc != X86EMUL_CONTINUE) {
+ WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
+ ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+ }
return rc;
}
@@ -2093,7 +2120,7 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt)
memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
- rc = load_segment_descriptor(ctxt, sel, seg);
+ rc = load_segment_descriptor(ctxt, sel, seg, NULL);
if (rc != X86EMUL_CONTINUE)
return rc;
@@ -2473,19 +2500,19 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
* Now load segment descriptors. If fault happens at this stage
* it is handled in a context of new task
*/
- ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR);
+ ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES);
+ ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS);
+ ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS);
+ ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS);
+ ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
@@ -2614,25 +2641,25 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
* Now load segment descriptors. If fault happens at this stage
* it is handled in a context of new task
*/
- ret = load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
+ ret = load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES);
+ ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS);
+ ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS);
+ ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS);
+ ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS);
+ ret = load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS);
+ ret = load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
@@ -2912,24 +2939,38 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
u16 sel, old_cs;
ulong old_eip;
int rc;
+ struct desc_struct old_desc, new_desc;
+ const struct x86_emulate_ops *ops = ctxt->ops;
+ int cpl = ctxt->ops->cpl(ctxt);
- old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
old_eip = ctxt->_eip;
+ ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
- if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS))
+ rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, &new_desc);
+ if (rc != X86EMUL_CONTINUE)
return X86EMUL_CONTINUE;
- ctxt->_eip = 0;
- memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
+ rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+ if (rc != X86EMUL_CONTINUE)
+ goto fail;
ctxt->src.val = old_cs;
rc = em_push(ctxt);
if (rc != X86EMUL_CONTINUE)
- return rc;
+ goto fail;
ctxt->src.val = old_eip;
- return em_push(ctxt);
+ rc = em_push(ctxt);
+ /* If we failed, we tainted the memory, but at the very least we should
+ restore cs */
+ if (rc != X86EMUL_CONTINUE)
+ goto fail;
+ return rc;
+fail:
+ ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+ return rc;
+
}
static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
@@ -3115,7 +3156,7 @@ static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
/* Disable writeback. */
ctxt->dst.type = OP_NONE;
- return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
+ return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg, NULL);
}
static int em_lldt(struct x86_emulate_ctxt *ctxt)
@@ -3124,7 +3165,7 @@ static int em_lldt(struct x86_emulate_ctxt *ctxt)
/* Disable writeback. */
ctxt->dst.type = OP_NONE;
- return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
+ return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR, NULL);
}
static int em_ltr(struct x86_emulate_ctxt *ctxt)
@@ -3133,7 +3174,7 @@ static int em_ltr(struct x86_emulate_ctxt *ctxt)
/* Disable writeback. */
ctxt->dst.type = OP_NONE;
- return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
+ return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR, NULL);
}
static int em_invlpg(struct x86_emulate_ctxt *ctxt)
--
1.9.3
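
The structure of the fix is a save/attempt/rollback idiom: snapshot the old
CS descriptor, load the new one, and restore the snapshot if the subsequent
RIP assignment faults. A generic user-space sketch of that idiom (hypothetical
types, far simpler than the emulator):

#include <stdio.h>

struct seg { unsigned short sel; unsigned long long base; };

static int rip_ok(unsigned long long rip)
{
	return rip < 0x0000800000000000ULL;	/* lower canonical half only */
}

/* Snapshot CS, attempt the RIP assignment, restore on failure, so a fault
 * never leaves the (CS, RIP) pair half-updated. */
static int far_transfer(struct seg *cs, struct seg new_cs,
			unsigned long long *rip, unsigned long long new_rip)
{
	struct seg old_cs = *cs;	/* save before mutating state */

	*cs = new_cs;			/* loading CS already changed state */
	if (!rip_ok(new_rip)) {
		*cs = old_cs;		/* roll back the CS load */
		return 1;		/* caller injects #GP */
	}
	*rip = new_rip;
	return 0;
}

int main(void)
{
	struct seg cs = { 0x08, 0 }, new_cs = { 0x10, 0 };
	unsigned long long rip = 0x1000;

	/* Prints "1 sel=0x8": the transfer failed and CS was rolled back. */
	printf("%d sel=%#x\n", far_transfer(&cs, new_cs, &rip, ~0ULL), cs.sel);
	return 0;
}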

View File

@@ -1,36 +0,0 @@
From: Andy Honig <ahonig@google.com>
Date: Fri, 24 Oct 2014 17:07:14 +0200
Subject: [PATCH] KVM: x86: Improve thread safety in pit
There's a race condition in the PIT emulation code in KVM. In
__kvm_migrate_pit_timer the pit_timer object is accessed without
synchronization. If the race condition occurs at the wrong time, this
can crash the host kernel.
This fixes CVE-2014-3611.
Cc: stable@vger.kernel.org
Signed-off-by: Andrew Honig <ahonig@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/kvm/i8254.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 518d86471b76..298781d4cfb4 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -262,8 +262,10 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
return;
timer = &pit->pit_state.timer;
+ mutex_lock(&pit->pit_state.lock);
if (hrtimer_cancel(timer))
hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
+ mutex_unlock(&pit->pit_state.lock);
}
static void destroy_pit_timer(struct kvm_pit *pit)
--
1.9.3
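
The fix is the standard "take the lock that guards the shared object"
pattern; pit_state.lock already protects the timer everywhere else. A
user-space analogue with POSIX threads (hypothetical structure, not the
kernel code):

#include <pthread.h>

struct pit_state {
	pthread_mutex_t lock;	/* plays the role of pit_state.lock */
	int timer_armed;
};

/* Cancel-and-rearm must be atomic with respect to every other user of
 * the timer, so it runs under the same mutex they all take. */
static void migrate_timer(struct pit_state *ps)
{
	pthread_mutex_lock(&ps->lock);
	if (ps->timer_armed) {		/* hrtimer_cancel() succeeded */
		ps->timer_armed = 0;
		ps->timer_armed = 1;	/* hrtimer_start_expires() */
	}
	pthread_mutex_unlock(&ps->lock);
}

int main(void)
{
	struct pit_state ps = { PTHREAD_MUTEX_INITIALIZER, 1 };

	migrate_timer(&ps);
	return 0;
}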

View File

@@ -1,89 +0,0 @@
From: Andy Honig <ahonig@google.com>
Date: Fri, 24 Oct 2014 17:07:13 +0200
Subject: [PATCH] KVM: x86: Prevent host from panicking on shared MSR writes.
The previous patch blocked invalid writes directly when the MSR
is written. As a precaution, prevent similar future mistakes by
gracefully handling #GPs caused by writes to shared MSRs.
Cc: stable@vger.kernel.org
Signed-off-by: Andrew Honig <ahonig@google.com>
[Remove parts obsoleted by Nadav's patch. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/include/asm/kvm_host.h | 2 +-
arch/x86/kvm/vmx.c | 7 +++++--
arch/x86/kvm/x86.c | 11 ++++++++---
3 files changed, 14 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 78d014c83ae3..d16311f4099e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1062,7 +1062,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
void kvm_vcpu_reset(struct kvm_vcpu *vcpu);
void kvm_define_shared_msr(unsigned index, u32 msr);
-void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3a3e419780df..0881ec6154cc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2632,12 +2632,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
default:
msr = find_msr_entry(vmx, msr_index);
if (msr) {
+ u64 old_msr_data = msr->data;
msr->data = data;
if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
preempt_disable();
- kvm_set_shared_msr(msr->index, msr->data,
- msr->mask);
+ ret = kvm_set_shared_msr(msr->index, msr->data,
+ msr->mask);
preempt_enable();
+ if (ret)
+ msr->data = old_msr_data;
}
break;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1f9a233d8624..9d292e8372d6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -229,20 +229,25 @@ static void kvm_shared_msr_cpu_online(void)
shared_msr_update(i, shared_msrs_global.msrs[i]);
}
-void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
+int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
{
unsigned int cpu = smp_processor_id();
struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
+ int err;
if (((value ^ smsr->values[slot].curr) & mask) == 0)
- return;
+ return 0;
smsr->values[slot].curr = value;
- wrmsrl(shared_msrs_global.msrs[slot], value);
+ err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
+ if (err)
+ return 1;
+
if (!smsr->registered) {
smsr->urn.on_user_return = kvm_on_user_return;
user_return_notifier_register(&smsr->urn);
smsr->registered = true;
}
+ return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
--
1.9.3
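
The idiom here is write-through-with-rollback: update the software cache,
attempt the hardware write with the _safe variant, and undo the cache on
failure so it never diverges from the real MSR. A condensed user-space sketch
(the wrmsr stand-in is fake; in the patch itself the rollback of msr->data
happens in the vmx_set_msr() caller):

#include <stdint.h>

static uint64_t cached;	/* plays the role of smsr->values[slot].curr */

/* Stand-in for wrmsrl_safe(): nonzero if the (fake) CPU rejects the value. */
static int wrmsr_safe_stub(uint64_t v)
{
	uint64_t top = v >> 47;		/* bit 47 plus everything above it */
	return top != 0 && top != 0x1ffff;	/* non-canonical: #GP */
}

static int set_shared(uint64_t value)
{
	uint64_t old = cached;

	if (value == cached)		/* unchanged bits: nothing to do */
		return 0;
	cached = value;
	if (wrmsr_safe_stub(value)) {
		cached = old;		/* hardware refused: undo the cache */
		return 1;		/* caller restores its own copy too */
	}
	return 0;
}

int main(void)
{
	return set_shared(0x0000800000000000ULL);	/* 1: rejected */
}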

View File

@@ -74,7 +74,7 @@ Summary: The Linux kernel
%if 0%{?released_kernel}
# Do we have a -stable update to apply?
%define stable_update 23
%define stable_update 24
# Is it a -stable RC?
%define stable_rc 0
# Set rpm version accordingly
@@ -753,9 +753,6 @@ Patch25109: revert-input-wacom-testing-result-shows-get_report-is-unnecessary.pa
Patch25110: 0001-ideapad-laptop-Blacklist-rfkill-control-on-the-Lenov.patch
Patch25111: 0002-ideapad-laptop-Change-Lenovo-Yoga-2-series-rfkill-ha.patch
#CVE-2014-7970 rhbz 1151095 1151484
Patch26032: mnt-Prevent-pivot_root-from-creating-a-loop-in-the-m.patch
# CVE-2014-3690 rhbz 1153322 1155372
Patch26060: x86-kvm-vmx-Preserve-CR4-across-VM-entry.patch
@@ -768,23 +765,6 @@ Patch26062: net-sctp-fix-panic-on-duplicate-ASCONF-chunks.patch
#CVE-2014-3673 rhbz 1147850 1155727
Patch26063: net-sctp-fix-remote-memory-pressure-from-excessive-q.patch
# CVE-2014-3610 kvm: noncanonical MSR writes (rhbz 1144883 1156543)
# CVE-2014-3611 kvm: PIT timer race condition (rhbz 1144878 1156537)
# CVE-2014-3646 kvm: vmx: invvpid vm exit not handled (rhbz 1144825 1156534)
# CVE-2014-8369 kvm: excessive pages un-pinning in kvm_iommu_map error path (rhbz 1156518 1156522)
Patch26070: KVM-x86-Check-non-canonical-addresses-upon-WRMSR.patch
Patch26071: KVM-x86-Prevent-host-from-panicking-on-shared-MSR-wr.patch
Patch26072: KVM-x86-Improve-thread-safety-in-pit.patch
Patch26073: KVM-x86-Fix-wrong-masking-on-relative-jump-call.patch
Patch26074: KVM-x86-Emulator-fixes-for-eip-canonical-checks-on-n.patch
Patch26075: KVM-x86-Handle-errors-when-RIP-is-set-during-far-jum.patch
Patch26076: kvm-vmx-handle-invvpid-vm-exit-gracefully.patch
Patch26077: kvm-x86-don-t-kill-guest-on-unknown-exit-reason.patch
Patch26082: kvm-fix-excessive-pages-un-pinning-in-kvm_iommu_map-.patch
# CVE-2014-7826 CVE-2014-7825 rhbz 1161565 1161572
Patch26085: tracing-syscalls-Ignore-numbers-outside-NR_syscalls-.patch
#CVE-2014-7841 rhbz 1163087 1163095
Patch26067: net-sctp-fix-NULL-pointer-dereference-in-af-from_add.patch
@@ -1486,9 +1466,6 @@ ApplyPatch revert-input-wacom-testing-result-shows-get_report-is-unnecessary.pat
ApplyPatch 0001-ideapad-laptop-Blacklist-rfkill-control-on-the-Lenov.patch
ApplyPatch 0002-ideapad-laptop-Change-Lenovo-Yoga-2-series-rfkill-ha.patch
#CVE-2014-7970 rhbz 1151095 1151484
ApplyPatch mnt-Prevent-pivot_root-from-creating-a-loop-in-the-m.patch
# CVE-2014-3690 rhbz 1153322 1155372
ApplyPatch x86-kvm-vmx-Preserve-CR4-across-VM-entry.patch
@@ -1501,23 +1478,6 @@ ApplyPatch net-sctp-fix-panic-on-duplicate-ASCONF-chunks.patch
#CVE-2014-3673 rhbz 1147850 1155727
ApplyPatch net-sctp-fix-remote-memory-pressure-from-excessive-q.patch
# CVE-2014-3610 kvm: noncanonical MSR writes (rhbz 1144883 1156543)
# CVE-2014-3611 kvm: PIT timer race condition (rhbz 1144878 1156537)
# CVE-2014-3646 kvm: vmx: invvpid vm exit not handled (rhbz 1144825 1156534)
# CVE-2014-8369 kvm: excessive pages un-pinning in kvm_iommu_map error path (rhbz 1156518 1156522)
ApplyPatch KVM-x86-Check-non-canonical-addresses-upon-WRMSR.patch
ApplyPatch KVM-x86-Prevent-host-from-panicking-on-shared-MSR-wr.patch
ApplyPatch KVM-x86-Improve-thread-safety-in-pit.patch
ApplyPatch KVM-x86-Fix-wrong-masking-on-relative-jump-call.patch
ApplyPatch KVM-x86-Emulator-fixes-for-eip-canonical-checks-on-n.patch
ApplyPatch KVM-x86-Handle-errors-when-RIP-is-set-during-far-jum.patch
ApplyPatch kvm-vmx-handle-invvpid-vm-exit-gracefully.patch
ApplyPatch kvm-x86-don-t-kill-guest-on-unknown-exit-reason.patch
ApplyPatch kvm-fix-excessive-pages-un-pinning-in-kvm_iommu_map-.patch
# CVE-2014-7826 CVE-2014-7825 rhbz 1161565 1161572
ApplyPatch tracing-syscalls-Ignore-numbers-outside-NR_syscalls-.patch
#CVE-2014-7841 rhbz 1163087 1163095
ApplyPatch net-sctp-fix-NULL-pointer-dereference-in-af-from_add.patch
@@ -2336,6 +2296,9 @@ fi
# and build.
%changelog
* Fri Nov 14 2014 Justin M. Forbes <jforbes@fedoraproject.org> - 3.14.24-100
- Linux v3.14.24
* Thu Nov 13 2014 Josh Boyer <jwboyer@fedoraproject.org>
- CVE-2014-7842 kvm: reporting emulation failures to userspace (rhbz 1163762 1163767)

View File

@@ -1,78 +0,0 @@
From: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Date: Fri, 24 Oct 2014 17:07:24 +0200
Subject: [PATCH] kvm: fix excessive pages un-pinning in kvm_iommu_map error
path.
The third parameter of kvm_unpin_pages() when called from
kvm_iommu_map_pages() is wrong; it should be the number of pages to un-pin
and not the page size.
This error was facilitated by an inconsistent API: kvm_pin_pages() takes
a size, but kvm_unpin_pages() takes a number of pages, so fix the problem
by making the two match.
This was introduced by commit 350b8bd ("kvm: iommu: fix the third parameter
of kvm_iommu_put_pages (CVE-2014-3601)"), which fixes the lack of
un-pinning for pages intended to be un-pinned (i.e. memory leak) but
unfortunately potentially increased the number of pages we un-pin that
should have stayed pinned. As far as I understand though, the same
practical mitigations apply.
This issue was found during review of Red Hat 6.6 patches to prepare
Ksplice rebootless updates.
Thanks to Vegard for his time on a late Friday evening to help me in
understanding this code.
Fixes: 350b8bd ("kvm: iommu: fix the third parameter of... (CVE-2014-3601)")
Cc: stable@vger.kernel.org
Signed-off-by: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
Signed-off-by: Jamie Iles <jamie.iles@oracle.com>
Reviewed-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
virt/kvm/iommu.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 714b94932312..1f0dc1e5f1f0 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -43,13 +43,13 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
gfn_t base_gfn, unsigned long npages);
static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
- unsigned long size)
+ unsigned long npages)
{
gfn_t end_gfn;
pfn_t pfn;
pfn = gfn_to_pfn_memslot(slot, gfn);
- end_gfn = gfn + (size >> PAGE_SHIFT);
+ end_gfn = gfn + npages;
gfn += 1;
if (is_error_noslot_pfn(pfn))
@@ -119,7 +119,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
* Pin all pages we are about to map in memory. This is
* important because we unmap and unpin in 4kb steps later.
*/
- pfn = kvm_pin_pages(slot, gfn, page_size);
+ pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT);
if (is_error_noslot_pfn(pfn)) {
gfn += 1;
continue;
@@ -131,7 +131,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
if (r) {
printk(KERN_ERR "kvm_iommu_map_address:"
"iommu failed to map pfn=%llx\n", pfn);
- kvm_unpin_pages(kvm, pfn, page_size);
+ kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT);
goto unmap_pages;
}
--
1.9.3
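
The bug is a pure unit mismatch: a byte count was passed where a page count
was expected. The conversion the fix applies is just a shift:

#include <stdio.h>

#define PAGE_SHIFT 12	/* 4 KiB pages on x86 */

int main(void)
{
	unsigned long page_size = 2UL << 20;		/* a 2 MiB mapping */
	unsigned long npages = page_size >> PAGE_SHIFT;	/* 512 pages */

	/* The old code un-pinned 'page_size' (2097152!) pages instead of
	 * 512, releasing pages that should have stayed pinned. */
	printf("%lu pages, not %lu\n", npages, page_size);
	return 0;
}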

View File

@@ -1,79 +0,0 @@
From 1d4d4260d85f76b6c07ec165fba0fa81005ae68b Mon Sep 17 00:00:00 2001
From: Petr Matousek <pmatouse@redhat.com>
Date: Fri, 24 Oct 2014 17:07:18 +0200
Subject: [PATCH] kvm: vmx: handle invvpid vm exit gracefully
On systems with invvpid instruction support (the corresponding bit in the
IA32_VMX_EPT_VPID_CAP MSR is set), a guest invocation of invvpid causes a
vm exit, which is currently not handled and results in propagation of an
unknown exit to userspace.
Fix this by installing an invvpid vm exit handler.
This is CVE-2014-3646.
Cc: stable@vger.kernel.org
Signed-off-by: Petr Matousek <pmatouse@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/include/uapi/asm/vmx.h | 2 ++
arch/x86/kvm/vmx.c | 9 ++++++++-
2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 0e79420376eb..990a2fe1588d 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -67,6 +67,7 @@
#define EXIT_REASON_EPT_MISCONFIG 49
#define EXIT_REASON_INVEPT 50
#define EXIT_REASON_PREEMPTION_TIMER 52
+#define EXIT_REASON_INVVPID 53
#define EXIT_REASON_WBINVD 54
#define EXIT_REASON_XSETBV 55
#define EXIT_REASON_APIC_WRITE 56
@@ -114,6 +115,7 @@
{ EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
{ EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
{ EXIT_REASON_INVD, "INVD" }, \
+ { EXIT_REASON_INVVPID, "INVVPID" }, \
{ EXIT_REASON_INVPCID, "INVPCID" }
#endif /* _UAPIVMX_H */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f0d9b00cfc9f..4e9a45dab731 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6450,6 +6450,12 @@ static int handle_invept(struct kvm_vcpu *vcpu)
return 1;
}
+static int handle_invvpid(struct kvm_vcpu *vcpu)
+{
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+}
+
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -6495,6 +6501,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op,
[EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op,
[EXIT_REASON_INVEPT] = handle_invept,
+ [EXIT_REASON_INVVPID] = handle_invvpid,
};
static const int kvm_vmx_max_exit_handlers =
@@ -6728,7 +6735,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
- case EXIT_REASON_INVEPT:
+ case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
/*
* VMX instructions trap unconditionally. This allows L1 to
* emulate them for its L2 guest, i.e., allows 3-level nesting!
--
1.9.3
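
The shape of the fix: add an entry to the exit-reason dispatch table so the
exit is consumed (here by injecting #UD, since nested VPID is not exposed)
instead of falling through to userspace. A condensed sketch of that table
pattern (hypothetical types, not the kernel code):

#define EXIT_INVVPID 53
#define NR_EXITS     64

struct vcpu;	/* opaque here */

/* Handlers return 1 when the exit is fully handled and the guest may
 * resume, the same convention as kvm_vmx_exit_handlers[]. */
typedef int (*exit_handler_t)(struct vcpu *);

static int handle_invvpid(struct vcpu *v)
{
	(void)v;	/* would inject #UD: nested VPID is not exposed */
	return 1;
}

static exit_handler_t handlers[NR_EXITS] = {
	[EXIT_INVVPID] = handle_invvpid,	/* the entry the patch adds */
};

int main(void)
{
	return handlers[EXIT_INVVPID] ? 0 : 1;
}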

View File

@@ -1,54 +0,0 @@
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 24 Oct 2014 17:07:19 +0200
Subject: [PATCH] kvm: x86: don't kill guest on unknown exit reason
KVM_EXIT_UNKNOWN is a kvm bug; we don't really know whether it was
triggered by a privileged application. Let's not kill the guest: WARN
and inject #UD instead.
Cc: stable@vger.kernel.org
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/kvm/svm.c | 6 +++---
arch/x86/kvm/vmx.c | 6 +++---
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e2de97daa03c..78dadc36fc78 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3534,9 +3534,9 @@ static int handle_exit(struct kvm_vcpu *vcpu)
if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
|| !svm_exit_handlers[exit_code]) {
- kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
- kvm_run->hw.hardware_exit_reason = exit_code;
- return 0;
+ WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code);
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
}
return svm_exit_handlers[exit_code](svm);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 12dd2b2e655c..41a5426c8edb 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7065,10 +7065,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
&& kvm_vmx_exit_handlers[exit_reason])
return kvm_vmx_exit_handlers[exit_reason](vcpu);
else {
- vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
- vcpu->run->hw.hardware_exit_reason = exit_reason;
+ WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
}
- return 0;
}
static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
--
1.9.3
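
Both hunks swap "report KVM_EXIT_UNKNOWN and stop" for "warn once, inject
#UD, resume". A condensed sketch of the bounds-checked dispatch with the new
fallback (hypothetical names, not the kernel code):

#include <stdio.h>

#define NR_HANDLERS 4

static int handle_known(void) { return 1; }

static int (*handlers[NR_HANDLERS])(void) = { handle_known };

static int dispatch(unsigned int reason)
{
	if (reason >= NR_HANDLERS || !handlers[reason]) {
		/* Old: report KVM_EXIT_UNKNOWN and stop the guest.
		 * New: warn once, inject #UD into the guest, resume. */
		fprintf(stderr, "unexpected exit reason 0x%x\n", reason);
		return 1;	/* guest keeps running */
	}
	return handlers[reason]();
}

int main(void)
{
	return dispatch(3) == 1 ? 0 : 1;	/* no handler: warn + resume */
}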

View File

@@ -1,44 +0,0 @@
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 8 Oct 2014 10:42:27 -0700
Subject: [PATCH] mnt: Prevent pivot_root from creating a loop in the mount
tree
Andy Lutomirski recently demonstrated that when chroot is used to set
the root path below the path of the new ``root'' passed to pivot_root,
the pivot_root system call succeeds and leaks mounts.
In examining the code I see that starting with a new root that is
below the current root in the mount tree will result in a loop in the
mount tree after the mounts are detached and then reattached to one
another, resulting in all kinds of ugliness, including a leak of the
mounts involved in the mount loop.
Prevent this problem by ensuring that the new mount is reachable from
the current root of the mount tree.
Upstream-status: Submitted for 3.18
Bugzilla: 1151095,1151484
Reported-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
fs/namespace.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/fs/namespace.c b/fs/namespace.c
index ef42d9bee212..74647c2fe69c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2820,6 +2820,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
/* make sure we can reach put_old from new_root */
if (!is_path_reachable(old_mnt, old.dentry, &new))
goto out4;
+ /* make certain new is below the root */
+ if (!is_path_reachable(new_mnt, new.dentry, &root))
+ goto out4;
root_mp->m_count++; /* pin it so it won't go away */
lock_mount_hash();
detach_mnt(new_mnt, &parent_path);
--
1.9.3
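
Reachability here means "walking up the mount tree from X eventually hits
Y". The patch makes the check symmetric: put_old must be reachable from
new_root, and new_root must be reachable from the current root. A stand-alone
sketch of such a parent-pointer walk (hypothetical structure, much simpler
than the real is_path_reachable()):

#include <stdio.h>

struct mnt { struct mnt *parent; const char *name; };

/* 'child' is reachable from 'root' if walking parent pointers from child
 * reaches root, i.e. child lives somewhere under root in the tree. */
static int is_reachable(struct mnt *child, struct mnt *root)
{
	for (; child; child = child->parent)
		if (child == root)
			return 1;
	return 0;
}

int main(void)
{
	struct mnt root     = { 0, "/" };
	struct mnt new_root = { &root, "/new" };

	/* pivot_root now also requires new_root to be below the current
	 * root, otherwise detaching and reattaching creates a loop. */
	printf("%d\n", is_reachable(&new_root, &root));	/* 1: allowed */
	return 0;
}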

View File

@@ -1,2 +1,2 @@
b621207b3f6ecbb67db18b13258f8ea8 linux-3.14.tar.xz
45a2b9fbe6c9075093fb015f818b4e37 patch-3.14.23.xz
651a92fc1d45c02fa02358bb07e80697 patch-3.14.24.xz

View File

@@ -1,88 +0,0 @@
From c0dc842e68fe51dd4096d374159d38292c4aca61 Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin@rab.in>
Date: Wed, 29 Oct 2014 23:06:58 +0100
Subject: [PATCH] tracing/syscalls: Ignore numbers outside NR_syscalls' range
ARM has some private syscalls (for example, set_tls(2)) which lie
outside the range of NR_syscalls. If any of these are called while
syscall tracing is being performed, out-of-bounds array access will
occur in the ftrace and perf sys_{enter,exit} handlers.
# trace-cmd record -e raw_syscalls:* true && trace-cmd report
...
true-653 [000] 384.675777: sys_enter: NR 192 (0, 1000, 3, 4000022, ffffffff, 0)
true-653 [000] 384.675812: sys_exit: NR 192 = 1995915264
true-653 [000] 384.675971: sys_enter: NR 983045 (76f74480, 76f74000, 76f74b28, 76f74480, 76f76f74, 1)
true-653 [000] 384.675988: sys_exit: NR 983045 = 0
...
# trace-cmd record -e syscalls:* true
[ 17.289329] Unable to handle kernel paging request at virtual address aaaaaace
[ 17.289590] pgd = 9e71c000
[ 17.289696] [aaaaaace] *pgd=00000000
[ 17.289985] Internal error: Oops: 5 [#1] PREEMPT SMP ARM
[ 17.290169] Modules linked in:
[ 17.290391] CPU: 0 PID: 704 Comm: true Not tainted 3.18.0-rc2+ #21
[ 17.290585] task: 9f4dab00 ti: 9e710000 task.ti: 9e710000
[ 17.290747] PC is at ftrace_syscall_enter+0x48/0x1f8
[ 17.290866] LR is at syscall_trace_enter+0x124/0x184
Fix this by ignoring out-of-NR_syscalls-bounds syscall numbers.
Commit cd0980fc8add "tracing: Check invalid syscall nr while tracing syscalls"
added the check for less than zero, but it should have also checked
for greater than NR_syscalls.
Link: http://lkml.kernel.org/p/1414620418-29472-1-git-send-email-rabin@rab.in
Fixes: cd0980fc8add "tracing: Check invalid syscall nr while tracing syscalls"
Cc: stable@vger.kernel.org # 2.6.33+
Signed-off-by: Rabin Vincent <rabin@rab.in>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
kernel/trace/trace_syscalls.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 759d5e004517..7e3cd7aaec83 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -313,7 +313,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
int size;
syscall_nr = trace_get_syscall_nr(current, regs);
- if (syscall_nr < 0)
+ if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
return;
/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */
@@ -360,7 +360,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
int syscall_nr;
syscall_nr = trace_get_syscall_nr(current, regs);
- if (syscall_nr < 0)
+ if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
return;
/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */
@@ -567,7 +567,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
int size;
syscall_nr = trace_get_syscall_nr(current, regs);
- if (syscall_nr < 0)
+ if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
return;
if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
return;
@@ -641,7 +641,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
int size;
syscall_nr = trace_get_syscall_nr(current, regs);
- if (syscall_nr < 0)
+ if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
return;
if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
return;
--
1.9.3
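
The recurring pattern in all four hunks is completing a one-sided range
check. A condensed illustration of why the missing upper bound mattered
(hypothetical array, stand-alone, not the tracing code):

#include <stdio.h>

#define NR_SYSCALLS 400	/* stand-in for the arch's NR_syscalls */

static int hits[NR_SYSCALLS];

static void trace_enter(long nr)
{
	/* ARM private syscalls such as 983045 pass the old 'nr < 0' test
	 * but index far past the array; the fix adds the upper bound. */
	if (nr < 0 || nr >= NR_SYSCALLS)
		return;
	hits[nr]++;
}

int main(void)
{
	trace_enter(983045);	/* now ignored; out-of-bounds before */
	trace_enter(192);	/* a normal syscall, traced */
	printf("%d\n", hits[192]);
	return 0;
}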