170 lines
7.5 KiB
Diff
170 lines
7.5 KiB
Diff
|
From 7482be857b0d55189020bef699b65903be9c256a Mon Sep 17 00:00:00 2001
|
||
|
From: Aurelien Jarno <aurelien@aurel32.net>
|
||
|
Date: Tue, 9 Oct 2012 21:53:11 +0200
|
||
|
Subject: [PATCH] tcg/arm: fix TLB access in qemu-ld/st ops
|
||
|
|
||
|
The TCG arm backend considers it likely that the offset to the TLB
|
||
|
entries does not exceed 12 bits for mem_index = 0. In practice this is
|
||
|
not true for at least the MIPS target.
|
||
|
|
||
|
The current patch fixes that by loading the bits 23-12 with a separate
|
||
|
instruction, and using loads with address writeback, independently of
|
||
|
the value of mem_index. In total this allows a 24-bit offset, which is a
|
||
|
lot more than needed.
|
||
|
|
||
|
Cc: Andrzej Zaborowski <balrogg@gmail.com>
|
||
|
Cc: Peter Maydell <peter.maydell@linaro.org>
|
||
|
Cc: qemu-stable@nongnu.org
|
||
|
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
|
||
|
(cherry picked from commit d17bd1d8cc27f8c1a24c65f555a77a661c332b7f)
|
||
|
|
||
|
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||
|
---
|
||
|
tcg/arm/tcg-target.c | 78 ++++++++++++++++++++++++++++------------------------
|
||
|
1 file changed, 42 insertions(+), 36 deletions(-)
|
||
|
|
||
|
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
|
||
|
index aed3b53..fbad716 100644
|
||
|
--- a/tcg/arm/tcg-target.c
|
||
|
+++ b/tcg/arm/tcg-target.c
|
||
|
@@ -630,6 +630,22 @@ static inline void tcg_out_ld32_12(TCGContext *s, int cond,
|
||
|
(rn << 16) | (rd << 12) | ((-im) & 0xfff));
|
||
|
}
|
||
|
|
||
|
+/* Offset pre-increment with base writeback. */
|
||
|
+static inline void tcg_out_ld32_12wb(TCGContext *s, int cond,
|
||
|
+ int rd, int rn, tcg_target_long im)
|
||
|
+{
|
||
|
+ /* ldr with writeback where both registers are equal is UNPREDICTABLE */
|
||
|
+ assert(rd != rn);
|
||
|
+
|
||
|
+ if (im >= 0) {
|
||
|
+ tcg_out32(s, (cond << 28) | 0x05b00000 |
|
||
|
+ (rn << 16) | (rd << 12) | (im & 0xfff));
|
||
|
+ } else {
|
||
|
+ tcg_out32(s, (cond << 28) | 0x05300000 |
|
||
|
+ (rn << 16) | (rd << 12) | ((-im) & 0xfff));
|
||
|
+ }
|
||
|
+}
|
||
|
+
|
||
|
static inline void tcg_out_st32_12(TCGContext *s, int cond,
|
||
|
int rd, int rn, tcg_target_long im)
|
||
|
{
|
||
|
@@ -1062,7 +1078,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
|
||
|
{
|
||
|
int addr_reg, data_reg, data_reg2, bswap;
|
||
|
#ifdef CONFIG_SOFTMMU
|
||
|
- int mem_index, s_bits;
|
||
|
+ int mem_index, s_bits, tlb_offset;
|
||
|
TCGReg argreg;
|
||
|
# if TARGET_LONG_BITS == 64
|
||
|
int addr_reg2;
|
||
|
@@ -1102,19 +1118,15 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
|
||
|
TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1);
|
||
|
tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_AREG0,
|
||
|
TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
|
||
|
- /* In the
|
||
|
- * ldr r1 [r0, #(offsetof(CPUArchState, tlb_table[mem_index][0].addr_read))]
|
||
|
- * below, the offset is likely to exceed 12 bits if mem_index != 0 and
|
||
|
- * not exceed otherwise, so use an
|
||
|
- * add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table)
|
||
|
- * before.
|
||
|
- */
|
||
|
- if (mem_index)
|
||
|
+ /* We assume that the offset is contained within 20 bits. */
|
||
|
+ tlb_offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read);
|
||
|
+ assert(tlb_offset & ~0xfffff == 0);
|
||
|
+ if (tlb_offset > 0xfff) {
|
||
|
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0,
|
||
|
- (mem_index << (TLB_SHIFT & 1)) |
|
||
|
- ((16 - (TLB_SHIFT >> 1)) << 8));
|
||
|
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0,
|
||
|
- offsetof(CPUArchState, tlb_table[0][0].addr_read));
|
||
|
+ 0xa00 | (tlb_offset >> 12));
|
||
|
+ tlb_offset &= 0xfff;
|
||
|
+ }
|
||
|
+ tcg_out_ld32_12wb(s, COND_AL, TCG_REG_R1, TCG_REG_R0, tlb_offset);
|
||
|
tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1,
|
||
|
TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
|
||
|
/* Check alignment. */
|
||
|
@@ -1122,15 +1134,14 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
|
||
|
tcg_out_dat_imm(s, COND_EQ, ARITH_TST,
|
||
|
0, addr_reg, (1 << s_bits) - 1);
|
||
|
# if TARGET_LONG_BITS == 64
|
||
|
- /* XXX: possibly we could use a block data load or writeback in
|
||
|
- * the first access. */
|
||
|
- tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
|
||
|
- offsetof(CPUArchState, tlb_table[0][0].addr_read) + 4);
|
||
|
+ /* XXX: possibly we could use a block data load in the first access. */
|
||
|
+ tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, 4);
|
||
|
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
|
||
|
TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0));
|
||
|
# endif
|
||
|
tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
|
||
|
- offsetof(CPUArchState, tlb_table[0][0].addend));
|
||
|
+ offsetof(CPUTLBEntry, addend)
|
||
|
+ - offsetof(CPUTLBEntry, addr_read));
|
||
|
|
||
|
switch (opc) {
|
||
|
case 0:
|
||
|
@@ -1288,7 +1299,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
|
||
|
{
|
||
|
int addr_reg, data_reg, data_reg2, bswap;
|
||
|
#ifdef CONFIG_SOFTMMU
|
||
|
- int mem_index, s_bits;
|
||
|
+ int mem_index, s_bits, tlb_offset;
|
||
|
TCGReg argreg;
|
||
|
# if TARGET_LONG_BITS == 64
|
||
|
int addr_reg2;
|
||
|
@@ -1325,19 +1336,15 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
|
||
|
TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1);
|
||
|
tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0,
|
||
|
TCG_AREG0, TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
|
||
|
- /* In the
|
||
|
- * ldr r1 [r0, #(offsetof(CPUArchState, tlb_table[mem_index][0].addr_write))]
|
||
|
- * below, the offset is likely to exceed 12 bits if mem_index != 0 and
|
||
|
- * not exceed otherwise, so use an
|
||
|
- * add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table)
|
||
|
- * before.
|
||
|
- */
|
||
|
- if (mem_index)
|
||
|
+ /* We assume that the offset is contained within 20 bits. */
|
||
|
+ tlb_offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
|
||
|
+ assert(tlb_offset & ~0xfffff == 0);
|
||
|
+ if (tlb_offset > 0xfff) {
|
||
|
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0,
|
||
|
- (mem_index << (TLB_SHIFT & 1)) |
|
||
|
- ((16 - (TLB_SHIFT >> 1)) << 8));
|
||
|
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0,
|
||
|
- offsetof(CPUArchState, tlb_table[0][0].addr_write));
|
||
|
+ 0xa00 | (tlb_offset >> 12));
|
||
|
+ tlb_offset &= 0xfff;
|
||
|
+ }
|
||
|
+ tcg_out_ld32_12wb(s, COND_AL, TCG_REG_R1, TCG_REG_R0, tlb_offset);
|
||
|
tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1,
|
||
|
TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
|
||
|
/* Check alignment. */
|
||
|
@@ -1345,15 +1352,14 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
|
||
|
tcg_out_dat_imm(s, COND_EQ, ARITH_TST,
|
||
|
0, addr_reg, (1 << s_bits) - 1);
|
||
|
# if TARGET_LONG_BITS == 64
|
||
|
- /* XXX: possibly we could use a block data load or writeback in
|
||
|
- * the first access. */
|
||
|
- tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
|
||
|
- offsetof(CPUArchState, tlb_table[0][0].addr_write) + 4);
|
||
|
+ /* XXX: possibly we could use a block data load in the first access. */
|
||
|
+ tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, 4);
|
||
|
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
|
||
|
TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0));
|
||
|
# endif
|
||
|
tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
|
||
|
- offsetof(CPUArchState, tlb_table[0][0].addend));
|
||
|
+ offsetof(CPUTLBEntry, addend)
|
||
|
+ - offsetof(CPUTLBEntry, addr_write));
|
||
|
|
||
|
switch (opc) {
|
||
|
case 0:
|
||
|
--
|
||
|
1.8.0.2
|
||
|
|