qemu/0057-tcg-hppa-Fix-broken-lo...

250 lines
10 KiB
Diff

From c13ecfea174994d3f7f7d392f0faaed6d40efd9e Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Tue, 18 Sep 2012 19:59:48 -0700
Subject: [PATCH] tcg-hppa: Fix broken load/store helpers
The CONFIG_TCG_PASS_AREG0 code for calling ld/st helpers
was not respecting the ABI requirement for 64-bit values
being aligned in registers.
Mirror the ARM port in use of helper functions to marshal
arguments into the correct registers.
Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
tcg/hppa/tcg-target.c | 136 +++++++++++++++++++++++++++-----------------------
1 file changed, 74 insertions(+), 62 deletions(-)
diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index a76569d..5385d45 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -976,10 +976,11 @@ static int tcg_out_tlb_read(TCGContext *s, int r0, int r1, int addrlo,
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, r1, offset);
}
- /* Compute the value that ought to appear in the TLB for a hit, namely, the page
- of the address. We include the low N bits of the address to catch unaligned
- accesses and force them onto the slow path. Do this computation after having
- issued the load from the TLB slot to give the load time to complete. */
+ /* Compute the value that ought to appear in the TLB for a hit, namely,
+ the page of the address. We include the low N bits of the address
+ to catch unaligned accesses and force them onto the slow path. Do
+ this computation after having issued the load from the TLB slot to
+ give the load time to complete. */
tcg_out_andi(s, r0, addrlo, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
/* If not equal, jump to lab_miss. */
@@ -992,6 +993,36 @@ static int tcg_out_tlb_read(TCGContext *s, int r0, int r1, int addrlo,
return ret;
}
+
+static int tcg_out_arg_reg32(TCGContext *s, int argno, TCGArg v, bool vconst)
+{
+ if (argno < 4) {
+ if (vconst) {
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[argno], v);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[argno], v);
+ }
+ } else {
+ if (vconst && v != 0) {
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R20, v);
+ v = TCG_REG_R20;
+ }
+ tcg_out_st(s, TCG_TYPE_I32, v, TCG_REG_CALL_STACK,
+ TCG_TARGET_CALL_STACK_OFFSET - ((argno - 3) * 4));
+ }
+ return argno + 1;
+}
+
+static int tcg_out_arg_reg64(TCGContext *s, int argno, TCGArg vl, TCGArg vh)
+{
+ /* 64-bit arguments must go in even reg pairs and stack slots. */
+ if (argno & 1) {
+ argno++;
+ }
+ argno = tcg_out_arg_reg32(s, argno, vl, false);
+ argno = tcg_out_arg_reg32(s, argno, vh, false);
+ return argno;
+}
#endif
static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo_reg, int datahi_reg,
@@ -1072,39 +1103,36 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
/* Note that addrhi_reg is only used for 64-bit guests. */
int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0);
int mem_index = *args;
- int lab1, lab2, argreg, offset;
+ int lab1, lab2, argno, offset;
lab1 = gen_new_label();
lab2 = gen_new_label();
offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read);
- offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg,
- opc & 3, lab1, offset);
+ offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg,
+ addrhi_reg, opc & 3, lab1, offset);
/* TLB Hit. */
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25),
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20,
+ (offset ? TCG_REG_R1 : TCG_REG_R25),
offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset);
- tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg, TCG_REG_R20, opc);
+ tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg,
+ TCG_REG_R20, opc);
tcg_out_branch(s, lab2, 1);
/* TLB Miss. */
/* label1: */
tcg_out_label(s, lab1, s->code_ptr);
- argreg = TCG_REG_R26;
- tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg);
+ argno = 0;
+ argno = tcg_out_arg_reg32(s, argno, TCG_AREG0, false);
if (TARGET_LONG_BITS == 64) {
- tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg);
+ argno = tcg_out_arg_reg64(s, argno, addrlo_reg, addrhi_reg);
+ } else {
+ argno = tcg_out_arg_reg32(s, argno, addrlo_reg, false);
}
- tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
-
- /* XXX/FIXME: suboptimal */
- tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
- tcg_target_call_iarg_regs[1]);
- tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
- tcg_target_call_iarg_regs[0]);
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
- TCG_AREG0);
+ argno = tcg_out_arg_reg32(s, argno, mem_index, true);
+
tcg_out_call(s, qemu_ld_helpers[opc & 3]);
switch (opc) {
@@ -1140,8 +1168,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
#endif
}
-static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg, int datahi_reg,
- int addr_reg, int opc)
+static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg,
+ int datahi_reg, int addr_reg, int opc)
{
#ifdef TARGET_WORDS_BIGENDIAN
const int bswap = 0;
@@ -1194,17 +1222,18 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
/* Note that addrhi_reg is only used for 64-bit guests. */
int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0);
int mem_index = *args;
- int lab1, lab2, argreg, offset;
+ int lab1, lab2, argno, next, offset;
lab1 = gen_new_label();
lab2 = gen_new_label();
offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
- offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg,
- opc, lab1, offset);
+ offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg,
+ addrhi_reg, opc, lab1, offset);
/* TLB Hit. */
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25),
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20,
+ (offset ? TCG_REG_R1 : TCG_REG_R25),
offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset);
/* There are no indexed stores, so we must do this addition explitly.
@@ -1217,63 +1246,46 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
/* label1: */
tcg_out_label(s, lab1, s->code_ptr);
- argreg = TCG_REG_R26;
- tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg);
+ argno = 0;
+ argno = tcg_out_arg_reg32(s, argno, TCG_AREG0, false);
if (TARGET_LONG_BITS == 64) {
- tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg);
+ argno = tcg_out_arg_reg64(s, argno, addrlo_reg, addrhi_reg);
+ } else {
+ argno = tcg_out_arg_reg32(s, argno, addrlo_reg, false);
}
+ next = (argno < 4 ? tcg_target_call_iarg_regs[argno] : TCG_REG_R20);
switch(opc) {
case 0:
- tcg_out_andi(s, argreg--, datalo_reg, 0xff);
- tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+ tcg_out_andi(s, next, datalo_reg, 0xff);
+ argno = tcg_out_arg_reg32(s, argno, next, false);
break;
case 1:
- tcg_out_andi(s, argreg--, datalo_reg, 0xffff);
- tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+ tcg_out_andi(s, next, datalo_reg, 0xffff);
+ argno = tcg_out_arg_reg32(s, argno, next, false);
break;
case 2:
- tcg_out_mov(s, TCG_TYPE_I32, argreg--, datalo_reg);
- tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+ argno = tcg_out_arg_reg32(s, argno, datalo_reg, false);
break;
case 3:
- /* Because of the alignment required by the 64-bit data argument,
- we will always use R23/R24. Also, we will always run out of
- argument registers for storing mem_index, so that will have
- to go on the stack. */
- if (mem_index == 0) {
- argreg = TCG_REG_R0;
- } else {
- argreg = TCG_REG_R20;
- tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
- }
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R23, datahi_reg);
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R24, datalo_reg);
- tcg_out_st(s, TCG_TYPE_I32, argreg, TCG_REG_CALL_STACK,
- TCG_TARGET_CALL_STACK_OFFSET - 4);
+ argno = tcg_out_arg_reg64(s, argno, datalo_reg, datahi_reg);
break;
default:
tcg_abort();
}
+ argno = tcg_out_arg_reg32(s, argno, mem_index, true);
- /* XXX/FIXME: suboptimal */
- tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
- tcg_target_call_iarg_regs[2]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
- tcg_target_call_iarg_regs[1]);
- tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
- tcg_target_call_iarg_regs[0]);
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
- TCG_AREG0);
tcg_out_call(s, qemu_st_helpers[opc]);
/* label2: */
tcg_out_label(s, lab2, s->code_ptr);
#else
- /* There are no indexed stores, so if GUEST_BASE is set we must do the add
- explicitly. Careful to avoid R20, which is used for the bswaps to follow. */
+ /* There are no indexed stores, so if GUEST_BASE is set we must do
+ the add explicitly. Careful to avoid R20, which is used for the
+ bswaps to follow. */
if (GUEST_BASE != 0) {
- tcg_out_arith(s, TCG_REG_R31, addrlo_reg, TCG_GUEST_BASE_REG, INSN_ADDL);
+ tcg_out_arith(s, TCG_REG_R31, addrlo_reg,
+ TCG_GUEST_BASE_REG, INSN_ADDL);
addrlo_reg = TCG_REG_R31;
}
tcg_out_qemu_st_direct(s, datalo_reg, datahi_reg, addrlo_reg, opc);
--
1.7.12.1