From fd1ce7e33887f7299b5630a0a3bf6a635ad9560a Mon Sep 17 00:00:00 2001 From: David Abdurachmanov Date: Sun, 2 Feb 2025 06:37:01 +0200 Subject: [PATCH] Revert "Backport two RISCV [target] fixes from trunk" This reverts commit 108ae1dd96f64e2f046264d02e13c39ab650b53b. Signed-off-by: David Abdurachmanov --- gcc.spec | 14 +-- gcc15-pr116256.patch | 227 ------------------------------------------- gcc15-pr118103.patch | 217 ----------------------------------------- 3 files changed, 1 insertion(+), 457 deletions(-) delete mode 100644 gcc15-pr116256.patch delete mode 100644 gcc15-pr118103.patch diff --git a/gcc.spec b/gcc.spec index 61a6ea5..f9debcb 100644 --- a/gcc.spec +++ b/gcc.spec @@ -143,7 +143,7 @@ Summary: Various compilers (C, C++, Objective-C, ...) Name: gcc Version: %{gcc_version} -Release: %{gcc_release}.4.0.riscv64%{?dist} +Release: %{gcc_release}.4%{?dist} # License notes for some of the less obvious ones: # gcc/doc/cppinternals.texi: Linux-man-pages-copyleft-2-para # isl: MIT, BSD-2-Clause @@ -302,10 +302,6 @@ Patch12: gcc15-pr118206.patch Patch13: gcc15-d-deps.patch Patch14: gcc15-pr117231.patch -# RISCV -Patch20: gcc15-pr116256.patch -Patch21: gcc15-pr118103.patch - Patch50: isl-rh2155127.patch Patch100: gcc15-fortran-fdec-duplicates.patch @@ -923,10 +919,6 @@ so that there cannot be any synchronization problems. %patch -P13 -p0 -b .d-deps~ %patch -P14 -p0 -b .pr117231~ -# RISCV -%patch -P20 -p0 -b .pr116256~ -%patch -P21 -p0 -b .pr118103~ - %patch -P50 -p0 -b .rh2155127~ touch -r isl-0.24/m4/ax_prog_cxx_for_build.m4 isl-0.24/m4/ax_prog_cc_for_build.m4 @@ -3685,10 +3677,6 @@ end %endif %changelog -* Sun Jan 26 2025 David Abdurachmanov 15.0.1-0.4.0.riscv64 -- Pull riscv fixes from trunk - - PRs target/116256, target/118103 - * Sat Jan 25 2025 Jakub Jelinek 15.0.1-0.4 - update from trunk - PRs c/118639, c++/105440, c++/107522, c++/107741, c++/115769, c++/116417, diff --git a/gcc15-pr116256.patch b/gcc15-pr116256.patch deleted file mode 100644 index 89f0ee5..0000000 --- a/gcc15-pr116256.patch +++ /dev/null @@ -1,227 +0,0 @@ -From e5990a6ce611f522b8f48c2b469983da19d39777 Mon Sep 17 00:00:00 2001 -From: Jeff Law -Date: Sat, 25 Jan 2025 09:42:19 -0700 -Subject: [PATCH] [RISC-V][PR target/116256] Improve handling of single bit - constants - -So under the umbrella of pr116256 (P3 regression) I've been exploring removal -of the mvconst_internal pattern. Not surprisingly, that's going to cause all -kinds of undesirable fallout. While I can kind of see a path forward for that -work, it's going to require some combine work that I don't think we want to -tackle in the context of gcc-15. - -Essentially without mvconst_internal we'll have fully exposed constant -synthesis prior to combine. Remember that combine has limits on what -combinations it will perform based on how many instructions are in the source -sequence. If we need 2+ instructions to synthesize the constant, those eat -into our budget. - -In a world without mvconst_internal we'd need to either improve combine to -handle 5 insns cases (which do show up in the testsuite) or we need to -significantly improve how combine handles REG_EQUAL notes. 5 insn combinations -sound like insanity to me. So I'd tend to lean towards the latter, though -that's going to need some refactoring and diving into note redistribution -(ugh!). - -In the mean time we can start limiting mvconst_internal. For the remaining -case in pr116256 we have this code in combine: - -> (insn 8 5 10 2 (set (reg:V2048HF 138 [ _5 ]) -> (vec_duplicate:V2048HF (reg:HF 142 [ x ]))) "j.c":152:11 3712 {*vec_duplicatev2048hf} -> (expr_list:REG_DEAD (reg:HF 142 [ x ]) -> (nil))) -> (insn 10 8 11 2 (set (reg:DI 139) -> (const_int 2048 [0x800])) "j.c":152:11 275 {*mvconst_internal} -> (nil)) (insn 11 10 0 2 (set (mem:V2048HF (reg/f:DI 141 [ in ]) [1 MEM [(_Float16 *)in_7(D)]+0 S4096 A128]) -> (if_then_else:V2048HF (unspec:V2048BI [ -> (const_vector:V2048BI [ -> (const_int 1 [0x1]) repeated x2048 -> ]) -> (reg:DI 139) -> (const_int 2 [0x2]) repeated x3 -> (reg:SI 66 vl) -> (reg:SI 67 vtype) -> ] UNSPEC_VPREDICATE) -> (reg:V2048HF 138 [ _5 ]) -> (unspec:V2048HF [ -> (reg:DI 0 zero) -> ] UNSPEC_VUNDEF))) "j.c":152:11 3843 {*pred_movv2048hf} -> (expr_list:REG_DEAD (reg/f:DI 141 [ in ]) -> (expr_list:REG_DEAD (reg:DI 0 zero) -> (expr_list:REG_DEAD (reg:SI 66 vl) -> (expr_list:REG_DEAD (reg:SI 67 vtype) -> (expr_list:REG_DEAD (reg:V2048HF 138 [ _5 ]) -> (expr_list:REG_DEAD (reg:DI 139) -> (nil)))))))) - -Note a couple things. First insn 8 will be split shortly after combine and -will need the constant 2048. But that's obviously exposed late. Second (of -course) is the mvconst_internal pattern at insn 10. After split1 we'll have: - -> (insn 16 5 17 2 (set (reg:DI 144) (const_int 4096 [0x1000])) "j.c":152:11 -1 -> (nil)) -> (insn 17 16 18 2 (set (reg:DI 143) -> (plus:DI (reg:DI 144) -> (const_int -2048 [0xfffffffffffff800]))) "j.c":152:11 -1 -> (expr_list:REG_EQUAL (const_int 2048 [0x800]) -> (nil))) -> (insn 18 17 19 2 (set (reg:V2048HF 138 [ _5 ]) -> (if_then_else:V2048HF (unspec:V2048BI [ (const_vector:V2048BI [ -> (const_int 1 [0x1]) repeated x2048 -> ]) -> (reg:DI 143) -> (const_int 2 [0x2]) repeated x3 -> (reg:SI 66 vl) -> (reg:SI 67 vtype) -> ] UNSPEC_VPREDICATE) -> (vec_duplicate:V2048HF (reg:HF 142 [ x ])) -> (unspec:V2048HF [ (reg:DI 0 zero) -> ] UNSPEC_VUNDEF))) "j.c":152:11 -1 -> (nil)) -> (insn 19 18 20 2 (set (reg:DI 145) -> (const_int 4096 [0x1000])) "j.c":152:11 -1 -> (nil)) -> (insn 20 19 11 2 (set (reg:DI 139) -> (plus:DI (reg:DI 145) -> (const_int -2048 [0xfffffffffffff800]))) "j.c":152:11 -1 -> (expr_list:REG_EQUAL (const_int 2048 [0x800]) -> (nil))) -> (insn 11 20 0 2 (set (mem:V2048HF (reg/f:DI 141 [ in ]) [1 MEM [(_Float16 *)in_7(D)]+0 S4096 A128]) -> (if_then_else:V2048HF (unspec:V2048BI [ -> (const_vector:V2048BI [ -> (const_int 1 [0x1]) repeated x2048 -> ]) -> (reg:DI 139) (const_int 2 [0x2]) repeated x3 -> (reg:SI 66 vl) -> (reg:SI 67 vtype) -> ] UNSPEC_VPREDICATE) -> (reg:V2048HF 138 [ _5 ]) -> (unspec:V2048HF [ (reg:DI 0 zero) -> ] UNSPEC_VUNDEF))) "j.c":152:11 3843 {*pred_movv2048hf} -> (expr_list:REG_DEAD (reg/f:DI 141 [ in ]) -> (expr_list:REG_DEAD (reg:DI 0 zero) (expr_list:REG_DEAD (reg:SI 66 vl) -> (expr_list:REG_DEAD (reg:SI 67 vtype) -> (expr_list:REG_DEAD (reg:V2048HF 138 [ _5 ]) -> (expr_list:REG_DEAD (reg:DI 139) -> (nil)))))))) -Note the synthesis of 2048 appears twice. I seriously considered adding a -local cprop pass at this point. That could be done with a bit of work. It -didn't look too bad -- the biggest problem is cprop isn't designed to run once -we've left cfglayout. But we could probably finesse that by not allowing it to -change jumps if we've left cfglayout or converting it to do the more complex -jump fixups. - -You might ask why the post-reload optimizers don't help since this at least -looks like a case where they could. After LRA the RTL looks like: - -> (insn 26 5 25 2 (set (reg:DI 15 a5 [144]) -> (const_int 4096 [0x1000])) "/home/jlaw/test/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c":152:11 277 {*movdi_64bit} (expr_list:REG_EQUIV (const_int 4096 [0x1000]) -> (nil))) -> (insn 25 26 19 2 (set (reg:DI 15 a5 [143]) -> (plus:DI (reg:DI 15 a5 [144]) -> (const_int -2048 [0xfffffffffffff800]))) "/home/jlaw/test/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c":152:11 5 {adddi3} -> (expr_list:REG_EQUIV (const_int 2048 [0x800]) -> (nil))) -> (insn 19 25 20 2 (set (reg:V2048QI 100 v4 [orig:138 _11 ] [138]) -> (if_then_else:V2048QI (unspec:V2048BI [ -> (const_vector:V2048BI [ -> (const_int 1 [0x1]) repeated x2048 -> ]) -> (reg:DI 15 a5 [143]) -> (const_int 2 [0x2]) repeated x3 -> (reg:SI 66 vl) -> (reg:SI 67 vtype) -> ] UNSPEC_VPREDICATE) -> (vec_duplicate:V2048QI (reg:QI 12 a2 [145])) -> (unspec:V2048QI [ (reg:DI 0 zero) -> ] UNSPEC_VUNDEF))) "/home/jlaw/test/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c":152:11 4172 {*pred_broadcastv2048qi} -> (nil)) (insn 20 19 21 2 (set (reg:DI 15 a5 [146]) -> (const_int 4096 [0x1000])) "/home/jlaw/test/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c":152:11 277 {*movdi_64bit} (expr_list:REG_EQUIV (const_int 4096 [0x1000]) -> (nil))) -> (insn 21 20 11 2 (set (reg:DI 15 a5 [139]) -> (plus:DI (reg:DI 15 a5 [146]) -> (const_int -2048 [0xfffffffffffff800]))) "/home/jlaw/test/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c":152:11 5 {adddi3} -> (expr_list:REG_EQUIV (const_int 2048 [0x800]) -> (nil))) - -Note the re-use of a5 for the constant synthesis steps. That's going to spoil -any chance of reload_cse saving us. That re-use also gets in the way of vsetvl -elimination and we ultimately get this code: - -> foo10: -> li a5,4096 -> addi a5,a5,-2048 -> vsetvli zero,a5,e16,m8,ta,ma -> vfmv.v.f v8,fa0 -> li a5,4096 -> addi a5,a5,-2048 -> vsetvli zero,a5,e16,m8,ta,ma -> vse16.v v8,0(a0) -> ret -The regression is we have the obviously redundant vsetvl. The additional copy -of the synthesis is undesirable as well. - -If we filter out single bit constants from mvconst_internal we trivially fix -that regression. The only fallout is a class of saturation tests which want to -test against 0x80000000. Under the hood this is a minor codegen issue -interacting badly with combine's deliberate rejection of simplification of -extensions of constants. Rather than constructing the SImode constant, then -zero extending the result we can just generate the constant we actually want -directly in DImode. - -The net is we fix the regression, don't introduce any obvious new regressions -and slightly reduce our dependence on mvconst_internal. All good in my book. -Obviously I'll wait for pre-commit CI to render a verdict. - - PR target/116256 -gcc/ - * config/riscv/riscv.md (mvconst_internal): Reject single bit - constants. - * config/riscv/riscv.cc (riscv_gen_zero_extend_rtx): Improve - handling constants. ---- - gcc/config/riscv/riscv.cc | 12 +++++++++--- - gcc/config/riscv/riscv.md | 3 ++- - 2 files changed, 11 insertions(+), 4 deletions(-) - -diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc -index 5a3a05041773..4652454b8fec 100644 ---- a/gcc/config/riscv/riscv.cc -+++ b/gcc/config/riscv/riscv.cc -@@ -12684,10 +12684,16 @@ riscv_gen_zero_extend_rtx (rtx x, machine_mode mode) - emit_move_insn (xmode_reg, x); - else - { -- rtx reg_x = gen_reg_rtx (mode); -+ /* Combine deliberately does not simplify extensions of constants -+ (long story). So try to generate the zero extended constant -+ efficiently. - -- emit_move_insn (reg_x, x); -- riscv_emit_unary (ZERO_EXTEND, xmode_reg, reg_x); -+ First extract the constant and mask off all the bits not in MODE. */ -+ HOST_WIDE_INT val = INTVAL (x); -+ val &= GET_MODE_MASK (mode); -+ -+ /* X may need synthesis, so do not blindly copy it. */ -+ xmode_reg = force_reg (Xmode, gen_int_mode (val, Xmode)); - } - - return xmode_reg; -diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md -index e4123c912dcb..09053df1eb9b 100644 ---- a/gcc/config/riscv/riscv.md -+++ b/gcc/config/riscv/riscv.md -@@ -2470,7 +2470,8 @@ - (match_operand:GPR 1 "splittable_const_int_operand" "i"))] - "!ira_in_progress - && !(p2m1_shift_operand (operands[1], mode) -- || high_mask_shift_operand (operands[1], mode))" -+ || high_mask_shift_operand (operands[1], mode) -+ || exact_log2 (INTVAL (operands[1])) >= 0)" - "#" - "&& 1" - [(const_int 0)] --- -2.43.5 - diff --git a/gcc15-pr118103.patch b/gcc15-pr118103.patch deleted file mode 100644 index 3cb654e..0000000 --- a/gcc15-pr118103.patch +++ /dev/null @@ -1,217 +0,0 @@ -From 55d288d4ff5360c572f2a017ba9385840ac5134e Mon Sep 17 00:00:00 2001 -From: Pan Li -Date: Sat, 25 Jan 2025 15:45:10 +0800 -Subject: [PATCH] RISC-V: Make FRM as global register [PR118103] -MIME-Version: 1.0 -Content-Type: text/plain; charset=utf8 -Content-Transfer-Encoding: 8bit - -After we enabled the labe-combine pass after the mode-switching pass, it -will try to combine below insn patterns into op. Aka: - -(insn 40 5 41 2 (set (reg:SI 11 a1 [151]) - (reg:SI 69 frm)) "pr118103-simple.c":67:15 2712 {frrmsi} - (nil)) -(insn 41 40 7 2 (set (reg:SI 69 frm) - (const_int 2 [0x2])) "pr118103-simple.c":69:8 2710 {fsrmsi_restore} - (nil)) -(insn 42 10 11 2 (set (reg:SI 69 frm) - (reg:SI 11 a1 [151])) "pr118103-simple.c":70:8 2710 {fsrmsi_restore} - (nil)) - -trying to combine definition of r11 in: -40: a1:SI=frm:SI - into: -42: frm:SI=a1:SI - instruction becomes a no-op: -(set (reg:SI 69 frm) -(reg:SI 69 frm)) -original cost = 4 + 4 (weighted: 8.000000), replacement cost = -2147483647; keeping replacement -rescanning insn with uid = 42. -updating insn 42 in-place -verify found no changes in insn with uid = 42. -deleting insn 40 - -For example we have code as blow: - 9 │ int test_exampe () { - 10 │ test (); - 11 │ - 12 │ size_t vl = 4; - 13 │ vfloat16m1_t va = __riscv_vle16_v_f16m1(a, vl); - 14 │ va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl); - 15 │ va = __riscv_vfmsac_vv_f16m1(va, va, va, vl); - 16 │ - 17 │ __riscv_vse16_v_f16m1(b, va, vl); - 18 │ - 19 │ return 0; - 20 │ } - -it will be compiled to: - 53 │ main: - 54 │ addi sp,sp,-16 - 55 │ sd ra,8(sp) - 56 │ call initialize - 57 │ lui a6,%hi(b) - 58 │ lui a2,%hi(a) - 59 │ addi a3,a6,%lo(b) - 60 │ addi a2,a2,%lo(a) - 61 │ li a4,4 - 62 │ .L8: - 63 │ fsrmi 2 - 64 │ vsetvli a5,a4,e16,m1,ta,ma - 65 │ vle16.v v1,0(a2) - 66 │ slli a1,a5,1 - 67 │ subw a4,a4,a5 - 68 │ add a2,a2,a1 - 69 │ vfnmadd.vv v1,v1,v1 - >> The fsrm a0 insn is deleted by late-combine << - 70 │ vfmsub.vv v1,v1,v1 - 71 │ vse16.v v1,0(a3) - 72 │ add a3,a3,a1 - 73 │ bgt a4,zero,.L8 - 74 │ lh a4,%lo(b)(a6) - 75 │ li a5,-20480 - 76 │ addi a5,a5,-1382 - 77 │ bne a4,a5,.L14 - 78 │ ld ra,8(sp) - 79 │ li a0,0 - 80 │ addi sp,sp,16 - 81 │ jr ra - -This patch would like to add the FRM register to the global_regs as it -is a cooperatively-managed global register. And then the fsrm insn will -not be eliminated by late-combine. The related spec17 cam4 failure may -also caused by this issue too. - -The below test suites are passed for this patch. -* The rv64gcv fully regression test. - - PR target/118103 - -gcc/ChangeLog: - - * config/riscv/riscv.cc (riscv_conditional_register_usage): Add - the FRM as the global_regs. - -gcc/testsuite/ChangeLog: - - * gcc.target/riscv/rvv/base/pr118103-1.c: New test. - * gcc.target/riscv/rvv/base/pr118103-run-1.c: New test. - -Signed-off-by: Pan Li ---- - gcc/config/riscv/riscv.cc | 4 +- - .../gcc.target/riscv/rvv/base/pr118103-1.c | 27 ++++++++++ - .../riscv/rvv/base/pr118103-run-1.c | 50 +++++++++++++++++++ - 3 files changed, 80 insertions(+), 1 deletion(-) - create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c - create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c - -diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc -index 4652454b8fec..dd50fe4eddfb 100644 ---- a/gcc/config/riscv/riscv.cc -+++ b/gcc/config/riscv/riscv.cc -@@ -10885,7 +10885,9 @@ riscv_conditional_register_usage (void) - call_used_regs[r] = 1; - } - -- if (!TARGET_HARD_FLOAT) -+ if (TARGET_HARD_FLOAT) -+ global_regs[FRM_REGNUM] = 1; -+ else - { - for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++) - fixed_regs[regno] = call_used_regs[regno] = 1; -diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c -new file mode 100644 -index 000000000000..1afa5d3afb50 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c -@@ -0,0 +1,27 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O3 -march=rv64gcv_zvfh -mabi=lp64d" } */ -+ -+#include "riscv_vector.h" -+ -+#define N 4 -+typedef _Float16 float16_t; -+float16_t a[N]; float16_t b[N]; -+ -+extern void test (); -+ -+int test_exampe () { -+ test (); -+ -+ size_t vl = N; -+ vfloat16m1_t va = __riscv_vle16_v_f16m1(a, vl); -+ va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl); -+ va = __riscv_vfmsac_vv_f16m1(va, va, va, vl); -+ -+ __riscv_vse16_v_f16m1(b, va, vl); -+ -+ return 0; -+} -+ -+/* { dg-final { scan-assembler-times {frrm\s+[axs][0-9]+} 1 } } */ -+/* { dg-final { scan-assembler-times {fsrmi\s+[01234]} 1 } } */ -+/* { dg-final { scan-assembler-times {fsrm\s+[axs][0-9]+} 1 } } */ -diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c -new file mode 100644 -index 000000000000..62375c63ee86 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c -@@ -0,0 +1,50 @@ -+/* { dg-do run { target { riscv_zvfh } } } */ -+/* { dg-options "-O3 -fno-strict-aliasing" } */ -+ -+#include "riscv_vector.h" -+#define N 4 -+typedef _Float16 float16_t; -+float16_t a[N]; float16_t b[N]; -+ -+void initialize () { -+ uint16_t tmp_0[N] = {43883, 3213, 238, 275, }; -+ -+ for (int i = 0; i < N; ++i) -+ { -+ union { float16_t f16; uint16_t u16; } converter; -+ converter.u16 = tmp_0[i]; -+ a[i] = converter.f16; -+ } -+ -+ for (int i = 0; i < N; ++i) -+ b[i] = 0; -+} -+ -+void compute () -+{ -+ int avl = N; -+ float16_t* ptr_a = a; float16_t* ptr_b = b; -+ -+ for (size_t vl; avl > 0; avl -= vl) -+ { -+ vl = __riscv_vsetvl_e16m1(avl); -+ vfloat16m1_t va = __riscv_vle16_v_f16m1(ptr_a, vl); -+ va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl); -+ va = __riscv_vfmsac_vv_f16m1(va, va, va, vl); -+ __riscv_vse16_v_f16m1(ptr_b, va, vl); -+ ptr_a += vl; ptr_b += vl; -+ } -+} -+ -+int main () -+{ -+ initialize(); -+ compute(); -+ -+ short *tmp = (short *)b; -+ -+ if (*tmp != -21862) -+ __builtin_abort (); -+ -+ return 0; -+} --- -2.43.5 -