Revert "Backport two RISCV [target] fixes from trunk"

This reverts commit 108ae1dd96f64e2f046264d02e13c39ab650b53b.

Signed-off-by: David Abdurachmanov <davidlt@rivosinc.com>
2025-02-02 06:37:01 +02:00 · 2025-02-02 06:37:01 +02:00 · fd1ce7e338
commit fd1ce7e338
parent 108ae1dd96
3 changed files with 1 additions and 457 deletions
--- a/gcc.spec
+++ b/gcc.spec
@@ -143,7 +143,7 @@
 Summary: Various compilers (C, C++, Objective-C, ...)
 Name: gcc
 Version: %{gcc_version}
-Release: %{gcc_release}.4.0.riscv64%{?dist}
+Release: %{gcc_release}.4%{?dist}
 # License notes for some of the less obvious ones:
 #   gcc/doc/cppinternals.texi: Linux-man-pages-copyleft-2-para
 #   isl: MIT, BSD-2-Clause
@@ -302,10 +302,6 @@ Patch12: gcc15-pr118206.patch
 Patch13: gcc15-d-deps.patch
 Patch14: gcc15-pr117231.patch

-# RISCV
-Patch20: gcc15-pr116256.patch
-Patch21: gcc15-pr118103.patch
-
 Patch50: isl-rh2155127.patch

 Patch100: gcc15-fortran-fdec-duplicates.patch
@@ -923,10 +919,6 @@ so that there cannot be any synchronization problems.
 %patch -P13 -p0 -b .d-deps~
 %patch -P14 -p0 -b .pr117231~

-# RISCV
-%patch -P20 -p0 -b .pr116256~
-%patch -P21 -p0 -b .pr118103~
-
 %patch -P50 -p0 -b .rh2155127~
 touch -r isl-0.24/m4/ax_prog_cxx_for_build.m4 isl-0.24/m4/ax_prog_cc_for_build.m4

@@ -3685,10 +3677,6 @@ end
 %endif

 %changelog
-* Sun Jan 26 2025 David Abdurachmanov <davidlt@rivosinc.com> 15.0.1-0.4.0.riscv64
- Pull riscv fixes from trunk
-  - PRs target/116256, target/118103
-
 * Sat Jan 25 2025 Jakub Jelinek <jakub@redhat.com> 15.0.1-0.4
 - update from trunk
  - PRs c/118639, c++/105440, c++/107522, c++/107741, c++/115769, c++/116417,
--- a/gcc15-pr116256.patch
+++ b/gcc15-pr116256.patch
@@ -1,227 +0,0 @@
-From e5990a6ce611f522b8f48c2b469983da19d39777 Mon Sep 17 00:00:00 2001
-From: Jeff Law <jlaw@ventanamicro.com>
-Date: Sat, 25 Jan 2025 09:42:19 -0700
-Subject: [PATCH] [RISC-V][PR target/116256] Improve handling of single bit
- constants
-
-So under the umbrella of pr116256 (P3 regression) I've been exploring removal
-of the mvconst_internal pattern.   Not surprisingly, that's going to cause all
-kinds of undesirable fallout.  While I can kind of see a path forward for that
-work, it's going to require some combine work that I don't think we want to
-tackle in the context of gcc-15.
-
-Essentially without mvconst_internal we'll have fully exposed constant
-synthesis prior to combine.  Remember that combine has limits on what
-combinations it will perform based on how many instructions are in the source
-sequence.  If we need 2+ instructions to synthesize the constant, those eat
-into our budget.
-
-In a world without mvconst_internal we'd need to either improve combine to
-handle 5 insns cases (which do show up in the testsuite) or we need to
-significantly improve how combine handles REG_EQUAL notes.  5 insn combinations
-sound like insanity to me.  So I'd tend to lean towards the latter, though
-that's going to need some refactoring and diving into note redistribution
-(ugh!).
-
-In the mean time we can start limiting mvconst_internal.  For the remaining
-case in pr116256 we have this code in combine:
-
-> (insn 8 5 10 2 (set (reg:V2048HF 138 [ _5 ])
->         (vec_duplicate:V2048HF (reg:HF 142 [ x ]))) "j.c":152:11 3712 {*vec_duplicatev2048hf}
->      (expr_list:REG_DEAD (reg:HF 142 [ x ])
->         (nil)))
-> (insn 10 8 11 2 (set (reg:DI 139)
->         (const_int 2048 [0x800])) "j.c":152:11 275 {*mvconst_internal}
->      (nil))
-> (insn 11 10 0 2 (set (mem:V2048HF (reg/f:DI 141 [ in ]) [1 MEM <vector(2048) _Float16> [(_Float16 *)in_7(D)]+0 S4096 A128])
->         (if_then_else:V2048HF (unspec:V2048BI [
->                     (const_vector:V2048BI [
->                             (const_int 1 [0x1]) repeated x2048
->                         ])
->                     (reg:DI 139)
->                     (const_int 2 [0x2]) repeated x3
->                     (reg:SI 66 vl)
->                     (reg:SI 67 vtype)
->                 ] UNSPEC_VPREDICATE)
->             (reg:V2048HF 138 [ _5 ])
->             (unspec:V2048HF [
->                     (reg:DI 0 zero)
->                 ] UNSPEC_VUNDEF))) "j.c":152:11 3843 {*pred_movv2048hf}
->      (expr_list:REG_DEAD (reg/f:DI 141 [ in ])
->         (expr_list:REG_DEAD (reg:DI 0 zero)
->             (expr_list:REG_DEAD (reg:SI 66 vl)
->                 (expr_list:REG_DEAD (reg:SI 67 vtype)
->                     (expr_list:REG_DEAD (reg:V2048HF 138 [ _5 ])
->                         (expr_list:REG_DEAD (reg:DI 139)
->                             (nil))))))))
-
-Note a couple things.  First insn 8 will be split shortly after combine and
-will need the constant 2048.  But that's obviously exposed  late. Second (of
-course) is the mvconst_internal pattern at insn 10.  After split1 we'll have:
-
-> (insn 16 5 17 2 (set (reg:DI 144)
->         (const_int 4096 [0x1000])) "j.c":152:11 -1
->      (nil))
-> (insn 17 16 18 2 (set (reg:DI 143)
->         (plus:DI (reg:DI 144)
->             (const_int -2048 [0xfffffffffffff800]))) "j.c":152:11 -1
->      (expr_list:REG_EQUAL (const_int 2048 [0x800])
->         (nil)))
-> (insn 18 17 19 2 (set (reg:V2048HF 138 [ _5 ])
->         (if_then_else:V2048HF (unspec:V2048BI [
->                     (const_vector:V2048BI [
->                             (const_int 1 [0x1]) repeated x2048
->                         ])
->                     (reg:DI 143)
->                     (const_int 2 [0x2]) repeated x3
->                     (reg:SI 66 vl)
->                     (reg:SI 67 vtype)
->                 ] UNSPEC_VPREDICATE)
->             (vec_duplicate:V2048HF (reg:HF 142 [ x ]))
->             (unspec:V2048HF [
->                     (reg:DI 0 zero)
->                 ] UNSPEC_VUNDEF))) "j.c":152:11 -1
->      (nil))
-> (insn 19 18 20 2 (set (reg:DI 145)
->         (const_int 4096 [0x1000])) "j.c":152:11 -1
->      (nil))
-> (insn 20 19 11 2 (set (reg:DI 139)
->         (plus:DI (reg:DI 145)
->             (const_int -2048 [0xfffffffffffff800]))) "j.c":152:11 -1
->      (expr_list:REG_EQUAL (const_int 2048 [0x800])
->         (nil)))
-> (insn 11 20 0 2 (set (mem:V2048HF (reg/f:DI 141 [ in ]) [1 MEM <vector(2048) _Float16> [(_Float16 *)in_7(D)]+0 S4096 A128])
->         (if_then_else:V2048HF (unspec:V2048BI [
->                     (const_vector:V2048BI [
->                             (const_int 1 [0x1]) repeated x2048
->                         ])
->                     (reg:DI 139)
->                     (const_int 2 [0x2]) repeated x3
->                     (reg:SI 66 vl)
->                     (reg:SI 67 vtype)
->                 ] UNSPEC_VPREDICATE)
->             (reg:V2048HF 138 [ _5 ])
->             (unspec:V2048HF [
->                     (reg:DI 0 zero)
->                 ] UNSPEC_VUNDEF))) "j.c":152:11 3843 {*pred_movv2048hf}
->      (expr_list:REG_DEAD (reg/f:DI 141 [ in ])
->         (expr_list:REG_DEAD (reg:DI 0 zero)
->             (expr_list:REG_DEAD (reg:SI 66 vl)
->                 (expr_list:REG_DEAD (reg:SI 67 vtype)
->                     (expr_list:REG_DEAD (reg:V2048HF 138 [ _5 ])
->                         (expr_list:REG_DEAD (reg:DI 139)
->                             (nil))))))))
-Note the synthesis of 2048 appears twice.  I seriously considered adding a
-local cprop pass at this point.  That could be done with a bit of work.  It
-didn't look too bad -- the biggest problem is cprop isn't designed to run once
-we've left cfglayout.  But we could probably finesse that by not allowing it to
-change jumps if we've left cfglayout or converting it to do the more complex
-jump fixups.
-
-You might ask why the post-reload optimizers don't help since this at least
-looks like a case where they could.  After LRA the RTL looks like:
-
-> (insn 26 5 25 2 (set (reg:DI 15 a5 [144])
->         (const_int 4096 [0x1000])) "/home/jlaw/test/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c":152:11 277 {*movdi_64bit}
->      (expr_list:REG_EQUIV (const_int 4096 [0x1000])
->         (nil)))
-> (insn 25 26 19 2 (set (reg:DI 15 a5 [143])
->         (plus:DI (reg:DI 15 a5 [144])
->             (const_int -2048 [0xfffffffffffff800]))) "/home/jlaw/test/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c":152:11 5 {adddi3}
->      (expr_list:REG_EQUIV (const_int 2048 [0x800])
->         (nil)))
-> (insn 19 25 20 2 (set (reg:V2048QI 100 v4 [orig:138 _11 ] [138])
->         (if_then_else:V2048QI (unspec:V2048BI [
->                     (const_vector:V2048BI [
->                             (const_int 1 [0x1]) repeated x2048
->                         ])
->                     (reg:DI 15 a5 [143])
->                     (const_int 2 [0x2]) repeated x3
->                     (reg:SI 66 vl)
->                     (reg:SI 67 vtype)
->                 ] UNSPEC_VPREDICATE)
->             (vec_duplicate:V2048QI (reg:QI 12 a2 [145]))
->             (unspec:V2048QI [
->                     (reg:DI 0 zero)
->                 ] UNSPEC_VUNDEF))) "/home/jlaw/test/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c":152:11 4172 {*pred_broadcastv2048qi}
->      (nil))
-> (insn 20 19 21 2 (set (reg:DI 15 a5 [146])
->         (const_int 4096 [0x1000])) "/home/jlaw/test/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c":152:11 277 {*movdi_64bit}
->      (expr_list:REG_EQUIV (const_int 4096 [0x1000])
->         (nil)))
-> (insn 21 20 11 2 (set (reg:DI 15 a5 [139])
->         (plus:DI (reg:DI 15 a5 [146])
->             (const_int -2048 [0xfffffffffffff800]))) "/home/jlaw/test/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c":152:11 5 {adddi3}
->      (expr_list:REG_EQUIV (const_int 2048 [0x800])
->         (nil)))
-
-Note the re-use of a5 for the constant synthesis steps.  That's going to spoil
-any chance of reload_cse saving us.  That re-use also gets in the way of vsetvl
-elimination and we ultimately get this code:
-
-> foo10:
->         li      a5,4096
->         addi    a5,a5,-2048
->         vsetvli zero,a5,e16,m8,ta,ma
->         vfmv.v.f        v8,fa0
->         li      a5,4096
->         addi    a5,a5,-2048
->         vsetvli zero,a5,e16,m8,ta,ma
->         vse16.v v8,0(a0)
->         ret
-The regression is we have the obviously redundant vsetvl.  The additional copy
-of the synthesis is undesirable as well.
-
-If we filter out single bit constants from mvconst_internal we trivially fix
-that regression.  The only fallout is a class of saturation tests which want to
-test against 0x80000000.   Under the hood this is a minor codegen issue
-interacting badly with combine's deliberate rejection of simplification of
-extensions of constants.  Rather than constructing the SImode constant, then
-zero extending the result we can just generate the constant we actually want
-directly in DImode.
-
-The net is we fix the regression, don't introduce any obvious new regressions
-and slightly reduce our dependence on mvconst_internal.  All good in my book.
-Obviously I'll wait for pre-commit CI to render a verdict.
-
-	PR target/116256
-gcc/
-	* config/riscv/riscv.md (mvconst_internal): Reject single bit
-	constants.
-	* config/riscv/riscv.cc (riscv_gen_zero_extend_rtx): Improve
-	handling constants.
---
- gcc/config/riscv/riscv.cc | 12 +++++++++---
- gcc/config/riscv/riscv.md |  3 ++-
- 2 files changed, 11 insertions(+), 4 deletions(-)
-
-diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
-index 5a3a05041773..4652454b8fec 100644
--- a/gcc/config/riscv/riscv.cc
-+++ b/gcc/config/riscv/riscv.cc
-@@ -12684,10 +12684,16 @@ riscv_gen_zero_extend_rtx (rtx x, machine_mode mode)
-     emit_move_insn (xmode_reg, x);
-   else
-     {
-      rtx reg_x = gen_reg_rtx (mode);
-+      /* Combine deliberately does not simplify extensions of constants
-+	 (long story).  So try to generate the zero extended constant
-+	 efficiently.
- 
-      emit_move_insn (reg_x, x);
-      riscv_emit_unary (ZERO_EXTEND, xmode_reg, reg_x);
-+	 First extract the constant and mask off all the bits not in MODE.  */
-+      HOST_WIDE_INT val = INTVAL (x);
-+      val &= GET_MODE_MASK (mode);
-+
-+      /* X may need synthesis, so do not blindly copy it.  */
-+      xmode_reg = force_reg (Xmode, gen_int_mode (val, Xmode));
-     }
- 
-   return xmode_reg;
-diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
-index e4123c912dcb..09053df1eb9b 100644
--- a/gcc/config/riscv/riscv.md
-+++ b/gcc/config/riscv/riscv.md
-@@ -2470,7 +2470,8 @@
-         (match_operand:GPR 1 "splittable_const_int_operand" "i"))]
-   "!ira_in_progress
-    && !(p2m1_shift_operand (operands[1], <MODE>mode)
-        || high_mask_shift_operand (operands[1], <MODE>mode))"
-+	|| high_mask_shift_operand (operands[1], <MODE>mode)
-+	|| exact_log2 (INTVAL (operands[1])) >= 0)"
-   "#"
-   "&& 1"
-   [(const_int 0)]
-- 
-2.43.5
-
--- a/gcc15-pr118103.patch
+++ b/gcc15-pr118103.patch
@@ -1,217 +0,0 @@
-From 55d288d4ff5360c572f2a017ba9385840ac5134e Mon Sep 17 00:00:00 2001
-From: Pan Li <pan2.li@intel.com>
-Date: Sat, 25 Jan 2025 15:45:10 +0800
-Subject: [PATCH] RISC-V: Make FRM as global register [PR118103]
-MIME-Version: 1.0
-Content-Type: text/plain; charset=utf8
-Content-Transfer-Encoding: 8bit
-
-After we enabled the labe-combine pass after the mode-switching pass, it
-will try to combine below insn patterns into op.  Aka:
-
-(insn 40 5 41 2 (set (reg:SI 11 a1 [151])
-  (reg:SI 69 frm)) "pr118103-simple.c":67:15 2712 {frrmsi}
-  (nil))
-(insn 41 40 7 2 (set (reg:SI 69 frm)
-  (const_int 2 [0x2])) "pr118103-simple.c":69:8 2710 {fsrmsi_restore}
-  (nil))
-(insn 42 10 11 2 (set (reg:SI 69 frm)
-  (reg:SI 11 a1 [151])) "pr118103-simple.c":70:8 2710 {fsrmsi_restore}
-    (nil))
-
-trying to combine definition of r11 in:
-40: a1:SI=frm:SI
-    into:
-42: frm:SI=a1:SI
-    instruction becomes a no-op:
-(set (reg:SI 69 frm)
-(reg:SI 69 frm))
-original cost = 4 + 4 (weighted: 8.000000), replacement cost =
-2147483647; keeping replacement
-rescanning insn with uid = 42.
-updating insn 42 in-place
-verify found no changes in insn with uid = 42.
-deleting insn 40
-
-For example we have code as blow:
-   9   │ int test_exampe () {
-  10   │   test ();
-  11   │
-  12   │   size_t vl = 4;
-  13   │   vfloat16m1_t va = __riscv_vle16_v_f16m1(a, vl);
-  14   │   va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl);
-  15   │   va = __riscv_vfmsac_vv_f16m1(va, va, va, vl);
-  16   │
-  17   │   __riscv_vse16_v_f16m1(b, va, vl);
-  18   │
-  19   │   return 0;
-  20   │ }
-
-it will be compiled to:
-  53   │ main:
-  54   │     addi    sp,sp,-16
-  55   │     sd  ra,8(sp)
-  56   │     call    initialize
-  57   │     lui a6,%hi(b)
-  58   │     lui a2,%hi(a)
-  59   │     addi    a3,a6,%lo(b)
-  60   │     addi    a2,a2,%lo(a)
-  61   │     li  a4,4
-  62   │ .L8:
-  63   │     fsrmi   2
-  64   │     vsetvli a5,a4,e16,m1,ta,ma
-  65   │     vle16.v v1,0(a2)
-  66   │     slli    a1,a5,1
-  67   │     subw    a4,a4,a5
-  68   │     add a2,a2,a1
-  69   │     vfnmadd.vv  v1,v1,v1
-  >> The fsrm a0 insn is deleted by late-combine <<
-  70   │     vfmsub.vv   v1,v1,v1
-  71   │     vse16.v v1,0(a3)
-  72   │     add a3,a3,a1
-  73   │     bgt a4,zero,.L8
-  74   │     lh  a4,%lo(b)(a6)
-  75   │     li  a5,-20480
-  76   │     addi    a5,a5,-1382
-  77   │     bne a4,a5,.L14
-  78   │     ld  ra,8(sp)
-  79   │     li  a0,0
-  80   │     addi    sp,sp,16
-  81   │     jr  ra
-
-This patch would like to add the FRM register to the global_regs as it
-is a cooperatively-managed global register.  And then the fsrm insn will
-not be eliminated by late-combine.  The related spec17 cam4 failure may
-also caused by this issue too.
-
-The below test suites are passed for this patch.
-* The rv64gcv fully regression test.
-
-	PR target/118103
-
-gcc/ChangeLog:
-
-	* config/riscv/riscv.cc (riscv_conditional_register_usage): Add
-	the FRM as the global_regs.
-
-gcc/testsuite/ChangeLog:
-
-	* gcc.target/riscv/rvv/base/pr118103-1.c: New test.
-	* gcc.target/riscv/rvv/base/pr118103-run-1.c: New test.
-
-Signed-off-by: Pan Li <pan2.li@intel.com>
---
- gcc/config/riscv/riscv.cc                     |  4 +-
- .../gcc.target/riscv/rvv/base/pr118103-1.c    | 27 ++++++++++
- .../riscv/rvv/base/pr118103-run-1.c           | 50 +++++++++++++++++++
- 3 files changed, 80 insertions(+), 1 deletion(-)
- create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c
- create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c
-
-diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
-index 4652454b8fec..dd50fe4eddfb 100644
--- a/gcc/config/riscv/riscv.cc
-+++ b/gcc/config/riscv/riscv.cc
-@@ -10885,7 +10885,9 @@ riscv_conditional_register_usage (void)
- 	call_used_regs[r] = 1;
-     }
- 
-  if (!TARGET_HARD_FLOAT)
-+  if (TARGET_HARD_FLOAT)
-+    global_regs[FRM_REGNUM] = 1;
-+  else
-     {
-       for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
- 	fixed_regs[regno] = call_used_regs[regno] = 1;
-diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c
-new file mode 100644
-index 000000000000..1afa5d3afb50
--- /dev/null
-+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c
-@@ -0,0 +1,27 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O3 -march=rv64gcv_zvfh -mabi=lp64d" } */
-+
-+#include "riscv_vector.h"
-+
-+#define N 4
-+typedef _Float16 float16_t;
-+float16_t a[N]; float16_t b[N];
-+
-+extern void test ();
-+
-+int test_exampe () {
-+  test ();
-+
-+  size_t vl = N;
-+  vfloat16m1_t va = __riscv_vle16_v_f16m1(a, vl);
-+  va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl);
-+  va = __riscv_vfmsac_vv_f16m1(va, va, va, vl);
-+
-+  __riscv_vse16_v_f16m1(b, va, vl);
-+
-+  return 0;
-+}
-+
-+/* { dg-final { scan-assembler-times {frrm\s+[axs][0-9]+} 1 } } */
-+/* { dg-final { scan-assembler-times {fsrmi\s+[01234]} 1 } } */
-+/* { dg-final { scan-assembler-times {fsrm\s+[axs][0-9]+} 1 } } */
-diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c
-new file mode 100644
-index 000000000000..62375c63ee86
--- /dev/null
-+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c
-@@ -0,0 +1,50 @@
-+/* { dg-do run { target { riscv_zvfh } } } */
-+/* { dg-options "-O3 -fno-strict-aliasing" } */
-+
-+#include "riscv_vector.h"
-+#define N 4
-+typedef _Float16 float16_t;
-+float16_t a[N]; float16_t b[N];
-+
-+void initialize () {
-+  uint16_t tmp_0[N] = {43883, 3213, 238, 275, };
-+
-+  for (int i = 0; i < N; ++i)
-+    {
-+      union { float16_t f16; uint16_t u16; } converter;
-+      converter.u16 = tmp_0[i];
-+      a[i] = converter.f16; 
-+    }
-+
-+  for (int i = 0; i < N; ++i)
-+    b[i] = 0;
-+}
-+
-+void compute ()
-+{
-+  int avl = N;
-+  float16_t* ptr_a = a; float16_t* ptr_b = b;
-+
-+  for (size_t vl; avl > 0; avl -= vl)
-+    {
-+      vl = __riscv_vsetvl_e16m1(avl);
-+      vfloat16m1_t va = __riscv_vle16_v_f16m1(ptr_a, vl);
-+      va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl);
-+      va = __riscv_vfmsac_vv_f16m1(va, va, va, vl);
-+      __riscv_vse16_v_f16m1(ptr_b, va, vl);
-+      ptr_a += vl; ptr_b += vl;
-+    }
-+}
-+
-+int main ()
-+{
-+  initialize();
-+  compute();
-+
-+  short *tmp = (short *)b;
-+
-+  if (*tmp != -21862)
-+    __builtin_abort ();
-+
-+  return 0;
-+}
-- 
-2.43.5
-