Revert "Backport two RISCV [target] fixes from trunk"
This reverts commit 108ae1dd96f64e2f046264d02e13c39ab650b53b. Signed-off-by: David Abdurachmanov <davidlt@rivosinc.com>
This commit is contained in:
parent
108ae1dd96
commit
fd1ce7e338
14
gcc.spec
14
gcc.spec
@ -143,7 +143,7 @@
|
||||
Summary: Various compilers (C, C++, Objective-C, ...)
|
||||
Name: gcc
|
||||
Version: %{gcc_version}
|
||||
Release: %{gcc_release}.4.0.riscv64%{?dist}
|
||||
Release: %{gcc_release}.4%{?dist}
|
||||
# License notes for some of the less obvious ones:
|
||||
# gcc/doc/cppinternals.texi: Linux-man-pages-copyleft-2-para
|
||||
# isl: MIT, BSD-2-Clause
|
||||
@ -302,10 +302,6 @@ Patch12: gcc15-pr118206.patch
|
||||
Patch13: gcc15-d-deps.patch
|
||||
Patch14: gcc15-pr117231.patch
|
||||
|
||||
# RISCV
|
||||
Patch20: gcc15-pr116256.patch
|
||||
Patch21: gcc15-pr118103.patch
|
||||
|
||||
Patch50: isl-rh2155127.patch
|
||||
|
||||
Patch100: gcc15-fortran-fdec-duplicates.patch
|
||||
@ -923,10 +919,6 @@ so that there cannot be any synchronization problems.
|
||||
%patch -P13 -p0 -b .d-deps~
|
||||
%patch -P14 -p0 -b .pr117231~
|
||||
|
||||
# RISCV
|
||||
%patch -P20 -p0 -b .pr116256~
|
||||
%patch -P21 -p0 -b .pr118103~
|
||||
|
||||
%patch -P50 -p0 -b .rh2155127~
|
||||
touch -r isl-0.24/m4/ax_prog_cxx_for_build.m4 isl-0.24/m4/ax_prog_cc_for_build.m4
|
||||
|
||||
@ -3685,10 +3677,6 @@ end
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Sun Jan 26 2025 David Abdurachmanov <davidlt@rivosinc.com> 15.0.1-0.4.0.riscv64
|
||||
- Pull riscv fixes from trunk
|
||||
- PRs target/116256, target/118103
|
||||
|
||||
* Sat Jan 25 2025 Jakub Jelinek <jakub@redhat.com> 15.0.1-0.4
|
||||
- update from trunk
|
||||
- PRs c/118639, c++/105440, c++/107522, c++/107741, c++/115769, c++/116417,
|
||||
|
@ -1,227 +0,0 @@
|
||||
From e5990a6ce611f522b8f48c2b469983da19d39777 Mon Sep 17 00:00:00 2001
|
||||
From: Jeff Law <jlaw@ventanamicro.com>
|
||||
Date: Sat, 25 Jan 2025 09:42:19 -0700
|
||||
Subject: [PATCH] [RISC-V][PR target/116256] Improve handling of single bit
|
||||
constants
|
||||
|
||||
So under the umbrella of pr116256 (P3 regression) I've been exploring removal
|
||||
of the mvconst_internal pattern. Not surprisingly, that's going to cause all
|
||||
kinds of undesirable fallout. While I can kind of see a path forward for that
|
||||
work, it's going to require some combine work that I don't think we want to
|
||||
tackle in the context of gcc-15.
|
||||
|
||||
Essentially without mvconst_internal we'll have fully exposed constant
|
||||
synthesis prior to combine. Remember that combine has limits on what
|
||||
combinations it will perform based on how many instructions are in the source
|
||||
sequence. If we need 2+ instructions to synthesize the constant, those eat
|
||||
into our budget.
|
||||
|
||||
In a world without mvconst_internal we'd need to either improve combine to
|
||||
handle 5 insns cases (which do show up in the testsuite) or we need to
|
||||
significantly improve how combine handles REG_EQUAL notes. 5 insn combinations
|
||||
sound like insanity to me. So I'd tend to lean towards the latter, though
|
||||
that's going to need some refactoring and diving into note redistribution
|
||||
(ugh!).
|
||||
|
||||
In the mean time we can start limiting mvconst_internal. For the remaining
|
||||
case in pr116256 we have this code in combine:
|
||||
|
||||
> (insn 8 5 10 2 (set (reg:V2048HF 138 [ _5 ])
|
||||
> (vec_duplicate:V2048HF (reg:HF 142 [ x ]))) "j.c":152:11 3712 {*vec_duplicatev2048hf}
|
||||
> (expr_list:REG_DEAD (reg:HF 142 [ x ])
|
||||
> (nil)))
|
||||
> (insn 10 8 11 2 (set (reg:DI 139)
|
||||
> (const_int 2048 [0x800])) "j.c":152:11 275 {*mvconst_internal}
|
||||
> (nil)) (insn 11 10 0 2 (set (mem:V2048HF (reg/f:DI 141 [ in ]) [1 MEM <vector(2048) _Float16> [(_Float16 *)in_7(D)]+0 S4096 A128])
|
||||
> (if_then_else:V2048HF (unspec:V2048BI [
|
||||
> (const_vector:V2048BI [
|
||||
> (const_int 1 [0x1]) repeated x2048
|
||||
> ])
|
||||
> (reg:DI 139)
|
||||
> (const_int 2 [0x2]) repeated x3
|
||||
> (reg:SI 66 vl)
|
||||
> (reg:SI 67 vtype)
|
||||
> ] UNSPEC_VPREDICATE)
|
||||
> (reg:V2048HF 138 [ _5 ])
|
||||
> (unspec:V2048HF [
|
||||
> (reg:DI 0 zero)
|
||||
> ] UNSPEC_VUNDEF))) "j.c":152:11 3843 {*pred_movv2048hf}
|
||||
> (expr_list:REG_DEAD (reg/f:DI 141 [ in ])
|
||||
> (expr_list:REG_DEAD (reg:DI 0 zero)
|
||||
> (expr_list:REG_DEAD (reg:SI 66 vl)
|
||||
> (expr_list:REG_DEAD (reg:SI 67 vtype)
|
||||
> (expr_list:REG_DEAD (reg:V2048HF 138 [ _5 ])
|
||||
> (expr_list:REG_DEAD (reg:DI 139)
|
||||
> (nil))))))))
|
||||
|
||||
Note a couple things. First insn 8 will be split shortly after combine and
|
||||
will need the constant 2048. But that's obviously exposed late. Second (of
|
||||
course) is the mvconst_internal pattern at insn 10. After split1 we'll have:
|
||||
|
||||
> (insn 16 5 17 2 (set (reg:DI 144) (const_int 4096 [0x1000])) "j.c":152:11 -1
|
||||
> (nil))
|
||||
> (insn 17 16 18 2 (set (reg:DI 143)
|
||||
> (plus:DI (reg:DI 144)
|
||||
> (const_int -2048 [0xfffffffffffff800]))) "j.c":152:11 -1
|
||||
> (expr_list:REG_EQUAL (const_int 2048 [0x800])
|
||||
> (nil)))
|
||||
> (insn 18 17 19 2 (set (reg:V2048HF 138 [ _5 ])
|
||||
> (if_then_else:V2048HF (unspec:V2048BI [ (const_vector:V2048BI [
|
||||
> (const_int 1 [0x1]) repeated x2048
|
||||
> ])
|
||||
> (reg:DI 143)
|
||||
> (const_int 2 [0x2]) repeated x3
|
||||
> (reg:SI 66 vl)
|
||||
> (reg:SI 67 vtype)
|
||||
> ] UNSPEC_VPREDICATE)
|
||||
> (vec_duplicate:V2048HF (reg:HF 142 [ x ]))
|
||||
> (unspec:V2048HF [ (reg:DI 0 zero)
|
||||
> ] UNSPEC_VUNDEF))) "j.c":152:11 -1
|
||||
> (nil))
|
||||
> (insn 19 18 20 2 (set (reg:DI 145)
|
||||
> (const_int 4096 [0x1000])) "j.c":152:11 -1
|
||||
> (nil))
|
||||
> (insn 20 19 11 2 (set (reg:DI 139)
|
||||
> (plus:DI (reg:DI 145)
|
||||
> (const_int -2048 [0xfffffffffffff800]))) "j.c":152:11 -1
|
||||
> (expr_list:REG_EQUAL (const_int 2048 [0x800])
|
||||
> (nil)))
|
||||
> (insn 11 20 0 2 (set (mem:V2048HF (reg/f:DI 141 [ in ]) [1 MEM <vector(2048) _Float16> [(_Float16 *)in_7(D)]+0 S4096 A128])
|
||||
> (if_then_else:V2048HF (unspec:V2048BI [
|
||||
> (const_vector:V2048BI [
|
||||
> (const_int 1 [0x1]) repeated x2048
|
||||
> ])
|
||||
> (reg:DI 139) (const_int 2 [0x2]) repeated x3
|
||||
> (reg:SI 66 vl)
|
||||
> (reg:SI 67 vtype)
|
||||
> ] UNSPEC_VPREDICATE)
|
||||
> (reg:V2048HF 138 [ _5 ])
|
||||
> (unspec:V2048HF [ (reg:DI 0 zero)
|
||||
> ] UNSPEC_VUNDEF))) "j.c":152:11 3843 {*pred_movv2048hf}
|
||||
> (expr_list:REG_DEAD (reg/f:DI 141 [ in ])
|
||||
> (expr_list:REG_DEAD (reg:DI 0 zero) (expr_list:REG_DEAD (reg:SI 66 vl)
|
||||
> (expr_list:REG_DEAD (reg:SI 67 vtype)
|
||||
> (expr_list:REG_DEAD (reg:V2048HF 138 [ _5 ])
|
||||
> (expr_list:REG_DEAD (reg:DI 139)
|
||||
> (nil))))))))
|
||||
Note the synthesis of 2048 appears twice. I seriously considered adding a
|
||||
local cprop pass at this point. That could be done with a bit of work. It
|
||||
didn't look too bad -- the biggest problem is cprop isn't designed to run once
|
||||
we've left cfglayout. But we could probably finesse that by not allowing it to
|
||||
change jumps if we've left cfglayout or converting it to do the more complex
|
||||
jump fixups.
|
||||
|
||||
You might ask why the post-reload optimizers don't help since this at least
|
||||
looks like a case where they could. After LRA the RTL looks like:
|
||||
|
||||
> (insn 26 5 25 2 (set (reg:DI 15 a5 [144])
|
||||
> (const_int 4096 [0x1000])) "/home/jlaw/test/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c":152:11 277 {*movdi_64bit} (expr_list:REG_EQUIV (const_int 4096 [0x1000])
|
||||
> (nil)))
|
||||
> (insn 25 26 19 2 (set (reg:DI 15 a5 [143])
|
||||
> (plus:DI (reg:DI 15 a5 [144])
|
||||
> (const_int -2048 [0xfffffffffffff800]))) "/home/jlaw/test/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c":152:11 5 {adddi3}
|
||||
> (expr_list:REG_EQUIV (const_int 2048 [0x800])
|
||||
> (nil)))
|
||||
> (insn 19 25 20 2 (set (reg:V2048QI 100 v4 [orig:138 _11 ] [138])
|
||||
> (if_then_else:V2048QI (unspec:V2048BI [
|
||||
> (const_vector:V2048BI [
|
||||
> (const_int 1 [0x1]) repeated x2048
|
||||
> ])
|
||||
> (reg:DI 15 a5 [143])
|
||||
> (const_int 2 [0x2]) repeated x3
|
||||
> (reg:SI 66 vl)
|
||||
> (reg:SI 67 vtype)
|
||||
> ] UNSPEC_VPREDICATE)
|
||||
> (vec_duplicate:V2048QI (reg:QI 12 a2 [145]))
|
||||
> (unspec:V2048QI [ (reg:DI 0 zero)
|
||||
> ] UNSPEC_VUNDEF))) "/home/jlaw/test/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c":152:11 4172 {*pred_broadcastv2048qi}
|
||||
> (nil)) (insn 20 19 21 2 (set (reg:DI 15 a5 [146])
|
||||
> (const_int 4096 [0x1000])) "/home/jlaw/test/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c":152:11 277 {*movdi_64bit} (expr_list:REG_EQUIV (const_int 4096 [0x1000])
|
||||
> (nil)))
|
||||
> (insn 21 20 11 2 (set (reg:DI 15 a5 [139])
|
||||
> (plus:DI (reg:DI 15 a5 [146])
|
||||
> (const_int -2048 [0xfffffffffffff800]))) "/home/jlaw/test/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c":152:11 5 {adddi3}
|
||||
> (expr_list:REG_EQUIV (const_int 2048 [0x800])
|
||||
> (nil)))
|
||||
|
||||
Note the re-use of a5 for the constant synthesis steps. That's going to spoil
|
||||
any chance of reload_cse saving us. That re-use also gets in the way of vsetvl
|
||||
elimination and we ultimately get this code:
|
||||
|
||||
> foo10:
|
||||
> li a5,4096
|
||||
> addi a5,a5,-2048
|
||||
> vsetvli zero,a5,e16,m8,ta,ma
|
||||
> vfmv.v.f v8,fa0
|
||||
> li a5,4096
|
||||
> addi a5,a5,-2048
|
||||
> vsetvli zero,a5,e16,m8,ta,ma
|
||||
> vse16.v v8,0(a0)
|
||||
> ret
|
||||
The regression is we have the obviously redundant vsetvl. The additional copy
|
||||
of the synthesis is undesirable as well.
|
||||
|
||||
If we filter out single bit constants from mvconst_internal we trivially fix
|
||||
that regression. The only fallout is a class of saturation tests which want to
|
||||
test against 0x80000000. Under the hood this is a minor codegen issue
|
||||
interacting badly with combine's deliberate rejection of simplification of
|
||||
extensions of constants. Rather than constructing the SImode constant, then
|
||||
zero extending the result we can just generate the constant we actually want
|
||||
directly in DImode.
|
||||
|
||||
The net is we fix the regression, don't introduce any obvious new regressions
|
||||
and slightly reduce our dependence on mvconst_internal. All good in my book.
|
||||
Obviously I'll wait for pre-commit CI to render a verdict.
|
||||
|
||||
PR target/116256
|
||||
gcc/
|
||||
* config/riscv/riscv.md (mvconst_internal): Reject single bit
|
||||
constants.
|
||||
* config/riscv/riscv.cc (riscv_gen_zero_extend_rtx): Improve
|
||||
handling constants.
|
||||
---
|
||||
gcc/config/riscv/riscv.cc | 12 +++++++++---
|
||||
gcc/config/riscv/riscv.md | 3 ++-
|
||||
2 files changed, 11 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
|
||||
index 5a3a05041773..4652454b8fec 100644
|
||||
--- a/gcc/config/riscv/riscv.cc
|
||||
+++ b/gcc/config/riscv/riscv.cc
|
||||
@@ -12684,10 +12684,16 @@ riscv_gen_zero_extend_rtx (rtx x, machine_mode mode)
|
||||
emit_move_insn (xmode_reg, x);
|
||||
else
|
||||
{
|
||||
- rtx reg_x = gen_reg_rtx (mode);
|
||||
+ /* Combine deliberately does not simplify extensions of constants
|
||||
+ (long story). So try to generate the zero extended constant
|
||||
+ efficiently.
|
||||
|
||||
- emit_move_insn (reg_x, x);
|
||||
- riscv_emit_unary (ZERO_EXTEND, xmode_reg, reg_x);
|
||||
+ First extract the constant and mask off all the bits not in MODE. */
|
||||
+ HOST_WIDE_INT val = INTVAL (x);
|
||||
+ val &= GET_MODE_MASK (mode);
|
||||
+
|
||||
+ /* X may need synthesis, so do not blindly copy it. */
|
||||
+ xmode_reg = force_reg (Xmode, gen_int_mode (val, Xmode));
|
||||
}
|
||||
|
||||
return xmode_reg;
|
||||
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
|
||||
index e4123c912dcb..09053df1eb9b 100644
|
||||
--- a/gcc/config/riscv/riscv.md
|
||||
+++ b/gcc/config/riscv/riscv.md
|
||||
@@ -2470,7 +2470,8 @@
|
||||
(match_operand:GPR 1 "splittable_const_int_operand" "i"))]
|
||||
"!ira_in_progress
|
||||
&& !(p2m1_shift_operand (operands[1], <MODE>mode)
|
||||
- || high_mask_shift_operand (operands[1], <MODE>mode))"
|
||||
+ || high_mask_shift_operand (operands[1], <MODE>mode)
|
||||
+ || exact_log2 (INTVAL (operands[1])) >= 0)"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(const_int 0)]
|
||||
--
|
||||
2.43.5
|
||||
|
@ -1,217 +0,0 @@
|
||||
From 55d288d4ff5360c572f2a017ba9385840ac5134e Mon Sep 17 00:00:00 2001
|
||||
From: Pan Li <pan2.li@intel.com>
|
||||
Date: Sat, 25 Jan 2025 15:45:10 +0800
|
||||
Subject: [PATCH] RISC-V: Make FRM as global register [PR118103]
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=utf8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
After we enabled the labe-combine pass after the mode-switching pass, it
|
||||
will try to combine below insn patterns into op. Aka:
|
||||
|
||||
(insn 40 5 41 2 (set (reg:SI 11 a1 [151])
|
||||
(reg:SI 69 frm)) "pr118103-simple.c":67:15 2712 {frrmsi}
|
||||
(nil))
|
||||
(insn 41 40 7 2 (set (reg:SI 69 frm)
|
||||
(const_int 2 [0x2])) "pr118103-simple.c":69:8 2710 {fsrmsi_restore}
|
||||
(nil))
|
||||
(insn 42 10 11 2 (set (reg:SI 69 frm)
|
||||
(reg:SI 11 a1 [151])) "pr118103-simple.c":70:8 2710 {fsrmsi_restore}
|
||||
(nil))
|
||||
|
||||
trying to combine definition of r11 in:
|
||||
40: a1:SI=frm:SI
|
||||
into:
|
||||
42: frm:SI=a1:SI
|
||||
instruction becomes a no-op:
|
||||
(set (reg:SI 69 frm)
|
||||
(reg:SI 69 frm))
|
||||
original cost = 4 + 4 (weighted: 8.000000), replacement cost =
|
||||
2147483647; keeping replacement
|
||||
rescanning insn with uid = 42.
|
||||
updating insn 42 in-place
|
||||
verify found no changes in insn with uid = 42.
|
||||
deleting insn 40
|
||||
|
||||
For example we have code as blow:
|
||||
9 â int test_exampe () {
|
||||
10 â test ();
|
||||
11 â
|
||||
12 â size_t vl = 4;
|
||||
13 â vfloat16m1_t va = __riscv_vle16_v_f16m1(a, vl);
|
||||
14 â va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl);
|
||||
15 â va = __riscv_vfmsac_vv_f16m1(va, va, va, vl);
|
||||
16 â
|
||||
17 â __riscv_vse16_v_f16m1(b, va, vl);
|
||||
18 â
|
||||
19 â return 0;
|
||||
20 â }
|
||||
|
||||
it will be compiled to:
|
||||
53 â main:
|
||||
54 â addi sp,sp,-16
|
||||
55 â sd ra,8(sp)
|
||||
56 â call initialize
|
||||
57 â lui a6,%hi(b)
|
||||
58 â lui a2,%hi(a)
|
||||
59 â addi a3,a6,%lo(b)
|
||||
60 â addi a2,a2,%lo(a)
|
||||
61 â li a4,4
|
||||
62 â .L8:
|
||||
63 â fsrmi 2
|
||||
64 â vsetvli a5,a4,e16,m1,ta,ma
|
||||
65 â vle16.v v1,0(a2)
|
||||
66 â slli a1,a5,1
|
||||
67 â subw a4,a4,a5
|
||||
68 â add a2,a2,a1
|
||||
69 â vfnmadd.vv v1,v1,v1
|
||||
>> The fsrm a0 insn is deleted by late-combine <<
|
||||
70 â vfmsub.vv v1,v1,v1
|
||||
71 â vse16.v v1,0(a3)
|
||||
72 â add a3,a3,a1
|
||||
73 â bgt a4,zero,.L8
|
||||
74 â lh a4,%lo(b)(a6)
|
||||
75 â li a5,-20480
|
||||
76 â addi a5,a5,-1382
|
||||
77 â bne a4,a5,.L14
|
||||
78 â ld ra,8(sp)
|
||||
79 â li a0,0
|
||||
80 â addi sp,sp,16
|
||||
81 â jr ra
|
||||
|
||||
This patch would like to add the FRM register to the global_regs as it
|
||||
is a cooperatively-managed global register. And then the fsrm insn will
|
||||
not be eliminated by late-combine. The related spec17 cam4 failure may
|
||||
also caused by this issue too.
|
||||
|
||||
The below test suites are passed for this patch.
|
||||
* The rv64gcv fully regression test.
|
||||
|
||||
PR target/118103
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/riscv/riscv.cc (riscv_conditional_register_usage): Add
|
||||
the FRM as the global_regs.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/riscv/rvv/base/pr118103-1.c: New test.
|
||||
* gcc.target/riscv/rvv/base/pr118103-run-1.c: New test.
|
||||
|
||||
Signed-off-by: Pan Li <pan2.li@intel.com>
|
||||
---
|
||||
gcc/config/riscv/riscv.cc | 4 +-
|
||||
.../gcc.target/riscv/rvv/base/pr118103-1.c | 27 ++++++++++
|
||||
.../riscv/rvv/base/pr118103-run-1.c | 50 +++++++++++++++++++
|
||||
3 files changed, 80 insertions(+), 1 deletion(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c
|
||||
|
||||
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
|
||||
index 4652454b8fec..dd50fe4eddfb 100644
|
||||
--- a/gcc/config/riscv/riscv.cc
|
||||
+++ b/gcc/config/riscv/riscv.cc
|
||||
@@ -10885,7 +10885,9 @@ riscv_conditional_register_usage (void)
|
||||
call_used_regs[r] = 1;
|
||||
}
|
||||
|
||||
- if (!TARGET_HARD_FLOAT)
|
||||
+ if (TARGET_HARD_FLOAT)
|
||||
+ global_regs[FRM_REGNUM] = 1;
|
||||
+ else
|
||||
{
|
||||
for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
|
||||
fixed_regs[regno] = call_used_regs[regno] = 1;
|
||||
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c
|
||||
new file mode 100644
|
||||
index 000000000000..1afa5d3afb50
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c
|
||||
@@ -0,0 +1,27 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O3 -march=rv64gcv_zvfh -mabi=lp64d" } */
|
||||
+
|
||||
+#include "riscv_vector.h"
|
||||
+
|
||||
+#define N 4
|
||||
+typedef _Float16 float16_t;
|
||||
+float16_t a[N]; float16_t b[N];
|
||||
+
|
||||
+extern void test ();
|
||||
+
|
||||
+int test_exampe () {
|
||||
+ test ();
|
||||
+
|
||||
+ size_t vl = N;
|
||||
+ vfloat16m1_t va = __riscv_vle16_v_f16m1(a, vl);
|
||||
+ va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl);
|
||||
+ va = __riscv_vfmsac_vv_f16m1(va, va, va, vl);
|
||||
+
|
||||
+ __riscv_vse16_v_f16m1(b, va, vl);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times {frrm\s+[axs][0-9]+} 1 } } */
|
||||
+/* { dg-final { scan-assembler-times {fsrmi\s+[01234]} 1 } } */
|
||||
+/* { dg-final { scan-assembler-times {fsrm\s+[axs][0-9]+} 1 } } */
|
||||
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c
|
||||
new file mode 100644
|
||||
index 000000000000..62375c63ee86
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c
|
||||
@@ -0,0 +1,50 @@
|
||||
+/* { dg-do run { target { riscv_zvfh } } } */
|
||||
+/* { dg-options "-O3 -fno-strict-aliasing" } */
|
||||
+
|
||||
+#include "riscv_vector.h"
|
||||
+#define N 4
|
||||
+typedef _Float16 float16_t;
|
||||
+float16_t a[N]; float16_t b[N];
|
||||
+
|
||||
+void initialize () {
|
||||
+ uint16_t tmp_0[N] = {43883, 3213, 238, 275, };
|
||||
+
|
||||
+ for (int i = 0; i < N; ++i)
|
||||
+ {
|
||||
+ union { float16_t f16; uint16_t u16; } converter;
|
||||
+ converter.u16 = tmp_0[i];
|
||||
+ a[i] = converter.f16;
|
||||
+ }
|
||||
+
|
||||
+ for (int i = 0; i < N; ++i)
|
||||
+ b[i] = 0;
|
||||
+}
|
||||
+
|
||||
+void compute ()
|
||||
+{
|
||||
+ int avl = N;
|
||||
+ float16_t* ptr_a = a; float16_t* ptr_b = b;
|
||||
+
|
||||
+ for (size_t vl; avl > 0; avl -= vl)
|
||||
+ {
|
||||
+ vl = __riscv_vsetvl_e16m1(avl);
|
||||
+ vfloat16m1_t va = __riscv_vle16_v_f16m1(ptr_a, vl);
|
||||
+ va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl);
|
||||
+ va = __riscv_vfmsac_vv_f16m1(va, va, va, vl);
|
||||
+ __riscv_vse16_v_f16m1(ptr_b, va, vl);
|
||||
+ ptr_a += vl; ptr_b += vl;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+int main ()
|
||||
+{
|
||||
+ initialize();
|
||||
+ compute();
|
||||
+
|
||||
+ short *tmp = (short *)b;
|
||||
+
|
||||
+ if (*tmp != -21862)
|
||||
+ __builtin_abort ();
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
--
|
||||
2.43.5
|
||||
|
Loading…
x
Reference in New Issue
Block a user