From c741e6965dd8974ed96ce829d06f9af488d192dc Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Mon, 13 Nov 2023 18:00:18 +0100 Subject: [PATCH] gcc-13.2.1-5 --- .gitignore | 1 + gcc.spec | 26 ++++++- gcc13-pr110792.patch | 163 +++++++++++++++++++++++++++++++++++++++++++ sources | 2 +- 4 files changed, 188 insertions(+), 4 deletions(-) create mode 100644 gcc13-pr110792.patch diff --git a/.gitignore b/.gitignore index 947393e..3b98d49 100644 --- a/.gitignore +++ b/.gitignore @@ -112,3 +112,4 @@ /nvptx-tools-aa3404ad5a496cda5d79a50bedb1344fd63e8763.tar.xz /gcc-13.2.1-20230918.tar.xz /gcc-13.2.1-20231011.tar.xz +/gcc-13.2.1-20231113.tar.xz diff --git a/gcc.spec b/gcc.spec index 5d6c7c4..ce8cee9 100644 --- a/gcc.spec +++ b/gcc.spec @@ -1,10 +1,10 @@ -%global DATE 20231011 -%global gitrev dc4aa177146b55dd7bd0b2dd63c721f3eaf2d2a8 +%global DATE 20231113 +%global gitrev 4292c70489195b52615f16679dc6df18cd1c8432 %global gcc_version 13.2.1 %global gcc_major 13 # Note, gcc_release must be integer, if you want to add suffixes to # %%{release}, append them after %%{gcc_release} on Release: line. -%global gcc_release 4 +%global gcc_release 5 %global nvptx_tools_gitrev aa3404ad5a496cda5d79a50bedb1344fd63e8763 %global newlib_cygwin_gitrev 9e09d6ed83cce4777a5950412647ccc603040409 %global _unpackaged_files_terminate_build 0 @@ -286,6 +286,7 @@ Patch8: gcc13-no-add-needed.patch Patch9: gcc13-Wno-format-security.patch Patch10: gcc13-rh1574936.patch Patch11: gcc13-d-shared-libphobos.patch +Patch12: gcc13-pr110792.patch Patch50: isl-rh2155127.patch @@ -863,6 +864,7 @@ so that there cannot be any synchronization problems. %patch -P10 -p0 -b .rh1574936~ %endif %patch -P11 -p0 -b .d-shared-libphobos~ +%patch -P12 -p0 -b .pr110792~ %patch -P50 -p0 -b .rh2155127~ touch -r isl-0.24/m4/ax_prog_cxx_for_build.m4 isl-0.24/m4/ax_prog_cc_for_build.m4 @@ -3457,6 +3459,24 @@ end %endif %changelog +* Mon Nov 13 2023 Jakub Jelinek 13.2.1-5 +- update from releases/gcc-13 branch + - PRs c++/89038, c/111884, d/110712, d/112270, fortran/67740, fortran/97245, + fortran/111837, fortran/112316, libbacktrace/111315, + libbacktrace/112263, libstdc++/110944, libstdc++/111172, + libstdc++/111936, libstdc++/112089, libstdc++/112314, + middle-end/111253, middle-end/111818, modula2/111756, modula2/112110, + target/101177, target/110170, target/111001, target/111366, + target/111367, target/111380, target/111935, target/112443, + tree-optimization/111397, tree-optimization/111445, + tree-optimization/111489, tree-optimization/111583, + tree-optimization/111614, tree-optimization/111622, + tree-optimization/111694, tree-optimization/111764, + tree-optimization/111820, tree-optimization/111833, + tree-optimization/111917 + - fix aarch64 RA ICE (#2241139, PR target/111528) +- fix ia32 doubleword rotates (#2238781, PR target/110792) + * Wed Oct 11 2023 Jakub Jelinek 13.2.1-4 - update from releases/gcc-13 branch - PRs ada/110488, ada/111434, c++/99631, c++/111471, c++/111485, c++/111493, diff --git a/gcc13-pr110792.patch b/gcc13-pr110792.patch new file mode 100644 index 0000000..ce129f9 --- /dev/null +++ b/gcc13-pr110792.patch @@ -0,0 +1,163 @@ +PR target/110792: Early clobber issues with rot32di2_doubleword on i386. + +This patch is a conservative fix for PR target/110792, a wrong-code +regression affecting doubleword rotations by BITS_PER_WORD, which +effectively swaps the highpart and lowpart words, when the source to be +rotated resides in memory. The issue is that if the register used to +hold the lowpart of the destination is mentioned in the address of +the memory operand, the current define_insn_and_split unintentionally +clobbers it before reading the highpart. + +Hence, for the testcase, the incorrectly generated code looks like: + + salq $4, %rdi // calculate address + movq WHIRL_S+8(%rdi), %rdi // accidentally clobber addr + movq WHIRL_S(%rdi), %rbp // load (wrong) lowpart + +Traditionally, the textbook way to fix this would be to add an +explicit early clobber to the instruction's constraints. + + (define_insn_and_split "32di2_doubleword" +- [(set (match_operand:DI 0 "register_operand" "=r,r,r") ++ [(set (match_operand:DI 0 "register_operand" "=r,r,&r") + (any_rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0,r,o") + (const_int 32)))] + +but unfortunately this currently generates significantly worse code, +due to a strange choice of reloads (effectively memcpy), which ends up +looking like: + + salq $4, %rdi // calculate address + movdqa WHIRL_S(%rdi), %xmm0 // load the double word in SSE reg. + movaps %xmm0, -16(%rsp) // store the SSE reg back to the stack + movq -8(%rsp), %rdi // load highpart + movq -16(%rsp), %rbp // load lowpart + +Note that reload's "&" doesn't distinguish between the memory being +early clobbered, vs the registers used in an addressing mode being +early clobbered. + +The fix proposed in this patch is to remove the third alternative, that +allowed offsetable memory as an operand, forcing reload to place the +operand into a register before the rotation. This results in: + + salq $4, %rdi + movq WHIRL_S(%rdi), %rax + movq WHIRL_S+8(%rdi), %rdi + movq %rax, %rbp + +I believe there's a more advanced solution, by swapping the order of +the loads (if first destination register is mentioned in the address), +or inserting a lea insn (if both destination registers are mentioned +in the address), but this fix is a minimal "safe" solution, that +should hopefully be suitable for backporting. + +2023-08-06 Roger Sayle + +gcc/testsuite/ChangeLog + PR target/110792 + * gcc.target/i386/pr110792.c: Remove dg-final scan-assembler-not. + +2023-08-03 Roger Sayle + +gcc/ChangeLog + PR target/110792 + * config/i386/i386.md (ti3): For rotations by 64 bits + place operand in a register before gen_64ti2_doubleword. + (di3): Likewise, for rotations by 32 bits, place + operand in a register before gen_32di2_doubleword. + (32di2_doubleword): Constrain operand to be in register. + (64ti2_doubleword): Likewise. + +gcc/testsuite/ChangeLog + PR target/110792 + * g++.target/i386/pr110792.C: New 32-bit C++ test case. + * gcc.target/i386/pr110792.c: New 64-bit C test case. + +--- gcc/config/i386/i386.md ++++ gcc/config/i386/i386.md +@@ -15341,7 +15341,10 @@ (define_expand "ti3" + emit_insn (gen_ix86_ti3_doubleword + (operands[0], operands[1], operands[2])); + else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 64) +- emit_insn (gen_64ti2_doubleword (operands[0], operands[1])); ++ { ++ operands[1] = force_reg (TImode, operands[1]); ++ emit_insn (gen_64ti2_doubleword (operands[0], operands[1])); ++ } + else + { + rtx amount = force_reg (QImode, operands[2]); +@@ -15376,7 +15379,10 @@ (define_expand "di3" + emit_insn (gen_ix86_di3_doubleword + (operands[0], operands[1], operands[2])); + else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 32) +- emit_insn (gen_32di2_doubleword (operands[0], operands[1])); ++ { ++ operands[1] = force_reg (DImode, operands[1]); ++ emit_insn (gen_32di2_doubleword (operands[0], operands[1])); ++ } + else + FAIL; + +@@ -15544,8 +15550,8 @@ (define_insn_and_split "ix86_rotr3_doubleword" + }) + + (define_insn_and_split "32di2_doubleword" +- [(set (match_operand:DI 0 "register_operand" "=r,r,r") +- (any_rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0,r,o") ++ [(set (match_operand:DI 0 "register_operand" "=r,r") ++ (any_rotate:DI (match_operand:DI 1 "register_operand" "0,r") + (const_int 32)))] + "!TARGET_64BIT" + "#" +@@ -15562,8 +15568,8 @@ (define_insn_and_split "32di2_doubleword" + }) + + (define_insn_and_split "64ti2_doubleword" +- [(set (match_operand:TI 0 "register_operand" "=r,r,r") +- (any_rotate:TI (match_operand:TI 1 "nonimmediate_operand" "0,r,o") ++ [(set (match_operand:TI 0 "register_operand" "=r,r") ++ (any_rotate:TI (match_operand:TI 1 "register_operand" "0,r") + (const_int 64)))] + "TARGET_64BIT" + "#" +--- gcc/testsuite/g++.target/i386/pr110792.C ++++ gcc/testsuite/g++.target/i386/pr110792.C +@@ -0,0 +1,16 @@ ++/* { dg-do compile { target ia32 } } */ ++/* { dg-options "-O2" } */ ++ ++template ++inline T rotr(T input) ++{ ++ return static_cast((input >> ROT) | (input << (8 * sizeof(T) - ROT))); ++} ++ ++unsigned long long WHIRL_S[256] = {0x18186018C07830D8}; ++unsigned long long whirl(unsigned char x0) ++{ ++ const unsigned long long s4 = WHIRL_S[x0&0xFF]; ++ return rotr<32>(s4); ++} ++/* { dg-final { scan-assembler-not "movl\tWHIRL_S\\+4\\(,%eax,8\\), %eax" } } */ +--- gcc/testsuite/gcc.target/i386/pr110792.c ++++ gcc/testsuite/gcc.target/i386/pr110792.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile { target int128 } } */ ++/* { dg-options "-O2" } */ ++ ++static inline unsigned __int128 rotr(unsigned __int128 input) ++{ ++ return ((input >> 64) | (input << (64))); ++} ++ ++unsigned __int128 WHIRL_S[256] = {((__int128)0x18186018C07830D8) << 64 |0x18186018C07830D8}; ++unsigned __int128 whirl(unsigned char x0) ++{ ++ register int t __asm("rdi") = x0&0xFF; ++ const unsigned __int128 s4 = WHIRL_S[t]; ++ register unsigned __int128 tt __asm("rdi") = rotr(s4); ++ asm("":::"memory"); ++ return tt; ++} diff --git a/sources b/sources index 94cf85d..182bfd5 100644 --- a/sources +++ b/sources @@ -1,4 +1,4 @@ -SHA512 (gcc-13.2.1-20231011.tar.xz) = 24fea15e0b67a2faa3d5476b105983b474b654e9ec50cdffb1967f18338c098539d5bd39e99f5f607eb64f6b0cf6d8e3ae91c9d5ea8069fc10b566739971be60 +SHA512 (gcc-13.2.1-20231113.tar.xz) = d5897c4dc4a223f20ffbbefe1c59077c5680fcb7287c97d9a84d4c25712dc02e4eb91aa3a14e3ae79b9a986d14bdeff1b93489ae9741053e0b069c42f52fe934 SHA512 (isl-0.24.tar.bz2) = aab3bddbda96b801d0f56d2869f943157aad52a6f6e6a61745edd740234c635c38231af20bc3f1a08d416a5e973a90e18249078ed8e4ae2f1d5de57658738e95 SHA512 (newlib-cygwin-9e09d6ed83cce4777a5950412647ccc603040409.tar.xz) = bef3fa04f7b1a915fc1356ebed114698b5cc835e9fa04b0becff05a9efc76c59fb376482990873d222d7acdcfee3c4f30f5a4cb7f3be1f291f1fa5f1c7a9d983 SHA512 (nvptx-tools-aa3404ad5a496cda5d79a50bedb1344fd63e8763.tar.xz) = 33a024326426375533cb5dd9b68b2508f37540be418d2506bfa19a5f5866485e9af150469064e9059b68136ad8cb080b3b12e7eb5c6b7d1288cf6bfb3f6bb5d0