8.0.1-0.6
This commit is contained in:
parent
b147991d7c
commit
da33ffbe57
3
.gitignore
vendored
3
.gitignore
vendored
@ -28,3 +28,6 @@
|
||||
/gcc-7.2.1-20180104.tar.bz2
|
||||
/gcc-7.2.1-20180117.tar.bz2
|
||||
/gcc-7.3.1-20180125.tar.bz2
|
||||
/gcc-8.0.1-20180127.tar.xz
|
||||
/nvptx-newlib-aadc8eb0ec43b7cd0dd2dfb484bae63c8b05ef24.tar.xz
|
||||
/nvptx-tools-c28050f60193b3b95a18866a96f03334e874e78f.tar.xz
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,98 +0,0 @@
|
||||
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
|
||||
index 346fb4f..f0b8346 100644
|
||||
--- gcc/config/i386/i386.c
|
||||
+++ gcc/config/i386/i386.c
|
||||
@@ -12763,6 +12763,18 @@ ix86_builtin_setjmp_frame_value (void)
|
||||
return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
|
||||
}
|
||||
|
||||
+/* Return the probing interval for -fstack-clash-protection. */
|
||||
+
|
||||
+static HOST_WIDE_INT
|
||||
+get_probe_interval (void)
|
||||
+{
|
||||
+ if (flag_stack_clash_protection)
|
||||
+ return (HOST_WIDE_INT_1U
|
||||
+ << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL));
|
||||
+ else
|
||||
+ return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
|
||||
+}
|
||||
+
|
||||
/* When using -fsplit-stack, the allocation routines set a field in
|
||||
the TCB to the bottom of the stack plus this much space, measured
|
||||
in bytes. */
|
||||
@@ -12948,7 +12960,14 @@ ix86_compute_frame_layout (void)
|
||||
to_allocate = offset - frame->sse_reg_save_offset;
|
||||
|
||||
if ((!to_allocate && frame->nregs <= 1)
|
||||
- || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000)))
|
||||
+ || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
|
||||
+ /* If stack clash probing needs a loop, then it needs a
|
||||
+ scratch register. But the returned register is only guaranteed
|
||||
+ to be safe to use after register saves are complete. So if
|
||||
+ stack clash protections are enabled and the allocated frame is
|
||||
+ larger than the probe interval, then use pushes to save
|
||||
+ callee saved registers. */
|
||||
+ || (flag_stack_clash_protection && to_allocate > get_probe_interval ()))
|
||||
frame->save_regs_using_mov = false;
|
||||
|
||||
if (ix86_using_red_zone ()
|
||||
@@ -13619,18 +13638,6 @@ release_scratch_register_on_entry (struct scratch_reg *sr)
|
||||
}
|
||||
}
|
||||
|
||||
-/* Return the probing interval for -fstack-clash-protection. */
|
||||
-
|
||||
-static HOST_WIDE_INT
|
||||
-get_probe_interval (void)
|
||||
-{
|
||||
- if (flag_stack_clash_protection)
|
||||
- return (HOST_WIDE_INT_1U
|
||||
- << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL));
|
||||
- else
|
||||
- return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
|
||||
-}
|
||||
-
|
||||
/* Emit code to adjust the stack pointer by SIZE bytes while probing it.
|
||||
|
||||
This differs from the next routine in that it tries hard to prevent
|
||||
@@ -14558,12 +14565,11 @@ ix86_expand_prologue (void)
|
||||
&& (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
|
||||
|| flag_stack_clash_protection))
|
||||
{
|
||||
- /* This assert wants to verify that integer registers were saved
|
||||
- prior to probing. This is necessary when probing may be implemented
|
||||
- as a function call (Windows). It is not necessary for stack clash
|
||||
- protection probing. */
|
||||
- if (!flag_stack_clash_protection)
|
||||
- gcc_assert (int_registers_saved);
|
||||
+ /* We expect the GP registers to be saved when probes are used
|
||||
+ as the probing sequences might need a scratch register and
|
||||
+ the routine to allocate one assumes the integer registers
|
||||
+ have already been saved. */
|
||||
+ gcc_assert (int_registers_saved);
|
||||
|
||||
if (flag_stack_clash_protection)
|
||||
{
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/pr83994.c b/gcc/testsuite/gcc.target/i386/pr83994.c
|
||||
new file mode 100644
|
||||
index 0000000..dc0b7cb
|
||||
--- /dev/null
|
||||
+++ gcc/testsuite/gcc.target/i386/pr83994.c
|
||||
@@ -0,0 +1,16 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -march=i686 -fpic -fstack-clash-protection" } */
|
||||
+/* { dg-require-effective-target ia32 } */
|
||||
+
|
||||
+void f1 (char *);
|
||||
+
|
||||
+__attribute__ ((regparm (3)))
|
||||
+int
|
||||
+f2 (int arg1, int arg2, int arg3)
|
||||
+{
|
||||
+ char buf[16384];
|
||||
+ f1 (buf);
|
||||
+ f1 (buf);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
@ -54,17 +54,6 @@
|
||||
|
||||
# Rules to build gnatlib.
|
||||
.PHONY: gnatlib gnatlib-plain gnatlib-sjlj gnatlib-zcx gnatlib-shared osconstool
|
||||
--- gcc/ada/sem_util.adb (revision 161677)
|
||||
+++ gcc/ada/sem_util.adb (working copy)
|
||||
@@ -5487,7 +5487,7 @@ package body Sem_Util is
|
||||
Exp : Node_Id;
|
||||
Assn : Node_Id;
|
||||
Choice : Node_Id;
|
||||
- Comp_Type : Entity_Id;
|
||||
+ Comp_Type : Entity_Id := Empty;
|
||||
Is_Array_Aggr : Boolean;
|
||||
|
||||
begin
|
||||
--- config-ml.in.jj 2010-06-30 09:50:44.000000000 +0200
|
||||
+++ config-ml.in 2010-07-02 21:24:17.994211151 +0200
|
||||
@@ -511,6 +511,8 @@ multi-do:
|
@ -27,7 +27,7 @@
|
||||
# and compile them.
|
||||
--- gcc/graphite.h.jj 2016-01-27 12:44:06.000000000 +0100
|
||||
+++ gcc/graphite.h 2016-01-27 13:26:38.309876856 +0100
|
||||
@@ -37,6 +37,586 @@ along with GCC; see the file COPYING3.
|
||||
@@ -37,6 +37,590 @@ along with GCC; see the file COPYING3.
|
||||
#include <isl/schedule.h>
|
||||
#include <isl/ast_build.h>
|
||||
#include <isl/schedule_node.h>
|
||||
@ -317,7 +317,9 @@
|
||||
+ DYNSYM (isl_val_get_abs_num_chunks); \
|
||||
+ DYNSYM (isl_val_int_from_chunks); \
|
||||
+ DYNSYM (isl_val_is_neg); \
|
||||
+ DYNSYM (isl_version);
|
||||
+ DYNSYM (isl_version); \
|
||||
+ DYNSYM (isl_options_get_on_error); \
|
||||
+ DYNSYM (isl_ctx_reset_error);
|
||||
+
|
||||
+extern struct isl_pointers_s__
|
||||
+{
|
||||
@ -611,10 +613,12 @@
|
||||
+#define isl_val_int_from_chunks (*isl_pointers__.p_isl_val_int_from_chunks)
|
||||
+#define isl_val_is_neg (*isl_pointers__.p_isl_val_is_neg)
|
||||
+#define isl_version (*isl_pointers__.p_isl_version)
|
||||
+#define isl_options_get_on_error (*isl_pointers__.p_isl_options_get_on_error)
|
||||
+#define isl_ctx_reset_error (*isl_pointers__.p_isl_ctx_reset_error)
|
||||
|
||||
typedef struct poly_dr *poly_dr_p;
|
||||
|
||||
@@ -458,5 +1038,6 @@ extern void build_scops (vec<scop_p> *);
|
||||
@@ -461,5 +1045,6 @@ extern void build_scops (vec<scop_p> *);
|
||||
extern void dot_all_sese (FILE *, vec<sese_l> &);
|
||||
extern void dot_sese (sese_l &);
|
||||
extern void dot_cfg ();
|
||||
@ -623,8 +627,8 @@
|
||||
#endif
|
||||
--- gcc/graphite.c.jj 2015-11-04 14:15:32.000000000 +0100
|
||||
+++ gcc/graphite.c 2015-11-04 14:56:02.645536409 +0100
|
||||
@@ -55,6 +55,35 @@ along with GCC; see the file COPYING3.
|
||||
#include "tree-vectorizer.h"
|
||||
@@ -60,6 +60,35 @@ along with GCC; see the file COPYING3.
|
||||
#include "tree-into-ssa.h"
|
||||
#include "graphite.h"
|
||||
|
||||
+__typeof (isl_pointers__) isl_pointers__;
|
||||
@ -659,7 +663,7 @@
|
||||
/* Print global statistics to FILE. */
|
||||
|
||||
static void
|
||||
@@ -299,6 +328,15 @@ graphite_transform_loops (void)
|
||||
@@ -365,6 +394,15 @@ graphite_transform_loops (void)
|
||||
if (parallelized_function_p (cfun->decl))
|
||||
return;
|
||||
|
||||
@ -672,11 +676,11 @@
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
ctx = isl_ctx_alloc ();
|
||||
isl_options_set_on_error (ctx, ISL_ON_ERROR_ABORT);
|
||||
if (!graphite_initialize (ctx))
|
||||
@@ -342,6 +380,14 @@ graphite_transform_loops (void)
|
||||
isl_ctx_free (ctx);
|
||||
calculate_dominance_info (CDI_DOMINATORS);
|
||||
|
||||
/* We rely on post-dominators during merging of SESE regions so those
|
||||
@@ -455,6 +493,14 @@ graphite_transform_loops (void)
|
||||
}
|
||||
}
|
||||
|
||||
+const char *
|
@ -4,7 +4,7 @@
|
||||
<a class="link" href="http://www.fsf.org/" target="_top">FSF
|
||||
</a>
|
||||
</p><p>
|
||||
+ Release 7.3.1
|
||||
+ Release 8.0.0
|
||||
+ </p><p>
|
||||
Permission is granted to copy, distribute and/or modify this
|
||||
document under the terms of the GNU Free Documentation
|
||||
@ -17,7 +17,7 @@
|
||||
</p><p>
|
||||
- The API documentation, rendered into HTML, can be viewed online
|
||||
+ The API documentation, rendered into HTML, can be viewed locally
|
||||
+ <a class="link" href="api/index.html" target="_top">for the 7.3.1 release</a>,
|
||||
+ <a class="link" href="api/index.html" target="_top">for the 8.0.0 release</a>,
|
||||
+ online
|
||||
<a class="link" href="http://gcc.gnu.org/onlinedocs/" target="_top">for each GCC release</a>
|
||||
and
|
@ -8,12 +8,12 @@
|
||||
|
||||
--- gcc/config/alpha/elf.h.jj 2011-01-03 12:52:31.118056764 +0100
|
||||
+++ gcc/config/alpha/elf.h 2011-01-04 18:14:10.931874160 +0100
|
||||
@@ -165,5 +165,5 @@ extern int alpha_this_gpdisp_sequence_nu
|
||||
@@ -168,5 +168,5 @@ extern int alpha_this_gpdisp_sequence_nu
|
||||
I imagine that other systems will catch up. In the meantime, it
|
||||
doesn't harm to make sure that the data exists to be used later. */
|
||||
#if defined(HAVE_LD_EH_FRAME_HDR)
|
||||
-#define LINK_EH_SPEC "%{!static:--eh-frame-hdr} "
|
||||
+#define LINK_EH_SPEC "--no-add-needed %{!static:--eh-frame-hdr} "
|
||||
-#define LINK_EH_SPEC "%{!static|static-pie:--eh-frame-hdr} "
|
||||
+#define LINK_EH_SPEC "--no-add-needed %{!static|static-pie:--eh-frame-hdr} "
|
||||
#endif
|
||||
--- gcc/config/ia64/linux.h.jj 2011-01-03 13:02:11.462994522 +0100
|
||||
+++ gcc/config/ia64/linux.h 2011-01-04 18:14:10.931874160 +0100
|
||||
@ -28,23 +28,23 @@
|
||||
#define TARGET_INIT_LIBFUNCS ia64_soft_fp_init_libfuncs
|
||||
--- gcc/config/gnu-user.h.jj 2011-01-03 12:53:03.739057299 +0100
|
||||
+++ gcc/config/gnu-user.h 2011-01-04 18:14:10.932814884 +0100
|
||||
@@ -82,7 +82,7 @@ see the files COPYING3 and COPYING.RUNTI
|
||||
@@ -133,7 +133,7 @@ see the files COPYING3 and COPYING.RUNTI
|
||||
#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC
|
||||
|
||||
#if defined(HAVE_LD_EH_FRAME_HDR)
|
||||
-#define LINK_EH_SPEC "%{!static:--eh-frame-hdr} "
|
||||
+#define LINK_EH_SPEC "--no-add-needed %{!static:--eh-frame-hdr} "
|
||||
-#define LINK_EH_SPEC "%{!static|static-pie:--eh-frame-hdr} "
|
||||
+#define LINK_EH_SPEC "--no-add-needed %{!static|static-pie:--eh-frame-hdr} "
|
||||
#endif
|
||||
|
||||
#undef LINK_GCC_C_SEQUENCE_SPEC
|
||||
--- gcc/config/rs6000/sysv4.h.jj 2011-01-03 13:02:18.255994215 +0100
|
||||
+++ gcc/config/rs6000/sysv4.h 2011-01-04 18:14:10.933888871 +0100
|
||||
@@ -820,7 +820,7 @@ extern int fixuplabelno;
|
||||
@@ -816,7 +816,7 @@ ENDIAN_SELECT(" -mbig", " -mlittle", DEF
|
||||
-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}}"
|
||||
|
||||
#if defined(HAVE_LD_EH_FRAME_HDR)
|
||||
-# define LINK_EH_SPEC "%{!static:--eh-frame-hdr} "
|
||||
+# define LINK_EH_SPEC "--no-add-needed %{!static:--eh-frame-hdr} "
|
||||
-# define LINK_EH_SPEC "%{!static|static-pie:--eh-frame-hdr} "
|
||||
+# define LINK_EH_SPEC "--no-add-needed %{!static|static-pie:--eh-frame-hdr} "
|
||||
#endif
|
||||
|
||||
#define CPP_OS_LINUX_SPEC "-D__unix__ -D__gnu_linux__ -D__linux__ \
|
445
gcc8-rh1512529-aarch64.patch
Normal file
445
gcc8-rh1512529-aarch64.patch
Normal file
@ -0,0 +1,445 @@
|
||||
--- gcc/config/aarch64/aarch64.c
|
||||
+++ gcc/config/aarch64/aarch64.c
|
||||
@@ -3799,7 +3799,14 @@ aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
|
||||
output_asm_insn ("sub\t%0, %0, %1", xops);
|
||||
|
||||
/* Probe at TEST_ADDR. */
|
||||
- output_asm_insn ("str\txzr, [%0]", xops);
|
||||
+ if (flag_stack_clash_protection)
|
||||
+ {
|
||||
+ gcc_assert (xops[0] == stack_pointer_rtx);
|
||||
+ xops[1] = GEN_INT (PROBE_INTERVAL - 8);
|
||||
+ output_asm_insn ("str\txzr, [%0, %1]", xops);
|
||||
+ }
|
||||
+ else
|
||||
+ output_asm_insn ("str\txzr, [%0]", xops);
|
||||
|
||||
/* Test if TEST_ADDR == LAST_ADDR. */
|
||||
xops[1] = reg2;
|
||||
@@ -4589,6 +4596,133 @@ aarch64_set_handled_components (sbitmap components)
|
||||
cfun->machine->reg_is_wrapped_separately[regno] = true;
|
||||
}
|
||||
|
||||
+/* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch
|
||||
+ registers. */
|
||||
+
|
||||
+static void
|
||||
+aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
|
||||
+ poly_int64 poly_size)
|
||||
+{
|
||||
+ HOST_WIDE_INT size;
|
||||
+ if (!poly_size.is_constant (&size))
|
||||
+ {
|
||||
+ sorry ("stack probes for SVE frames");
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ HOST_WIDE_INT probe_interval
|
||||
+ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
|
||||
+ HOST_WIDE_INT guard_size
|
||||
+ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
|
||||
+ HOST_WIDE_INT guard_used_by_caller = 1024;
|
||||
+
|
||||
+ /* SIZE should be large enough to require probing here. ie, it
|
||||
+ must be larger than GUARD_SIZE - GUARD_USED_BY_CALLER.
|
||||
+
|
||||
+ We can allocate GUARD_SIZE - GUARD_USED_BY_CALLER as a single chunk
|
||||
+ without any probing. */
|
||||
+ gcc_assert (size >= guard_size - guard_used_by_caller);
|
||||
+ aarch64_sub_sp (temp1, temp2, guard_size - guard_used_by_caller, true);
|
||||
+ HOST_WIDE_INT orig_size = size;
|
||||
+ size -= (guard_size - guard_used_by_caller);
|
||||
+
|
||||
+ HOST_WIDE_INT rounded_size = size & -probe_interval;
|
||||
+ HOST_WIDE_INT residual = size - rounded_size;
|
||||
+
|
||||
+ /* We can handle a small number of allocations/probes inline. Otherwise
|
||||
+ punt to a loop. */
|
||||
+ if (rounded_size && rounded_size <= 4 * probe_interval)
|
||||
+ {
|
||||
+ /* We don't use aarch64_sub_sp here because we don't want to
|
||||
+ repeatedly load TEMP1. */
|
||||
+ rtx step = GEN_INT (-probe_interval);
|
||||
+ if (probe_interval > ARITH_FACTOR)
|
||||
+ {
|
||||
+ emit_move_insn (temp1, step);
|
||||
+ step = temp1;
|
||||
+ }
|
||||
+
|
||||
+ for (HOST_WIDE_INT i = 0; i < rounded_size; i += probe_interval)
|
||||
+ {
|
||||
+ rtx_insn *insn = emit_insn (gen_add2_insn (stack_pointer_rtx, step));
|
||||
+ add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
|
||||
+
|
||||
+ if (probe_interval > ARITH_FACTOR)
|
||||
+ {
|
||||
+ RTX_FRAME_RELATED_P (insn) = 1;
|
||||
+ rtx adj = plus_constant (Pmode, stack_pointer_rtx, -probe_interval);
|
||||
+ add_reg_note (insn, REG_CFA_ADJUST_CFA,
|
||||
+ gen_rtx_SET (stack_pointer_rtx, adj));
|
||||
+ }
|
||||
+
|
||||
+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
|
||||
+ (probe_interval
|
||||
+ - GET_MODE_SIZE (word_mode))));
|
||||
+ emit_insn (gen_blockage ());
|
||||
+ }
|
||||
+ dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
|
||||
+ }
|
||||
+ else if (rounded_size)
|
||||
+ {
|
||||
+ /* Compute the ending address. */
|
||||
+ unsigned int scratchreg = REGNO (temp1);
|
||||
+ emit_move_insn (temp1, GEN_INT (-rounded_size));
|
||||
+ rtx_insn *insn
|
||||
+ = emit_insn (gen_add3_insn (temp1, stack_pointer_rtx, temp1));
|
||||
+
|
||||
+ /* For the initial allocation, we don't have a frame pointer
|
||||
+ set up, so we always need CFI notes. If we're doing the
|
||||
+ final allocation, then we may have a frame pointer, in which
|
||||
+ case it is the CFA, otherwise we need CFI notes.
|
||||
+
|
||||
+ We can determine which allocation we are doing by looking at
|
||||
+ the temporary register. IP0 is the initial allocation, IP1
|
||||
+ is the final allocation. */
|
||||
+ if (scratchreg == IP0_REGNUM || !frame_pointer_needed)
|
||||
+ {
|
||||
+ /* We want the CFA independent of the stack pointer for the
|
||||
+ duration of the loop. */
|
||||
+ add_reg_note (insn, REG_CFA_DEF_CFA,
|
||||
+ plus_constant (Pmode, temp1,
|
||||
+ (rounded_size + (orig_size - size))));
|
||||
+ RTX_FRAME_RELATED_P (insn) = 1;
|
||||
+ }
|
||||
+
|
||||
+ /* This allocates and probes the stack.
|
||||
+
|
||||
+ It also probes at a 4k interval regardless of the value of
|
||||
+ PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL. */
|
||||
+ insn = emit_insn (gen_probe_stack_range (stack_pointer_rtx,
|
||||
+ stack_pointer_rtx, temp1));
|
||||
+
|
||||
+ /* Now reset the CFA register if needed. */
|
||||
+ if (scratchreg == IP0_REGNUM || !frame_pointer_needed)
|
||||
+ {
|
||||
+ add_reg_note (insn, REG_CFA_DEF_CFA,
|
||||
+ plus_constant (Pmode, stack_pointer_rtx,
|
||||
+ (rounded_size + (orig_size - size))));
|
||||
+ RTX_FRAME_RELATED_P (insn) = 1;
|
||||
+ }
|
||||
+
|
||||
+ emit_insn (gen_blockage ());
|
||||
+ dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
|
||||
+ }
|
||||
+ else
|
||||
+ dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
|
||||
+
|
||||
+ /* Handle any residuals.
|
||||
+ Note that any residual must be probed. */
|
||||
+ if (residual)
|
||||
+ {
|
||||
+ aarch64_sub_sp (temp1, temp2, residual, true);
|
||||
+ add_reg_note (get_last_insn (), REG_STACK_CHECK, const0_rtx);
|
||||
+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
|
||||
+ (residual - GET_MODE_SIZE (word_mode))));
|
||||
+ emit_insn (gen_blockage ());
|
||||
+ }
|
||||
+ return;
|
||||
+}
|
||||
+
|
||||
/* Add a REG_CFA_EXPRESSION note to INSN to say that register REG
|
||||
is saved at BASE + OFFSET. */
|
||||
|
||||
@@ -4686,7 +4820,54 @@ aarch64_expand_prologue (void)
|
||||
rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM);
|
||||
rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM);
|
||||
|
||||
- aarch64_sub_sp (ip0_rtx, ip1_rtx, initial_adjust, true);
|
||||
+ /* We do not fully protect aarch64 against stack clash style attacks
|
||||
+ as doing so would be prohibitively expensive with less utility over
|
||||
+ time as newer compilers are deployed.
|
||||
+
|
||||
+ We assume the guard is at least 64k. Furthermore, we assume that
|
||||
+ the caller has not pushed the stack pointer more than 1k into
|
||||
+ the guard. A caller that pushes the stack pointer more than 1k into
|
||||
+ the guard is considered invalid.
|
||||
+
|
||||
+ Note that the caller's ability to push the stack pointer into the
|
||||
+ guard is a function of the number and size of outgoing arguments and/or
|
||||
+ dynamic stack allocations due to the mandatory save of the link register
|
||||
+ in the caller's frame.
|
||||
+
|
||||
+ With those assumptions the callee can allocate up to 63k of stack
|
||||
+ space without probing.
|
||||
+
|
||||
+ When probing is needed, we emit a probe at the start of the prologue
|
||||
+ and every PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes thereafter.
|
||||
+
|
||||
+ We have to track how much space has been allocated, but we do not
|
||||
+ track stores into the stack as implicit probes except for the
|
||||
+ fp/lr store. */
|
||||
+ HOST_WIDE_INT guard_size
|
||||
+ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
|
||||
+ HOST_WIDE_INT guard_used_by_caller = 1024;
|
||||
+ if (flag_stack_clash_protection)
|
||||
+ {
|
||||
+ if (known_eq (frame_size, 0))
|
||||
+ dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
|
||||
+ else if (known_lt (initial_adjust, guard_size - guard_used_by_caller)
|
||||
+ && known_lt (final_adjust, guard_size - guard_used_by_caller))
|
||||
+ dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
|
||||
+ }
|
||||
+
|
||||
+ /* In theory we should never have both an initial adjustment
|
||||
+ and a callee save adjustment. Verify that is the case since the
|
||||
+ code below does not handle it for -fstack-clash-protection. */
|
||||
+ gcc_assert (known_eq (initial_adjust, 0) || callee_adjust == 0);
|
||||
+
|
||||
+ /* Only probe if the initial adjustment is larger than the guard
|
||||
+ less the amount of the guard reserved for use by the caller's
|
||||
+ outgoing args. */
|
||||
+ if (flag_stack_clash_protection
|
||||
+ && maybe_ge (initial_adjust, guard_size - guard_used_by_caller))
|
||||
+ aarch64_allocate_and_probe_stack_space (ip0_rtx, ip1_rtx, initial_adjust);
|
||||
+ else
|
||||
+ aarch64_sub_sp (ip0_rtx, ip1_rtx, initial_adjust, true);
|
||||
|
||||
if (callee_adjust != 0)
|
||||
aarch64_push_regs (reg1, reg2, callee_adjust);
|
||||
@@ -4742,7 +4923,31 @@ aarch64_expand_prologue (void)
|
||||
callee_adjust != 0 || emit_frame_chain);
|
||||
aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
|
||||
callee_adjust != 0 || emit_frame_chain);
|
||||
- aarch64_sub_sp (ip1_rtx, ip0_rtx, final_adjust, !frame_pointer_needed);
|
||||
+
|
||||
+ /* We may need to probe the final adjustment as well. */
|
||||
+ if (flag_stack_clash_protection && maybe_ne (final_adjust, 0))
|
||||
+ {
|
||||
+ /* First probe if the final adjustment is larger than the guard size
|
||||
+ less the amount of the guard reserved for use by the caller's
|
||||
+ outgoing args. */
|
||||
+ if (maybe_ge (final_adjust, guard_size - guard_used_by_caller))
|
||||
+ aarch64_allocate_and_probe_stack_space (ip1_rtx, ip0_rtx,
|
||||
+ final_adjust);
|
||||
+ else
|
||||
+ aarch64_sub_sp (ip1_rtx, ip0_rtx, final_adjust, !frame_pointer_needed);
|
||||
+
|
||||
+ /* We must also probe if the final adjustment is larger than the guard
|
||||
+ that is assumed used by the caller. This may be sub-optimal. */
|
||||
+ if (maybe_ge (final_adjust, guard_used_by_caller))
|
||||
+ {
|
||||
+ if (dump_file)
|
||||
+ fprintf (dump_file,
|
||||
+ "Stack clash aarch64 large outgoing arg, probing\n");
|
||||
+ emit_stack_probe (stack_pointer_rtx);
|
||||
+ }
|
||||
+ }
|
||||
+ else
|
||||
+ aarch64_sub_sp (ip1_rtx, ip0_rtx, final_adjust, !frame_pointer_needed);
|
||||
}
|
||||
|
||||
/* Return TRUE if we can use a simple_return insn.
|
||||
@@ -10476,6 +10681,12 @@ aarch64_override_options_internal (struct gcc_options *opts)
|
||||
&& opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
|
||||
opts->x_flag_prefetch_loop_arrays = 1;
|
||||
|
||||
+ /* We assume the guard page is 64k. */
|
||||
+ maybe_set_param_value (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE,
|
||||
+ 16,
|
||||
+ opts->x_param_values,
|
||||
+ global_options_set.x_param_values);
|
||||
+
|
||||
aarch64_override_options_after_change_1 (opts);
|
||||
}
|
||||
|
||||
@@ -17161,6 +17372,28 @@ aarch64_sched_can_speculate_insn (rtx_insn *insn)
|
||||
}
|
||||
}
|
||||
|
||||
+/* It has been decided to allow up to 1kb of outgoing argument
|
||||
+ space to be allocated w/o probing. If more than 1kb of outgoing
|
||||
+ argument space is allocated, then it must be probed and the last
|
||||
+ probe must occur no more than 1kbyte away from the end of the
|
||||
+ allocated space.
|
||||
+
|
||||
+ This implies that the residual part of an alloca allocation may
|
||||
+ need probing in cases where the generic code might not otherwise
|
||||
+ think a probe is needed.
|
||||
+
|
||||
+ This target hook returns TRUE when allocating RESIDUAL bytes of
|
||||
+ alloca space requires an additional probe, otherwise FALSE is
|
||||
+ returned. */
|
||||
+
|
||||
+static bool
|
||||
+aarch64_stack_clash_protection_final_dynamic_probe (rtx residual)
|
||||
+{
|
||||
+ return (residual == CONST0_RTX (Pmode)
|
||||
+ || GET_CODE (residual) != CONST_INT
|
||||
+ || INTVAL (residual) >= 1024);
|
||||
+}
|
||||
+
|
||||
/* Implement TARGET_COMPUTE_PRESSURE_CLASSES. */
|
||||
|
||||
static int
|
||||
@@ -17669,6 +17902,10 @@ aarch64_libgcc_floating_mode_supported_p
|
||||
#undef TARGET_CONSTANT_ALIGNMENT
|
||||
#define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment
|
||||
|
||||
+#undef TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE
|
||||
+#define TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE \
|
||||
+ aarch64_stack_clash_protection_final_dynamic_probe
|
||||
+
|
||||
#undef TARGET_COMPUTE_PRESSURE_CLASSES
|
||||
#define TARGET_COMPUTE_PRESSURE_CLASSES aarch64_compute_pressure_classes
|
||||
|
||||
--- gcc/config/aarch64/aarch64.md
|
||||
+++ gcc/config/aarch64/aarch64.md
|
||||
@@ -5812,7 +5812,7 @@
|
||||
)
|
||||
|
||||
(define_insn "probe_stack_range"
|
||||
- [(set (match_operand:DI 0 "register_operand" "=r")
|
||||
+ [(set (match_operand:DI 0 "register_operand" "=rk")
|
||||
(unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")
|
||||
(match_operand:DI 2 "register_operand" "r")]
|
||||
UNSPECV_PROBE_STACK_RANGE))]
|
||||
--- gcc/testsuite/gcc.target/aarch64/stack-check-12.c
|
||||
+++ gcc/testsuite/gcc.target/aarch64/stack-check-12.c
|
||||
@@ -0,0 +1,20 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
|
||||
+/* { dg-require-effective-target supports_stack_clash_protection } */
|
||||
+
|
||||
+extern void arf (unsigned long int *, unsigned long int *);
|
||||
+void
|
||||
+frob ()
|
||||
+{
|
||||
+ unsigned long int num[1000];
|
||||
+ unsigned long int den[1000];
|
||||
+ arf (den, num);
|
||||
+}
|
||||
+
|
||||
+/* This verifies that the scheduler did not break the dependencies
|
||||
+ by adjusting the offsets within the probe and that the scheduler
|
||||
+ did not reorder around the stack probes. */
|
||||
+/* { dg-final { scan-assembler-times "sub\\tsp, sp, #4096\\n\\tstr\\txzr, .sp, 4088." 3 } } */
|
||||
+
|
||||
+
|
||||
+
|
||||
--- gcc/testsuite/gcc.target/aarch64/stack-check-13.c
|
||||
+++ gcc/testsuite/gcc.target/aarch64/stack-check-13.c
|
||||
@@ -0,0 +1,28 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
|
||||
+/* { dg-require-effective-target supports_stack_clash_protection } */
|
||||
+
|
||||
+#define ARG32(X) X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
|
||||
+#define ARG192(X) ARG32(X),ARG32(X),ARG32(X),ARG32(X),ARG32(X),ARG32(X)
|
||||
+void out1(ARG192(__int128));
|
||||
+int t1(int);
|
||||
+
|
||||
+int t3(int x)
|
||||
+{
|
||||
+ if (x < 1000)
|
||||
+ return t1 (x) + 1;
|
||||
+
|
||||
+ out1 (ARG192(1));
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+
|
||||
+
|
||||
+/* This test creates a large (> 1k) outgoing argument area that needs
|
||||
+ to be probed. We don't test the exact size of the space or the
|
||||
+ exact offset to make the test a little less sensitive to trivial
|
||||
+ output changes. */
|
||||
+/* { dg-final { scan-assembler-times "sub\\tsp, sp, #....\\n\\tstr\\txzr, \\\[sp" 1 } } */
|
||||
+
|
||||
+
|
||||
+
|
||||
--- gcc/testsuite/gcc.target/aarch64/stack-check-14.c
|
||||
+++ gcc/testsuite/gcc.target/aarch64/stack-check-14.c
|
||||
@@ -0,0 +1,25 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
|
||||
+/* { dg-require-effective-target supports_stack_clash_protection } */
|
||||
+
|
||||
+int t1(int);
|
||||
+
|
||||
+int t2(int x)
|
||||
+{
|
||||
+ char *p = __builtin_alloca (4050);
|
||||
+ x = t1 (x);
|
||||
+ return p[x];
|
||||
+}
|
||||
+
|
||||
+
|
||||
+/* This test has a constant sized alloca that is smaller than the
|
||||
+ probe interval. But it actually requires two probes instead
|
||||
+ of one because of the optimistic assumptions we made in the
|
||||
+ aarch64 prologue code WRT probing state.
|
||||
+
|
||||
+ The form can change quite a bit so we just check for two
|
||||
+ probes without looking at the actual address. */
|
||||
+/* { dg-final { scan-assembler-times "str\\txzr," 2 } } */
|
||||
+
|
||||
+
|
||||
+
|
||||
--- gcc/testsuite/gcc.target/aarch64/stack-check-15.c
|
||||
+++ gcc/testsuite/gcc.target/aarch64/stack-check-15.c
|
||||
@@ -0,0 +1,24 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */
|
||||
+/* { dg-require-effective-target supports_stack_clash_protection } */
|
||||
+
|
||||
+int t1(int);
|
||||
+
|
||||
+int t2(int x)
|
||||
+{
|
||||
+ char *p = __builtin_alloca (x);
|
||||
+ x = t1 (x);
|
||||
+ return p[x];
|
||||
+}
|
||||
+
|
||||
+
|
||||
+/* This test has a variable sized alloca. It requires 3 probes.
|
||||
+ One in the loop, one for the residual and one at the end of the
|
||||
+ alloca area.
|
||||
+
|
||||
+ The form can change quite a bit so we just check for three
|
||||
+ probes without looking at the actual address. */
|
||||
+/* { dg-final { scan-assembler-times "str\\txzr," 3 } } */
|
||||
+
|
||||
+
|
||||
+
|
||||
--- gcc/testsuite/lib/target-supports.exp
|
||||
+++ gcc/testsuite/lib/target-supports.exp
|
||||
@@ -9201,14 +9201,9 @@ proc check_effective_target_autoincdec { } {
|
||||
#
|
||||
proc check_effective_target_supports_stack_clash_protection { } {
|
||||
|
||||
- # Temporary until the target bits are fully ACK'd.
|
||||
-# if { [istarget aarch*-*-*] } {
|
||||
-# return 1
|
||||
-# }
|
||||
-
|
||||
if { [istarget x86_64-*-*] || [istarget i?86-*-*]
|
||||
|| [istarget powerpc*-*-*] || [istarget rs6000*-*-*]
|
||||
- || [istarget s390*-*-*] } {
|
||||
+ || [istarget aarch64*-**] || [istarget s390*-*-*] } {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
@@ -9217,9 +9212,9 @@ proc check_effective_target_supports_stack_clash_protection { } {
|
||||
# Return 1 if the target creates a frame pointer for non-leaf functions
|
||||
# Note we ignore cases where we apply tail call optimization here.
|
||||
proc check_effective_target_frame_pointer_for_non_leaf { } {
|
||||
- if { [istarget aarch*-*-*] } {
|
||||
- return 1
|
||||
- }
|
||||
+# if { [istarget aarch*-*-*] } {
|
||||
+# return 1
|
||||
+# }
|
||||
|
||||
# Solaris/x86 defaults to -fno-omit-frame-pointer.
|
||||
if { [istarget i?86-*-solaris*] || [istarget x86_64-*-solaris*] } {
|
6
sources
6
sources
@ -1,3 +1,3 @@
|
||||
SHA512 (gcc-7.3.1-20180125.tar.bz2) = fd3d407f767d01f63e6e564755c82baf25a9a9665c2dbdc0ae9796e4e522cb58b0f8ed369ed1ac31ea88ff1dddc15dc8f2e6d7461085bd54a03bbb2db6c7f102
|
||||
SHA512 (nvptx-newlib-aadc8eb0ec43b7cd0dd2dfb484bae63c8b05ef24.tar.bz2) = 38f97c9297ad108568352a4d28277455a3c01fd8b7864e798037e5006b6f757022e874bbf3f165775fe3b873781bc108137bbeb42dd5ed3c7d3e6747746fa918
|
||||
SHA512 (nvptx-tools-c28050f60193b3b95a18866a96f03334e874e78f.tar.bz2) = 95b577a06a93bb044dbc8033e550cb36bcf2ab2687da030a7318cdc90e7467ed49665e247dcafb5ff4a7e92cdc264291d19728bd17fab902fb64b22491269330
|
||||
SHA512 (gcc-8.0.1-20180127.tar.xz) = 7d46928874f0dc2e9c00435f51d4939c283a5738523d3f42b798344d6ab0d123c01b6acc3a01b4bcfd78302f3032e1b1f848d75e752047639c54d90401b76b2b
|
||||
SHA512 (nvptx-newlib-aadc8eb0ec43b7cd0dd2dfb484bae63c8b05ef24.tar.xz) = 94f7089365296f7dfa485107b4143bebc850a81586f3460fd896bbbb6ba099a00217d4042133424fd2183b352132f4fd367e6a60599bdae2a26dfd48a77d0e04
|
||||
SHA512 (nvptx-tools-c28050f60193b3b95a18866a96f03334e874e78f.tar.xz) = a688cb12cf805950a5abbb13b52f45c81dbee98e310b7ed57ae20e76dbfa5964a16270148374a6426d177db71909d28360490f091c86a5d19d4faa5127beeee1
|
||||
|
Loading…
Reference in New Issue
Block a user