diff --git a/gcc.spec b/gcc.spec index 98742a1..37c61b2 100644 --- a/gcc.spec +++ b/gcc.spec @@ -4,7 +4,7 @@ %global gcc_major 7 # Note, gcc_release must be integer, if you want to add suffixes to # %{release}, append them after %{gcc_release} on Release: line. -%global gcc_release 2 +%global gcc_release 3 %global nvptx_tools_gitrev c28050f60193b3b95a18866a96f03334e874e78f %global nvptx_newlib_gitrev aadc8eb0ec43b7cd0dd2dfb484bae63c8b05ef24 %global _unpackaged_files_terminate_build 0 @@ -242,6 +242,25 @@ Patch1000: nvptx-tools-no-ptxas.patch Patch1001: nvptx-tools-build.patch Patch1002: nvptx-tools-glibc.patch +Patch2001: gcc7-rh1512529-1.patch +Patch2002: gcc7-rh1512529-2.patch +Patch2003: gcc7-rh1512529-3.patch +Patch2004: gcc7-rh1512529-4.patch +Patch2005: gcc7-rh1512529-5.patch +Patch2006: gcc7-rh1512529-6.patch +Patch2007: gcc7-rh1512529-7.patch +Patch2008: gcc7-rh1512529-8.patch +Patch2009: gcc7-rh1512529-9.patch +Patch2010: gcc7-rh1512529-10.patch +Patch2011: gcc7-rh1512529-11.patch +Patch2012: gcc7-rh1512529-12.patch +Patch2013: gcc7-rh1512529-13.patch +Patch2014: gcc7-rh1512529-14.patch +Patch2015: gcc7-rh1512529-15.patch +Patch2016: gcc7-rh1512529-16.patch +Patch2017: gcc7-rh1512529-17.patch +Patch2018: gcc7-rh1512529-18.patch + # On ARM EABI systems, we do want -gnueabi to be part of the # target triple. %ifnarch %{arm} @@ -838,6 +857,25 @@ cd nvptx-tools-%{nvptx_tools_gitrev} %patch1002 -p1 -b .nvptx-tools-glibc~ cd .. 
+%patch2001 -p1 +%patch2002 -p1 +%patch2003 -p1 +%patch2004 -p1 +%patch2005 -p1 +%patch2006 -p1 +%patch2007 -p1 +%patch2008 -p1 +%patch2009 -p1 +%patch2010 -p1 +%patch2011 -p1 +%patch2012 -p1 +%patch2013 -p1 +%patch2014 -p1 +%patch2015 -p1 +%patch2016 -p1 +%patch2017 -p1 +%patch2018 -p1 + %if 0%{?_enable_debug_packages} mkdir dwz-wrapper if [ -f /usr/bin/dwz ]; then @@ -3248,6 +3286,9 @@ fi %endif %changelog +* Tue Nov 28 2017 Jeff Law 7.2.1-3 +- Backport -fstack-clash-protection from development trunk (#1512529) + * Fri Sep 15 2017 Jakub Jelinek 7.2.1-2 - update from the 7 branch - PRs ada/62235, ada/79441, ada/79542, bootstrap/81926, c++/81355, diff --git a/gcc7-rh1512529-1.patch b/gcc7-rh1512529-1.patch new file mode 100644 index 0000000..7b3e763 --- /dev/null +++ b/gcc7-rh1512529-1.patch @@ -0,0 +1,340 @@ +commit cc3586c7f4704acbbd3f2f99de5b82bcc8f6fe36 +Author: law +Date: Wed Sep 20 04:56:54 2017 +0000 + + * common.opt (-fstack-clash-protection): New option. + * flag-types.h (enum stack_check_type): Note difference between + -fstack-check= and -fstack-clash-protection. + * params.def (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE): New PARAM. + (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL): Likewise. + * toplev.c (process_options): Issue warnings/errors for cases + not handled with -fstack-clash-protection. + * doc/invoke.texi (-fstack-clash-protection): Document new option. + (-fstack-check): Note additional problem with -fstack-check=generic. + Note that -fstack-check is primarily for Ada and refer users + to -fstack-clash-protection for stack-clash-protection. + Document new params for stack clash protection. + + * gcc.dg/stack-check-2.c: New test. + * lib/target-supports.exp + (check_effective_target_supports_stack_clash_protection): New function. + (check_effective_target_frame_pointer_for_non_leaf): Likewise. + (check_effective_target_caller_implicit_probes): Likewise. 
+ + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@252994 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/common.opt b/gcc/common.opt +index 592bbd15309..bcaea91c130 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -2291,13 +2291,18 @@ Common Report Var(flag_variable_expansion_in_unroller) Optimization + Apply variable expansion when loops are unrolled. + + fstack-check= +-Common Report RejectNegative Joined ++Common Report RejectNegative Joined Optimization + -fstack-check=[no|generic|specific] Insert stack checking code into the program. + + fstack-check + Common Alias(fstack-check=, specific, no) + Insert stack checking code into the program. Same as -fstack-check=specific. + ++fstack-clash-protection ++Common Report Var(flag_stack_clash_protection) Optimization ++Insert code to probe each page of stack space as it is allocated to protect ++from stack-clash style attacks. ++ + fstack-limit + Common Var(common_deferred_options) Defer + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index a0fb09eb9e1..f77035571a1 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -10026,6 +10026,21 @@ compilation without. The value for compilation with profile feedback + needs to be more conservative (higher) in order to make tracer + effective. + ++@item stack-clash-protection-guard-size ++Specify the size of the operating system provided stack guard as ++2 raised to @var{num} bytes. The default value is 12 (4096 bytes). ++Acceptable values are between 12 and 30. Higher values may reduce the ++number of explicit probes, but a value larger than the operating system ++provided guard will leave code vulnerable to stack clash style attacks. ++ ++@item stack-clash-protection-probe-interval ++Stack clash protection involves probing stack space as it is allocated. This ++param controls the maximum distance between probes into the stack as 2 raised ++to @var{num} bytes. Acceptable values are between 10 and 16 and defaults to ++12. 
Higher values may reduce the number of explicit probes, but a value ++larger than the operating system provided guard will leave code vulnerable to ++stack clash style attacks. ++ + @item max-cse-path-length + + The maximum number of basic blocks on path that CSE considers. +@@ -11218,7 +11233,8 @@ target support in the compiler but comes with the following drawbacks: + @enumerate + @item + Modified allocation strategy for large objects: they are always +-allocated dynamically if their size exceeds a fixed threshold. ++allocated dynamically if their size exceeds a fixed threshold. Note this ++may change the semantics of some code. + + @item + Fixed limit on the size of the static frame of functions: when it is +@@ -11233,6 +11249,25 @@ generic implementation, code performance is hampered. + Note that old-style stack checking is also the fallback method for + @samp{specific} if no target support has been added in the compiler. + ++@samp{-fstack-check=} is designed for Ada's needs to detect infinite recursion ++and stack overflows. @samp{specific} is an excellent choice when compiling ++Ada code. It is not generally sufficient to protect against stack-clash ++attacks. To protect against those you want @samp{-fstack-clash-protection}. ++ ++@item -fstack-clash-protection ++@opindex fstack-clash-protection ++Generate code to prevent stack clash style attacks. When this option is ++enabled, the compiler will only allocate one page of stack space at a time ++and each page is accessed immediately after allocation. Thus, it prevents ++allocations from jumping over any stack guard page provided by the ++operating system. ++ ++Most targets do not fully support stack clash protection. However, on ++those targets @option{-fstack-clash-protection} will protect dynamic stack ++allocations. @option{-fstack-clash-protection} may also provide limited ++protection for static stack allocations if the target supports ++@option{-fstack-check=specific}. 
++ + @item -fstack-limit-register=@var{reg} + @itemx -fstack-limit-symbol=@var{sym} + @itemx -fno-stack-limit +diff --git a/gcc/flag-types.h b/gcc/flag-types.h +index 27a38efdc8e..4e5a4e58119 100644 +--- a/gcc/flag-types.h ++++ b/gcc/flag-types.h +@@ -166,7 +166,14 @@ enum permitted_flt_eval_methods + PERMITTED_FLT_EVAL_METHODS_C11 + }; + +-/* Type of stack check. */ ++/* Type of stack check. ++ ++ Stack checking is designed to detect infinite recursion and stack ++ overflows for Ada programs. Furthermore stack checking tries to ensure ++ in that scenario that enough stack space is left to run a signal handler. ++ ++ -fstack-check= does not prevent stack-clash style attacks. For that ++ you want -fstack-clash-protection. */ + enum stack_check_type + { + /* Do not check the stack. */ +diff --git a/gcc/params.def b/gcc/params.def +index 6b07518a34b..ce66e393eb1 100644 +--- a/gcc/params.def ++++ b/gcc/params.def +@@ -213,6 +213,16 @@ DEFPARAM(PARAM_STACK_FRAME_GROWTH, + "Maximal stack frame growth due to inlining (in percent).", + 1000, 0, 0) + ++DEFPARAM(PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE, ++ "stack-clash-protection-guard-size", ++ "Size of the stack guard expressed as a power of two.", ++ 12, 12, 30) ++ ++DEFPARAM(PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL, ++ "stack-clash-protection-probe-interval", ++ "Interval in which to probe the stack expressed as a power of two.", ++ 12, 10, 16) ++ + /* The GCSE optimization will be disabled if it would require + significantly more memory than this value. */ + DEFPARAM(PARAM_MAX_GCSE_MEMORY, +diff --git a/gcc/testsuite/gcc.dg/stack-check-2.c b/gcc/testsuite/gcc.dg/stack-check-2.c +new file mode 100644 +index 00000000000..196c4bbfbdd +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/stack-check-2.c +@@ -0,0 +1,66 @@ ++/* The goal here is to ensure that we never consider a call to a noreturn ++ function as a potential tail call. 
++ ++ Right now GCC discovers potential tail calls by looking at the ++ predecessors of the exit block. A call to a non-return function ++ has no successors and thus can never match that first filter. ++ ++ But that could change one day and we want to catch it. The problem ++ is the compiler could potentially optimize a tail call to a nonreturn ++ function, even if the caller has a frame. That breaks the assumption ++ that calls probe *sp when saving the return address that some targets ++ depend on to elide stack probes. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection -fdump-tree-tailc -fdump-tree-optimized" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++extern void foo (void) __attribute__ ((__noreturn__)); ++ ++ ++void ++test_direct_1 (void) ++{ ++ foo (); ++} ++ ++void ++test_direct_2 (void) ++{ ++ return foo (); ++} ++ ++void (*indirect)(void)__attribute__ ((noreturn)); ++ ++ ++void ++test_indirect_1 () ++{ ++ (*indirect)(); ++} ++ ++void ++test_indirect_2 (void) ++{ ++ return (*indirect)();; ++} ++ ++ ++typedef void (*pvfn)() __attribute__ ((noreturn)); ++ ++void (*indirect_casted)(void); ++ ++void ++test_indirect_casted_1 () ++{ ++ (*(pvfn)indirect_casted)(); ++} ++ ++void ++test_indirect_casted_2 (void) ++{ ++ return (*(pvfn)indirect_casted)(); ++} ++/* { dg-final { scan-tree-dump-not "tail call" "tailc" } } */ ++/* { dg-final { scan-tree-dump-not "tail call" "optimized" } } */ ++ +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index 57caec74836..1000f15358b 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -8371,3 +8371,80 @@ proc check_effective_target_arm_coproc4_ok { } { + return [check_cached_effective_target arm_coproc4_ok \ + check_effective_target_arm_coproc4_ok_nocache] + } ++ ++# Return 1 if the target has support for stack probing designed ++# to avoid stack-clash style attacks. 
++# ++# This is used to restrict the stack-clash mitigation tests to ++# just those targets that have been explicitly supported. ++# ++# In addition to the prologue work on those targets, each target's ++# properties should be described in the functions below so that ++# tests do not become a mess of unreadable target conditions. ++# ++proc check_effective_target_supports_stack_clash_protection { } { ++ ++ # Temporary until the target bits are fully ACK'd. ++# if { [istarget aarch*-*-*] || [istarget x86_64-*-*] ++# || [istarget i?86-*-*] || [istarget s390*-*-*] ++# || [istarget powerpc*-*-*] || [istarget rs6000*-*-*] } { ++# return 1 ++# } ++ return 0 ++} ++ ++# Return 1 if the target creates a frame pointer for non-leaf functions ++# Note we ignore cases where we apply tail call optimization here. ++proc check_effective_target_frame_pointer_for_non_leaf { } { ++ if { [istarget aarch*-*-*] } { ++ return 1 ++ } ++ return 0 ++} ++ ++# Return 1 if the target's calling sequence or its ABI ++# create implicit stack probes at or prior to function entry. ++proc check_effective_target_caller_implicit_probes { } { ++ ++ # On x86/x86_64 the call instruction itself pushes the return ++ # address onto the stack. That is an implicit probe of *sp. ++ if { [istarget x86_64-*-*] || [istarget i?86-*-*] } { ++ return 1 ++ } ++ ++ # On PPC, the ABI mandates that the address of the outer ++ # frame be stored at *sp. Thus each allocation of stack ++ # space is itself an implicit probe of *sp. ++ if { [istarget powerpc*-*-*] || [istarget rs6000*-*-*] } { ++ return 1 ++ } ++ ++ # s390's ABI has a register save area allocated by the ++ # caller for use by the callee. The mere existence does ++ # not constitute a probe by the caller, but when the slots ++ # used by the callee those stores are implicit probes. 
++ if { [istarget s390*-*-*] } { ++ return 1 ++ } ++ ++ # Not strictly true on aarch64, but we have agreed that we will ++ # consider any function that pushes SP more than 3kbytes into ++ # the guard page as broken. This essentially means that we can ++ # consider the aarch64 as having a caller implicit probe at ++ # *(sp + 1k). ++ if { [istarget aarch64*-*-*] } { ++ return 1; ++ } ++ ++ return 0 ++} ++ ++# Targets that potentially realign the stack pointer often cause residual ++# stack allocations and make it difficult to eliminate loops or residual ++# allocations for dynamic stack allocations ++proc check_effective_target_callee_realigns_stack { } { ++ if { [istarget x86_64-*-*] || [istarget i?86-*-*] } { ++ return 1 ++ } ++ return 0 ++} +diff --git a/gcc/toplev.c b/gcc/toplev.c +index e7a5d487313..a7da7964fbb 100644 +--- a/gcc/toplev.c ++++ b/gcc/toplev.c +@@ -1574,6 +1574,26 @@ process_options (void) + flag_associative_math = 0; + } + ++ /* -fstack-clash-protection is not currently supported on targets ++ where the stack grows up. */ ++ if (flag_stack_clash_protection && !STACK_GROWS_DOWNWARD) ++ { ++ warning_at (UNKNOWN_LOCATION, 0, ++ "%<-fstack-clash-protection%> is not supported on targets " ++ "where the stack grows from lower to higher addresses"); ++ flag_stack_clash_protection = 0; ++ } ++ ++ /* We can not support -fstack-check= and -fstack-clash-protection at ++ the same time. */ ++ if (flag_stack_check != NO_STACK_CHECK && flag_stack_clash_protection) ++ { ++ warning_at (UNKNOWN_LOCATION, 0, ++ "%<-fstack-check=%> and %<-fstack-clash-protection%> are " ++ "mutually exclusive. Disabling %<-fstack-check=%>"); ++ flag_stack_check = NO_STACK_CHECK; ++ } ++ ++ /* With -fcx-limited-range, we do cheap and quick complex arithmetic.
*/ + if (flag_cx_limited_range) + flag_complex_method = 0; diff --git a/gcc7-rh1512529-10.patch b/gcc7-rh1512529-10.patch new file mode 100644 index 0000000..26ccbf4 --- /dev/null +++ b/gcc7-rh1512529-10.patch @@ -0,0 +1,605 @@ +commit b552ddab57a34fe1acb8d714b5f556082600b9e0 +Author: law +Date: Mon Sep 25 23:13:55 2017 +0000 + + * config/rs6000/rs6000-protos.h (output_probe_stack_range): Update + prototype for new argument. + * config/rs6000/rs6000.c (rs6000_emit_allocate_stack_1): New function, + mostly extracted from rs6000_emit_allocate_stack. + (rs6000_emit_probe_stack_range_stack_clash): New function. + (rs6000_emit_allocate_stack): Call + rs6000_emit_probe_stack_range_stack_clash as needed. + (rs6000_emit_probe_stack_range): Add additional argument + to call to gen_probe_stack_range{si,di}. + (output_probe_stack_range): New. + (output_probe_stack_range_1): Renamed from output_probe_stack_range. + (output_probe_stack_range_stack_clash): New. + (rs6000_emit_prologue): Emit notes into dump file as requested. + * rs6000.md (allocate_stack): Handle -fstack-clash-protection. + (probe_stack_range): Operand 0 is now early-clobbered. + Add additional operand and pass it to output_probe_stack_range. + + * lib/target-supports.exp + (check_effective_target_supports_stack_clash_protection): Enable for + rs6000 and powerpc targets. 
+ + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@253179 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h +index 74ad733d1b9..d48aa88f4b1 100644 +--- a/gcc/config/rs6000/rs6000-protos.h ++++ b/gcc/config/rs6000/rs6000-protos.h +@@ -134,7 +134,7 @@ extern void rs6000_emit_sCOND (machine_mode, rtx[]); + extern void rs6000_emit_cbranch (machine_mode, rtx[]); + extern char * output_cbranch (rtx, const char *, int, rtx_insn *); + extern char * output_e500_flip_gt_bit (rtx, rtx); +-extern const char * output_probe_stack_range (rtx, rtx); ++extern const char * output_probe_stack_range (rtx, rtx, rtx); + extern bool rs6000_emit_set_const (rtx, rtx); + extern int rs6000_emit_cmove (rtx, rtx, rtx, rtx); + extern int rs6000_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx); +diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c +index 1836e1d147d..8235bff2469 100644 +--- a/gcc/config/rs6000/rs6000.c ++++ b/gcc/config/rs6000/rs6000.c +@@ -28133,6 +28133,221 @@ rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed) + emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p))); + } + ++/* Allocate SIZE_INT bytes on the stack using a store with update style insn ++ and set the appropriate attributes for the generated insn. Return the ++ first insn which adjusts the stack pointer or the last insn before ++ the stack adjustment loop. ++ ++ SIZE_INT is used to create the CFI note for the allocation. ++ ++ SIZE_RTX is an rtx containing the size of the adjustment. Note that ++ since stacks grow to lower addresses its runtime value is -SIZE_INT. ++ ++ ORIG_SP contains the backchain value that must be stored at *sp. 
*/ ++ ++static rtx_insn * ++rs6000_emit_allocate_stack_1 (HOST_WIDE_INT size_int, rtx orig_sp) ++{ ++ rtx_insn *insn; ++ ++ rtx size_rtx = GEN_INT (-size_int); ++ if (size_int > 32767) ++ { ++ rtx tmp_reg = gen_rtx_REG (Pmode, 0); ++ /* Need a note here so that try_split doesn't get confused. */ ++ if (get_last_insn () == NULL_RTX) ++ emit_note (NOTE_INSN_DELETED); ++ insn = emit_move_insn (tmp_reg, size_rtx); ++ try_split (PATTERN (insn), insn, 0); ++ size_rtx = tmp_reg; ++ } ++ ++ if (Pmode == SImode) ++ insn = emit_insn (gen_movsi_update_stack (stack_pointer_rtx, ++ stack_pointer_rtx, ++ size_rtx, ++ orig_sp)); ++ else ++ insn = emit_insn (gen_movdi_di_update_stack (stack_pointer_rtx, ++ stack_pointer_rtx, ++ size_rtx, ++ orig_sp)); ++ rtx par = PATTERN (insn); ++ gcc_assert (GET_CODE (par) == PARALLEL); ++ rtx set = XVECEXP (par, 0, 0); ++ gcc_assert (GET_CODE (set) == SET); ++ rtx mem = SET_DEST (set); ++ gcc_assert (MEM_P (mem)); ++ MEM_NOTRAP_P (mem) = 1; ++ set_mem_alias_set (mem, get_frame_alias_set ()); ++ ++ RTX_FRAME_RELATED_P (insn) = 1; ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, ++ gen_rtx_SET (stack_pointer_rtx, ++ gen_rtx_PLUS (Pmode, ++ stack_pointer_rtx, ++ GEN_INT (-size_int)))); ++ ++ /* Emit a blockage to ensure the allocation/probing insns are ++ not optimized, combined, removed, etc. Add REG_STACK_CHECK ++ note for similar reasons. 
*/ ++ if (flag_stack_clash_protection) ++ { ++ add_reg_note (insn, REG_STACK_CHECK, const0_rtx); ++ emit_insn (gen_blockage ()); ++ } ++ ++ return insn; ++} ++ ++static HOST_WIDE_INT ++get_stack_clash_protection_probe_interval (void) ++{ ++ return (HOST_WIDE_INT_1U ++ << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL)); ++} ++ ++static HOST_WIDE_INT ++get_stack_clash_protection_guard_size (void) ++{ ++ return (HOST_WIDE_INT_1U ++ << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE)); ++} ++ ++/* Allocate ORIG_SIZE bytes on the stack and probe the newly ++ allocated space every STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes. ++ ++ COPY_REG, if non-null, should contain a copy of the original ++ stack pointer at exit from this function. ++ ++ This is subtly different than the Ada probing in that it tries hard to ++ prevent attacks that jump the stack guard. Thus it is never allowed to ++ allocate more than STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes of stack ++ space without a suitable probe. */ ++static rtx_insn * ++rs6000_emit_probe_stack_range_stack_clash (HOST_WIDE_INT orig_size, ++ rtx copy_reg) ++{ ++ rtx orig_sp = copy_reg; ++ ++ HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval (); ++ ++ /* Round the size down to a multiple of PROBE_INTERVAL. */ ++ HOST_WIDE_INT rounded_size = ROUND_DOWN (orig_size, probe_interval); ++ ++ /* If explicitly requested, ++ or the rounded size is not the same as the original size ++ or the rounded size is greater than a page, ++ then we will need a copy of the original stack pointer. */ ++ if (rounded_size != orig_size ++ || rounded_size > probe_interval ++ || copy_reg) ++ { ++ /* If the caller did not request a copy of the incoming stack ++ pointer, then we use r0 to hold the copy. */ ++ if (!copy_reg) ++ orig_sp = gen_rtx_REG (Pmode, 0); ++ emit_move_insn (orig_sp, stack_pointer_rtx); ++ } ++ ++ /* There's three cases here.
++ ++ One is a single probe which is the most common and most efficiently ++ implemented as it does not have to have a copy of the original ++ stack pointer if there are no residuals. ++ ++ Second is unrolled allocation/probes which we use if there's just ++ a few of them. It needs to save the original stack pointer into a ++ temporary for use as a source register in the allocation/probe. ++ ++ Last is a loop. This is the most uncommon case and least efficient. */ ++ rtx_insn *retval = NULL; ++ if (rounded_size == probe_interval) ++ { ++ retval = rs6000_emit_allocate_stack_1 (probe_interval, stack_pointer_rtx); ++ ++ dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size); ++ } ++ else if (rounded_size <= 8 * probe_interval) ++ { ++ /* The ABI requires using the store with update insns to allocate ++ space and store the backchain into the stack ++ ++ So we save the current stack pointer into a temporary, then ++ emit the store-with-update insns to store the saved stack pointer ++ into the right location in each new page. */ ++ for (int i = 0; i < rounded_size; i += probe_interval) ++ { ++ rtx_insn *insn ++ = rs6000_emit_allocate_stack_1 (probe_interval, orig_sp); ++ ++ /* Save the first stack adjustment in RETVAL. */ ++ if (i == 0) ++ retval = insn; ++ } ++ ++ dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size); ++ } ++ else ++ { ++ /* Compute the ending address. */ ++ rtx end_addr ++ = copy_reg ? gen_rtx_REG (Pmode, 0) : gen_rtx_REG (Pmode, 12); ++ rtx rs = GEN_INT (-rounded_size); ++ rtx_insn *insn; ++ if (add_operand (rs, Pmode)) ++ insn = emit_insn (gen_add3_insn (end_addr, stack_pointer_rtx, rs)); ++ else ++ { ++ emit_move_insn (end_addr, GEN_INT (-rounded_size)); ++ insn = emit_insn (gen_add3_insn (end_addr, end_addr, ++ stack_pointer_rtx)); ++ /* Describe the effect of INSN to the CFI engine. 
*/ ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, ++ gen_rtx_SET (end_addr, ++ gen_rtx_PLUS (Pmode, stack_pointer_rtx, ++ rs))); ++ } ++ RTX_FRAME_RELATED_P (insn) = 1; ++ ++ /* Emit the loop. */ ++ if (TARGET_64BIT) ++ retval = emit_insn (gen_probe_stack_rangedi (stack_pointer_rtx, ++ stack_pointer_rtx, orig_sp, ++ end_addr)); ++ else ++ retval = emit_insn (gen_probe_stack_rangesi (stack_pointer_rtx, ++ stack_pointer_rtx, orig_sp, ++ end_addr)); ++ RTX_FRAME_RELATED_P (retval) = 1; ++ /* Describe the effect of INSN to the CFI engine. */ ++ add_reg_note (retval, REG_FRAME_RELATED_EXPR, ++ gen_rtx_SET (stack_pointer_rtx, end_addr)); ++ ++ /* Emit a blockage to ensure the allocation/probing insns are ++ not optimized, combined, removed, etc. Other cases handle this ++ within their call to rs6000_emit_allocate_stack_1. */ ++ emit_insn (gen_blockage ()); ++ ++ dump_stack_clash_frame_info (PROBE_LOOP, rounded_size != orig_size); ++ } ++ ++ if (orig_size != rounded_size) ++ { ++ /* Allocate (and implicitly probe) any residual space. */ ++ HOST_WIDE_INT residual = orig_size - rounded_size; ++ ++ rtx_insn *insn = rs6000_emit_allocate_stack_1 (residual, orig_sp); ++ ++ /* If the residual was the only allocation, then we can return the ++ allocating insn. */ ++ if (!retval) ++ retval = insn; ++ } ++ ++ return retval; ++} ++ + /* Emit the correct code for allocating stack space, as insns. + If COPY_REG, make sure a copy of the old frame is left there. + The generated code may use hard register 0 as a temporary. 
*/ +@@ -28144,7 +28359,6 @@ rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off) + rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); + rtx tmp_reg = gen_rtx_REG (Pmode, 0); + rtx todec = gen_int_mode (-size, Pmode); +- rtx par, set, mem; + + if (INTVAL (todec) != -size) + { +@@ -28184,6 +28398,23 @@ rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off) + warning (0, "stack limit expression is not supported"); + } + ++ if (flag_stack_clash_protection) ++ { ++ if (size < get_stack_clash_protection_guard_size ()) ++ dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); ++ else ++ { ++ rtx_insn *insn = rs6000_emit_probe_stack_range_stack_clash (size, ++ copy_reg); ++ ++ /* If we asked for a copy with an offset, then we still need add in ++ the offset. */ ++ if (copy_reg && copy_off) ++ emit_insn (gen_add3_insn (copy_reg, copy_reg, GEN_INT (copy_off))); ++ return insn; ++ } ++ } ++ + if (copy_reg) + { + if (copy_off != 0) +@@ -28192,38 +28423,11 @@ rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off) + emit_move_insn (copy_reg, stack_reg); + } + +- if (size > 32767) +- { +- /* Need a note here so that try_split doesn't get confused. */ +- if (get_last_insn () == NULL_RTX) +- emit_note (NOTE_INSN_DELETED); +- insn = emit_move_insn (tmp_reg, todec); +- try_split (PATTERN (insn), insn, 0); +- todec = tmp_reg; +- } +- +- insn = emit_insn (TARGET_32BIT +- ? gen_movsi_update_stack (stack_reg, stack_reg, +- todec, stack_reg) +- : gen_movdi_di_update_stack (stack_reg, stack_reg, +- todec, stack_reg)); + /* Since we didn't use gen_frame_mem to generate the MEM, grab + it now and set the alias set/attributes. The above gen_*_update + calls will generate a PARALLEL with the MEM set being the first + operation. 
*/ +- par = PATTERN (insn); +- gcc_assert (GET_CODE (par) == PARALLEL); +- set = XVECEXP (par, 0, 0); +- gcc_assert (GET_CODE (set) == SET); +- mem = SET_DEST (set); +- gcc_assert (MEM_P (mem)); +- MEM_NOTRAP_P (mem) = 1; +- set_mem_alias_set (mem, get_frame_alias_set ()); +- +- RTX_FRAME_RELATED_P (insn) = 1; +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, +- gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg, +- GEN_INT (-size)))); ++ insn = rs6000_emit_allocate_stack_1 (size, stack_reg); + return insn; + } + +@@ -28305,9 +28509,9 @@ rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) + until it is equal to ROUNDED_SIZE. */ + + if (TARGET_64BIT) +- emit_insn (gen_probe_stack_rangedi (r12, r12, r0)); ++ emit_insn (gen_probe_stack_rangedi (r12, r12, stack_pointer_rtx, r0)); + else +- emit_insn (gen_probe_stack_rangesi (r12, r12, r0)); ++ emit_insn (gen_probe_stack_rangesi (r12, r12, stack_pointer_rtx, r0)); + + + /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time +@@ -28319,10 +28523,10 @@ rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) + } + + /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are +- absolute addresses. */ ++ addresses, not offsets. */ + +-const char * +-output_probe_stack_range (rtx reg1, rtx reg2) ++static const char * ++output_probe_stack_range_1 (rtx reg1, rtx reg2) + { + static int labelno = 0; + char loop_lab[32]; +@@ -28357,6 +28561,95 @@ output_probe_stack_range (rtx reg1, rtx reg2) + return ""; + } + ++/* This function is called when rs6000_frame_related is processing ++ SETs within a PARALLEL, and returns whether the REGNO save ought to ++ be marked RTX_FRAME_RELATED_P. The PARALLELs involved are those ++ for out-of-line register save functions, store multiple, and the ++ Darwin world_save. They may contain registers that don't really ++ need saving. 
*/ ++ ++static bool ++interesting_frame_related_regno (unsigned int regno) ++{ ++ /* Saves apparently of r0 are actually saving LR. It doesn't make ++ sense to substitute the regno here to test save_reg_p (LR_REGNO). ++ We *know* LR needs saving, and dwarf2cfi.c is able to deduce that ++ (set (mem) (r0)) is saving LR from a prior (set (r0) (lr)) marked ++ as frame related. */ ++ if (regno == 0) ++ return true; ++ /* If we see CR2 then we are here on a Darwin world save. Saves of ++ CR2 signify the whole CR is being saved. This is a long-standing ++ ABI wart fixed by ELFv2. As for r0/lr there is no need to check ++ that CR needs to be saved. */ ++ if (regno == CR2_REGNO) ++ return true; ++ /* Omit frame info for any user-defined global regs. If frame info ++ is supplied for them, frame unwinding will restore a user reg. ++ Also omit frame info for any reg we don't need to save, as that ++ bloats frame info and can cause problems with shrink wrapping. ++ Since global regs won't be seen as needing to be saved, both of ++ these conditions are covered by save_reg_p. */ ++ return save_reg_p (regno); ++} ++ ++/* Probe a range of stack addresses from REG1 to REG3 inclusive. These are ++ addresses, not offsets. ++ ++ REG2 contains the backchain that must be stored into *sp at each allocation. ++ ++ This is subtly different than the Ada probing above in that it tries hard ++ to prevent attacks that jump the stack guard. Thus, it is never allowed ++ to allocate more than PROBE_INTERVAL bytes of stack space without a ++ suitable probe. */ ++ ++static const char * ++output_probe_stack_range_stack_clash (rtx reg1, rtx reg2, rtx reg3) ++{ ++ static int labelno = 0; ++ char loop_lab[32]; ++ rtx xops[3]; ++ ++ HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval (); ++ ++ ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); ++ ++ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); ++ ++ /* This allocates and probes. 
*/ ++ xops[0] = reg1; ++ xops[1] = reg2; ++ xops[2] = GEN_INT (-probe_interval); ++ if (TARGET_64BIT) ++ output_asm_insn ("stdu %1,%2(%0)", xops); ++ else ++ output_asm_insn ("stwu %1,%2(%0)", xops); ++ ++ /* Jump to LOOP_LAB if TEST_ADDR != LAST_ADDR. */ ++ xops[0] = reg1; ++ xops[1] = reg3; ++ if (TARGET_64BIT) ++ output_asm_insn ("cmpd 0,%0,%1", xops); ++ else ++ output_asm_insn ("cmpw 0,%0,%1", xops); ++ ++ fputs ("\tbne 0,", asm_out_file); ++ assemble_name_raw (asm_out_file, loop_lab); ++ fputc ('\n', asm_out_file); ++ ++ return ""; ++} ++ ++/* Wrapper around the output_probe_stack_range routines. */ ++const char * ++output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3) ++{ ++ if (flag_stack_clash_protection) ++ return output_probe_stack_range_stack_clash (reg1, reg2, reg3); ++ else ++ return output_probe_stack_range_1 (reg1, reg3); ++} ++ + /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced + with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2 + is not NULL. It would be nice if dwarf2out_frame_debug_expr could +@@ -29951,6 +30244,13 @@ rs6000_emit_prologue (void) + } + } + ++ /* If we are emitting stack probes, but allocate no stack, then ++ just note that in the dump file. */ ++ if (flag_stack_clash_protection ++ && dump_file ++ && !info->push_p) ++ dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false); ++ + /* Update stack and set back pointer unless this is V.4, + for which it was done previously. */ + if (!WORLD_SAVE_P (info) && info->push_p +diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md +index 3323976a35d..843148e9703 100644 +--- a/gcc/config/rs6000/rs6000.md ++++ b/gcc/config/rs6000/rs6000.md +@@ -10386,10 +10386,20 @@ + ;; + ;; First, an insn to allocate new stack space for dynamic use (e.g., alloca). + ;; We move the back-chain and decrement the stack pointer. +- ++;; ++;; Operand1 is more naturally reg_or_short_operand. 
However, for a large ++;; constant alloca, using that predicate will force the generic code to put ++;; the constant size into a register before calling the expander. ++;; ++;; As a result the expander would not have the constant size information ++;; in those cases and would have to generate less efficient code. ++;; ++;; Thus we allow reg_or_cint_operand instead so that the expander can see ++;; the constant size. The value is forced into a register if necessary. ++;; + (define_expand "allocate_stack" + [(set (match_operand 0 "gpc_reg_operand" "") +- (minus (reg 1) (match_operand 1 "reg_or_short_operand" ""))) ++ (minus (reg 1) (match_operand 1 "reg_or_cint_operand" ""))) + (set (reg 1) + (minus (reg 1) (match_dup 1)))] + "" +@@ -10399,6 +10409,15 @@ + rtx neg_op0; + rtx insn, par, set, mem; + ++ /* By allowing reg_or_cint_operand as the predicate we can get ++ better code for stack-clash-protection because we do not lose ++ size information. But the rest of the code expects the operand ++ to be reg_or_short_operand. If it isn't, then force it into ++ a register. */ ++ rtx orig_op1 = operands[1]; ++ if (!reg_or_short_operand (operands[1], Pmode)) ++ operands[1] = force_reg (Pmode, operands[1]); ++ + emit_move_insn (chain, stack_bot); + + /* Check stack bounds if necessary. */ +@@ -10411,6 +10430,51 @@ + emit_insn (gen_cond_trap (LTU, available, operands[1], const0_rtx)); + } + ++ /* Allocate and probe if requested. ++ This may look similar to the loop we use for prologue allocations, ++ but it is critically different. For the former we know the loop ++ will iterate, but do not know that generally here. The former ++ uses that knowledge to rotate the loop. Combining them would be ++ possible with some performance cost. 
*/ ++ if (flag_stack_clash_protection) ++ { ++ rtx rounded_size, last_addr, residual; ++ HOST_WIDE_INT probe_interval; ++ compute_stack_clash_protection_loop_data (&rounded_size, &last_addr, ++ &residual, &probe_interval, ++ orig_op1); ++ ++ /* We do occasionally get in here with constant sizes, we might ++ as well do a reasonable job when we obviously can. */ ++ if (rounded_size != const0_rtx) ++ { ++ rtx loop_lab, end_loop; ++ bool rotated = CONST_INT_P (rounded_size); ++ ++ emit_stack_clash_protection_probe_loop_start (&loop_lab, &end_loop, ++ last_addr, rotated); ++ ++ if (Pmode == SImode) ++ emit_insn (gen_movsi_update_stack (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (-probe_interval), ++ chain)); ++ else ++ emit_insn (gen_movdi_di_update_stack (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (-probe_interval), ++ chain)); ++ emit_stack_clash_protection_probe_loop_end (loop_lab, end_loop, ++ last_addr, rotated); ++ } ++ ++ /* Now handle residuals. We just have to set operands[1] correctly ++ and let the rest of the expander run. */ ++ operands[1] = residual; ++ if (!CONST_INT_P (residual)) ++ operands[1] = force_reg (Pmode, operands[1]); ++ } ++ + if (GET_CODE (operands[1]) != CONST_INT + || INTVAL (operands[1]) < -32767 + || INTVAL (operands[1]) > 32768) +@@ -11549,12 +11613,13 @@ + (set_attr "length" "4")]) + + (define_insn "probe_stack_range" +- [(set (match_operand:P 0 "register_operand" "=r") ++ [(set (match_operand:P 0 "register_operand" "=&r") + (unspec_volatile:P [(match_operand:P 1 "register_operand" "0") +- (match_operand:P 2 "register_operand" "r")] ++ (match_operand:P 2 "register_operand" "r") ++ (match_operand:P 3 "register_operand" "r")] + UNSPECV_PROBE_STACK_RANGE))] + "" +- "* return output_probe_stack_range (operands[0], operands[2]);" ++ "* return output_probe_stack_range (operands[0], operands[2], operands[3]);" + [(set_attr "type" "three")]) + + ;; Compare insns are next. 
Note that the RS/6000 has two types of compares, +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index cb58a2be35f..02eb2066393 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -8385,12 +8385,12 @@ proc check_effective_target_arm_coproc4_ok { } { + proc check_effective_target_supports_stack_clash_protection { } { + + # Temporary until the target bits are fully ACK'd. +-# if { [istarget aarch*-*-*] +-# || [istarget powerpc*-*-*] || [istarget rs6000*-*-*] } { ++# if { [istarget aarch*-*-*] } { + # return 1 + # } + + if { [istarget x86_64-*-*] || [istarget i?86-*-*] ++ || [istarget powerpc*-*-*] || [istarget rs6000*-*-*] + || [istarget s390*-*-*] } { + return 1 + } diff --git a/gcc7-rh1512529-11.patch b/gcc7-rh1512529-11.patch new file mode 100644 index 0000000..4d9c746 --- /dev/null +++ b/gcc7-rh1512529-11.patch @@ -0,0 +1,28 @@ +commit eecbe7ada9a8ab913e779a27c0e90ec736eedc78 +Author: law +Date: Fri Nov 3 17:14:23 2017 +0000 + + * config/i386/i386.c (ix86_expand_prologue): Tighten assert + for int_registers_saved. + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@254386 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +index 086848b2c73..fd74f1f6c0c 100644 +--- a/gcc/config/i386/i386.c ++++ b/gcc/config/i386/i386.c +@@ -14172,8 +14172,12 @@ ix86_expand_prologue (void) + && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK + || flag_stack_clash_protection)) + { +- /* We expect the registers to be saved when probes are used. */ +- gcc_assert (int_registers_saved); ++ /* This assert wants to verify that integer registers were saved ++ prior to probing. This is necessary when probing may be implemented ++ as a function call (Windows). It is not necessary for stack clash ++ protection probing. 
*/ ++ if (!flag_stack_clash_protection) ++ gcc_assert (int_registers_saved); + + if (flag_stack_clash_protection) + { diff --git a/gcc7-rh1512529-12.patch b/gcc7-rh1512529-12.patch new file mode 100644 index 0000000..3f2786e --- /dev/null +++ b/gcc7-rh1512529-12.patch @@ -0,0 +1,81 @@ +commit 75179275ecc35724a058676199188e0d13e65054 +Author: law +Date: Fri Nov 3 20:30:53 2017 +0000 + + * config/i386/i386.c (ix86_emit_restore_reg_using_pop): Prototype. + (ix86_adjust_stack_and_probe_stack_clash): Use a push/pop sequence + to probe at the start of a noreturn function. + + * gcc.target/i386/stack-check-12.c: New test. + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@254396 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +index fd74f1f6c0c..3c828ba492d 100644 +--- a/gcc/config/i386/i386.c ++++ b/gcc/config/i386/i386.c +@@ -96,6 +96,7 @@ static rtx legitimize_dllimport_symbol (rtx, bool); + static rtx legitimize_pe_coff_extern_decl (rtx, bool); + static rtx legitimize_pe_coff_symbol (rtx, bool); + static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool); ++static void ix86_emit_restore_reg_using_pop (rtx); + + #ifndef CHECK_STACK_LIMIT + #define CHECK_STACK_LIMIT (-1) +@@ -13287,10 +13288,13 @@ ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size) + no probes are needed. */ + if (!size) + { ++ struct ix86_frame frame; ++ ix86_compute_frame_layout (&frame); ++ + /* However, the allocation of space via pushes for register + saves could be viewed as allocating space, but without the + need to probe. 
*/ +- if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed) ++ if (frame.nregs || frame.nsseregs || frame_pointer_needed) + dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); + else + dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false); +@@ -13312,8 +13316,14 @@ ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size) + we just probe when we cross PROBE_INTERVAL. */ + if (TREE_THIS_VOLATILE (cfun->decl)) + { +- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, +- -GET_MODE_SIZE (word_mode))); ++ /* We can safely use any register here since we're just going to push ++ its value and immediately pop it back. But we do try and avoid ++ argument passing registers so as not to introduce dependencies in ++ the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */ ++ rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG); ++ rtx_insn *insn = emit_insn (gen_push (dummy_reg)); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ ix86_emit_restore_reg_using_pop (dummy_reg); + emit_insn (gen_blockage ()); + } + +diff --git a/gcc/testsuite/gcc.target/i386/stack-check-12.c b/gcc/testsuite/gcc.target/i386/stack-check-12.c +new file mode 100644 +index 00000000000..cb69bb08086 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/stack-check-12.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection -mtune=generic" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++__attribute__ ((noreturn)) void exit (int); ++ ++__attribute__ ((noreturn)) void ++f (void) ++{ ++ asm volatile ("nop" ::: "edi"); ++ exit (1); ++} ++ ++/* { dg-final { scan-assembler-not "or\[ql\]" } } */ ++/* { dg-final { scan-assembler "pushl %esi" { target ia32 } } } */ ++/* { dg-final { scan-assembler "popl %esi" { target ia32 } } }*/ ++/* { dg-final { scan-assembler "pushq %rax" { target { ! ia32 } } } } */ ++/* { dg-final { scan-assembler "popq %rax" { target { ! 
ia32 } } } }*/ ++ diff --git a/gcc7-rh1512529-13.patch b/gcc7-rh1512529-13.patch new file mode 100644 index 0000000..8221622 --- /dev/null +++ b/gcc7-rh1512529-13.patch @@ -0,0 +1,228 @@ +commit 2f97a7844f2211bbd4fd869c18e26801bb795aa9 +Author: law +Date: Mon Nov 6 15:51:16 2017 +0000 + + PR target/82788 + * config/i386/i386.c (PROBE_INTERVAL): Remove. + (get_probe_interval): New functions. + (ix86_adjust_stack_and_probe_stack_clash): Use get_probe_interval. + (ix86_adjust_stack_and_probe): Likewise. + (output_adjust_stack_and_probe): Likewise. + (ix86_emit_probe_stack_range): Likewise. + (ix86_expand_prologue): Likewise. + + PR target/82788 + * gcc.dg/pr82788.c: New test. + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@254456 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +index acd192a98c5..a904afe828d 100644 +--- a/gcc/config/i386/i386.c ++++ b/gcc/config/i386/i386.c +@@ -13270,7 +13270,17 @@ release_scratch_register_on_entry (struct scratch_reg *sr) + } + } + +-#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) ++/* Return the probing interval for -fstack-clash-protection. */ ++ ++static HOST_WIDE_INT ++get_probe_interval (void) ++{ ++ if (flag_stack_clash_protection) ++ return (HOST_WIDE_INT_1U ++ << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL)); ++ else ++ return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP); ++} + + /* Emit code to adjust the stack pointer by SIZE bytes while probing it. + +@@ -13339,8 +13349,7 @@ ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size) + /* We're allocating a large enough stack frame that we need to + emit probes. Either emit them inline or in a loop depending + on the size. 
*/ +- HOST_WIDE_INT probe_interval +- = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL); ++ HOST_WIDE_INT probe_interval = get_probe_interval (); + if (size <= 4 * probe_interval) + { + HOST_WIDE_INT i; +@@ -13349,7 +13358,7 @@ ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size) + /* Allocate PROBE_INTERVAL bytes. */ + rtx insn + = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (-PROBE_INTERVAL), -1, ++ GEN_INT (-probe_interval), -1, + m->fs.cfa_reg == stack_pointer_rtx); + add_reg_note (insn, REG_STACK_CHECK, const0_rtx); + +@@ -13442,7 +13451,7 @@ ix86_adjust_stack_and_probe (const HOST_WIDE_INT size) + that's the easy case. The run-time loop is made up of 9 insns in the + generic case while the compile-time loop is made up of 3+2*(n-1) insns + for n # of intervals. */ +- if (size <= 4 * PROBE_INTERVAL) ++ if (size <= 4 * get_probe_interval ()) + { + HOST_WIDE_INT i, adjust; + bool first_probe = true; +@@ -13451,15 +13460,15 @@ ix86_adjust_stack_and_probe (const HOST_WIDE_INT size) + values of N from 1 until it exceeds SIZE. If only one probe is + needed, this will not generate any code. Then adjust and probe + to PROBE_INTERVAL + SIZE. 
*/ +- for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL) ++ for (i = get_probe_interval (); i < size; i += get_probe_interval ()) + { + if (first_probe) + { +- adjust = 2 * PROBE_INTERVAL + dope; ++ adjust = 2 * get_probe_interval () + dope; + first_probe = false; + } + else +- adjust = PROBE_INTERVAL; ++ adjust = get_probe_interval (); + + emit_insn (gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, +@@ -13468,9 +13477,9 @@ ix86_adjust_stack_and_probe (const HOST_WIDE_INT size) + } + + if (first_probe) +- adjust = size + PROBE_INTERVAL + dope; ++ adjust = size + get_probe_interval () + dope; + else +- adjust = size + PROBE_INTERVAL - i; ++ adjust = size + get_probe_interval () - i; + + emit_insn (gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, +@@ -13480,7 +13489,8 @@ ix86_adjust_stack_and_probe (const HOST_WIDE_INT size) + /* Adjust back to account for the additional first interval. */ + last = emit_insn (gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, +- PROBE_INTERVAL + dope))); ++ (get_probe_interval () ++ + dope)))); + } + + /* Otherwise, do the same as above, but in a loop. Note that we must be +@@ -13498,7 +13508,7 @@ ix86_adjust_stack_and_probe (const HOST_WIDE_INT size) + + /* Step 1: round SIZE to the previous multiple of the interval. */ + +- rounded_size = ROUND_DOWN (size, PROBE_INTERVAL); ++ rounded_size = ROUND_DOWN (size, get_probe_interval ()); + + + /* Step 2: compute initial and final value of the loop counter. */ +@@ -13506,7 +13516,7 @@ ix86_adjust_stack_and_probe (const HOST_WIDE_INT size) + /* SP = SP_0 + PROBE_INTERVAL. */ + emit_insn (gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, +- - (PROBE_INTERVAL + dope)))); ++ - (get_probe_interval () + dope)))); + + /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. 
*/ + if (rounded_size <= (HOST_WIDE_INT_1 << 31)) +@@ -13551,7 +13561,8 @@ ix86_adjust_stack_and_probe (const HOST_WIDE_INT size) + /* Adjust back to account for the additional first interval. */ + last = emit_insn (gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, +- PROBE_INTERVAL + dope))); ++ (get_probe_interval () ++ + dope)))); + + release_scratch_register_on_entry (&sr); + } +@@ -13568,7 +13579,7 @@ ix86_adjust_stack_and_probe (const HOST_WIDE_INT size) + XVECEXP (expr, 0, 1) + = gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, +- PROBE_INTERVAL + dope + size)); ++ get_probe_interval () + dope + size)); + add_reg_note (last, REG_FRAME_RELATED_EXPR, expr); + RTX_FRAME_RELATED_P (last) = 1; + +@@ -13595,7 +13606,7 @@ output_adjust_stack_and_probe (rtx reg) + + /* SP = SP + PROBE_INTERVAL. */ + xops[0] = stack_pointer_rtx; +- xops[1] = GEN_INT (PROBE_INTERVAL); ++ xops[1] = GEN_INT (get_probe_interval ()); + output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops); + + /* Probe at SP. */ +@@ -13625,14 +13636,14 @@ ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) + that's the easy case. The run-time loop is made up of 6 insns in the + generic case while the compile-time loop is made up of n insns for n # + of intervals. */ +- if (size <= 6 * PROBE_INTERVAL) ++ if (size <= 6 * get_probe_interval ()) + { + HOST_WIDE_INT i; + + /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until + it exceeds SIZE. If only one probe is needed, this will not + generate any code. Then probe at FIRST + SIZE. */ +- for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL) ++ for (i = get_probe_interval (); i < size; i += get_probe_interval ()) + emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, + -(first + i))); + +@@ -13655,7 +13666,7 @@ ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) + + /* Step 1: round SIZE to the previous multiple of the interval. 
*/ + +- rounded_size = ROUND_DOWN (size, PROBE_INTERVAL); ++ rounded_size = ROUND_DOWN (size, get_probe_interval ()); + + + /* Step 2: compute initial and final value of the loop counter. */ +@@ -13716,7 +13727,7 @@ output_probe_stack_range (rtx reg, rtx end) + + /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ + xops[0] = reg; +- xops[1] = GEN_INT (PROBE_INTERVAL); ++ xops[1] = GEN_INT (get_probe_interval ()); + output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops); + + /* Probe at TEST_ADDR. */ +@@ -14194,7 +14205,7 @@ ix86_expand_prologue (void) + else if (STACK_CHECK_MOVING_SP) + { + if (!(crtl->is_leaf && !cfun->calls_alloca +- && allocate <= PROBE_INTERVAL)) ++ && allocate <= get_probe_interval ())) + { + ix86_adjust_stack_and_probe (allocate); + allocate = 0; +@@ -14211,7 +14222,7 @@ ix86_expand_prologue (void) + { + if (crtl->is_leaf && !cfun->calls_alloca) + { +- if (size > PROBE_INTERVAL) ++ if (size > get_probe_interval ()) + ix86_emit_probe_stack_range (0, size); + } + else +@@ -14222,7 +14233,7 @@ ix86_expand_prologue (void) + { + if (crtl->is_leaf && !cfun->calls_alloca) + { +- if (size > PROBE_INTERVAL ++ if (size > get_probe_interval () + && size > get_stack_check_protect ()) + ix86_emit_probe_stack_range (get_stack_check_protect (), + size - get_stack_check_protect ()); +diff --git a/gcc/testsuite/gcc.dg/pr82788.c b/gcc/testsuite/gcc.dg/pr82788.c +new file mode 100644 +index 00000000000..a8f628fd7f6 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/pr82788.c +@@ -0,0 +1,4 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-probe-interval=10 --param stack-clash-protection-guard-size=12" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++int main() { int a[1442]; return 0;} diff --git a/gcc7-rh1512529-14.patch b/gcc7-rh1512529-14.patch new file mode 100644 index 0000000..b79edf8 --- /dev/null +++ b/gcc7-rh1512529-14.patch @@ -0,0 +1,43 @@ +commit 
799fb6fee433891510ab613a024450c926cdb4b3 +Author: law +Date: Wed Nov 15 06:30:31 2017 +0000 + + * explow.c (anti_adjust_stack_and_probe_stack_clash): Avoid probing + the red zone for stack_clash_protection_final_dynamic_probe targets + when the total dynamic stack size is zero bytes. + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@254753 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/explow.c b/gcc/explow.c +index 6af6355fe30..53c4a6d7db9 100644 +--- a/gcc/explow.c ++++ b/gcc/explow.c +@@ -1978,6 +1978,13 @@ anti_adjust_stack_and_probe_stack_clash (rtx size) + if (size != CONST0_RTX (Pmode) + && targetm.stack_clash_protection_final_dynamic_probe (residual)) + { ++ /* SIZE could be zero at runtime and in that case *sp could hold ++ live data. Furthermore, we don't want to probe into the red ++ zone. ++ ++ Go ahead and just guard a probe at *sp on SIZE != 0 at runtime ++ if SIZE is not a compile time constant. */ ++ + /* Ideally we would just probe at *sp. However, if SIZE is not + a compile-time constant, but is zero at runtime, then *sp + might hold live data. So probe at *sp if we know that +@@ -1990,9 +1997,12 @@ anti_adjust_stack_and_probe_stack_clash (rtx size) + } + else + { +- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, +- -GET_MODE_SIZE (word_mode))); ++ rtx label = gen_label_rtx (); ++ emit_cmp_and_jump_insns (size, CONST0_RTX (GET_MODE (size)), ++ EQ, NULL_RTX, Pmode, 1, label); ++ emit_stack_probe (stack_pointer_rtx); + emit_insn (gen_blockage ()); ++ emit_label (label); + } + } + } diff --git a/gcc7-rh1512529-15.patch b/gcc7-rh1512529-15.patch new file mode 100644 index 0000000..90f7afd --- /dev/null +++ b/gcc7-rh1512529-15.patch @@ -0,0 +1,46 @@ +commit df7f99d5297f3c592401d7312c481578a8a62991 +Author: Jeff Law +Date: Wed Nov 22 11:55:10 2017 -0700 + + * gcc.dg/stack-check-5.c: Skip with -fstack-protector. + * gcc.dg/stack-check-6.c: Likewise. + * gcc.dg/stack-check-6a.c: Likewise. 
+ +diff --git a/gcc/testsuite/gcc.dg/stack-check-5.c b/gcc/testsuite/gcc.dg/stack-check-5.c +index 3178f5d8ce5..dd9f8dbf607 100644 +--- a/gcc/testsuite/gcc.dg/stack-check-5.c ++++ b/gcc/testsuite/gcc.dg/stack-check-5.c +@@ -1,6 +1,8 @@ + /* { dg-do compile } */ + /* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */ + /* { dg-require-effective-target supports_stack_clash_protection } */ ++/* { dg-skip-if "" { *-*-* } { "-fstack-protector" } { "" } } */ ++ + + + /* Otherwise the S/390 back-end might save the stack pointer in f2 () +diff --git a/gcc/testsuite/gcc.dg/stack-check-6.c b/gcc/testsuite/gcc.dg/stack-check-6.c +index ad2021c9037..83559c5533c 100644 +--- a/gcc/testsuite/gcc.dg/stack-check-6.c ++++ b/gcc/testsuite/gcc.dg/stack-check-6.c +@@ -1,6 +1,8 @@ + /* { dg-do compile } */ + /* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */ + /* { dg-require-effective-target supports_stack_clash_protection } */ ++/* { dg-skip-if "" { *-*-* } { "-fstack-protector" } { "" } } */ ++ + + + extern void foo (char *); +diff --git a/gcc/testsuite/gcc.dg/stack-check-6a.c b/gcc/testsuite/gcc.dg/stack-check-6a.c +index 6f8e7128921..a1a2acbd6bd 100644 +--- a/gcc/testsuite/gcc.dg/stack-check-6a.c ++++ b/gcc/testsuite/gcc.dg/stack-check-6a.c +@@ -4,6 +4,7 @@ + /* { dg-do compile } */ + /* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=16" } */ + /* { dg-require-effective-target supports_stack_clash_protection } */ ++/* { dg-skip-if "" { *-*-* } { "-fstack-protector" } { "" } } */ + + #include "stack-check-6.c" + diff --git a/gcc7-rh1512529-16.patch 
b/gcc7-rh1512529-16.patch new file mode 100644 index 0000000..a44227f --- /dev/null +++ b/gcc7-rh1512529-16.patch @@ -0,0 +1,473 @@ +commit dafbd9e0c347cde6d8192f374787f5dbee4aed89 +Author: Jeff Law +Date: Wed Nov 22 12:01:56 2017 -0700 + + aarch64 support + +diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +index 75c12f1de65..d7100c36664 100644 +--- a/gcc/config/aarch64/aarch64.c ++++ b/gcc/config/aarch64/aarch64.c +@@ -2747,7 +2747,14 @@ aarch64_output_probe_stack_range (rtx reg1, rtx reg2) + output_asm_insn ("sub\t%0, %0, %1", xops); + + /* Probe at TEST_ADDR. */ +- output_asm_insn ("str\txzr, [%0]", xops); ++ if (flag_stack_clash_protection) ++ { ++ gcc_assert (xops[0] == stack_pointer_rtx); ++ xops[1] = GEN_INT (PROBE_INTERVAL - 8); ++ output_asm_insn ("str\txzr, [%0, %1]", xops); ++ } ++ else ++ output_asm_insn ("str\txzr, [%0]", xops); + + /* Test if TEST_ADDR == LAST_ADDR. */ + xops[1] = reg2; +@@ -3511,6 +3518,125 @@ aarch64_set_handled_components (sbitmap components) + cfun->machine->reg_is_wrapped_separately[regno] = true; + } + ++/* Allocate SIZE bytes of stack space using SCRATCH_REG as a scratch ++ register. */ ++ ++static void ++aarch64_allocate_and_probe_stack_space (int scratchreg, HOST_WIDE_INT size) ++{ ++ HOST_WIDE_INT probe_interval ++ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL); ++ HOST_WIDE_INT guard_size ++ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE); ++ HOST_WIDE_INT guard_used_by_caller = 1024; ++ ++ /* SIZE should be large enough to require probing here. ie, it ++ must be larger than GUARD_SIZE - GUARD_USED_BY_CALLER. ++ ++ We can allocate GUARD_SIZE - GUARD_USED_BY_CALLER as a single chunk ++ without any probing. 
*/ ++ gcc_assert (size >= guard_size - guard_used_by_caller); ++ aarch64_sub_sp (scratchreg, guard_size - guard_used_by_caller, true); ++ HOST_WIDE_INT orig_size = size; ++ size -= (guard_size - guard_used_by_caller); ++ ++ HOST_WIDE_INT rounded_size = size & -probe_interval; ++ HOST_WIDE_INT residual = size - rounded_size; ++ ++ /* We can handle a small number of allocations/probes inline. Otherwise ++ punt to a loop. */ ++ if (rounded_size && rounded_size <= 4 * probe_interval) ++ { ++ /* We don't use aarch64_sub_sp here because we don't want to ++ repeatedly load SCRATCHREG. */ ++ rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg); ++ if (probe_interval > ARITH_FACTOR) ++ emit_move_insn (scratch_rtx, GEN_INT (-probe_interval)); ++ else ++ scratch_rtx = GEN_INT (-probe_interval); ++ ++ for (HOST_WIDE_INT i = 0; i < rounded_size; i += probe_interval) ++ { ++ rtx_insn *insn = emit_insn (gen_add2_insn (stack_pointer_rtx, ++ scratch_rtx)); ++ add_reg_note (insn, REG_STACK_CHECK, const0_rtx); ++ ++ if (probe_interval > ARITH_FACTOR) ++ { ++ RTX_FRAME_RELATED_P (insn) = 1; ++ rtx adj = plus_constant (Pmode, stack_pointer_rtx, -probe_interval); ++ add_reg_note (insn, REG_CFA_ADJUST_CFA, ++ gen_rtx_SET (stack_pointer_rtx, adj)); ++ } ++ ++ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, ++ (probe_interval ++ - GET_MODE_SIZE (word_mode)))); ++ emit_insn (gen_blockage ()); ++ } ++ dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size); ++ } ++ else if (rounded_size) ++ { ++ /* Compute the ending address. */ ++ rtx temp = gen_rtx_REG (word_mode, scratchreg); ++ emit_move_insn (temp, GEN_INT (-rounded_size)); ++ rtx_insn *insn ++ = emit_insn (gen_add3_insn (temp, stack_pointer_rtx, temp)); ++ ++ /* For the initial allocation, we don't have a frame pointer ++ set up, so we always need CFI notes. If we're doing the ++ final allocation, then we may have a frame pointer, in which ++ case it is the CFA, otherwise we need CFI notes. 
++ ++ We can determine which allocation we are doing by looking at ++ the temporary register. IP0 is the initial allocation, IP1 ++ is the final allocation. */ ++ if (scratchreg == IP0_REGNUM || !frame_pointer_needed) ++ { ++ /* We want the CFA independent of the stack pointer for the ++ duration of the loop. */ ++ add_reg_note (insn, REG_CFA_DEF_CFA, ++ plus_constant (Pmode, temp, ++ (rounded_size + (orig_size - size)))); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } ++ ++ /* This allocates and probes the stack. ++ ++ It also probes at a 4k interval regardless of the value of ++ PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL. */ ++ insn = emit_insn (gen_probe_stack_range (stack_pointer_rtx, ++ stack_pointer_rtx, temp)); ++ ++ /* Now reset the CFA register if needed. */ ++ if (scratchreg == IP0_REGNUM || !frame_pointer_needed) ++ { ++ add_reg_note (insn, REG_CFA_DEF_CFA, ++ plus_constant (Pmode, stack_pointer_rtx, ++ (rounded_size + (orig_size - size)))); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } ++ ++ emit_insn (gen_blockage ()); ++ dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size); ++ } ++ else ++ dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size); ++ ++ /* Handle any residuals. ++ Note that any residual must be probed. 
*/ ++ if (residual) ++ { ++ aarch64_sub_sp (scratchreg, residual, true); ++ add_reg_note (get_last_insn (), REG_STACK_CHECK, const0_rtx); ++ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, ++ (residual - GET_MODE_SIZE (word_mode)))); ++ emit_insn (gen_blockage ()); ++ } ++ return; ++} ++ + /* AArch64 stack frames generated by this compiler look like: + + +-------------------------------+ +@@ -3592,7 +3718,54 @@ aarch64_expand_prologue (void) + aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size); + } + +- aarch64_sub_sp (IP0_REGNUM, initial_adjust, true); ++ /* We do not fully protect aarch64 against stack clash style attacks ++ as doing so would be prohibitively expensive with less utility over ++ time as newer compilers are deployed. ++ ++ We assume the guard is at least 64k. Furthermore, we assume that ++ the caller has not pushed the stack pointer more than 1k into ++ the guard. A caller that pushes the stack pointer than 1k into ++ the guard is considered invalid. ++ ++ Note that the caller's ability to push the stack pointer into the ++ guard is a function of the number and size of outgoing arguments and/or ++ dynamic stack allocations due to the mandatory save of the link register ++ in the caller's frame. ++ ++ With those assumptions the callee can allocate up to 63k of stack ++ space without probing. ++ ++ When probing is needed, we emit a probe at the start of the prologue ++ and every PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes thereafter. ++ ++ We have to track how much space has been allocated, but we do not ++ track stores into the stack as implicit probes except for the ++ fp/lr store. 
*/ ++ HOST_WIDE_INT guard_size ++ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE); ++ HOST_WIDE_INT guard_used_by_caller = 1024; ++ if (flag_stack_clash_protection) ++ { ++ if (frame_size == 0) ++ dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false); ++ else if (initial_adjust < guard_size - guard_used_by_caller ++ && final_adjust < guard_size - guard_used_by_caller) ++ dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); ++ } ++ ++ /* In theory we should never have both an initial adjustment ++ and a callee save adjustment. Verify that is the case since the ++ code below does not handle it for -fstack-clash-protection. */ ++ gcc_assert (initial_adjust == 0 || callee_adjust == 0); ++ ++ /* Only probe if the initial adjustment is larger than the guard ++ less the amount of the guard reserved for use by the caller's ++ outgoing args. */ ++ if (flag_stack_clash_protection ++ && initial_adjust >= guard_size - guard_used_by_caller) ++ aarch64_allocate_and_probe_stack_space (IP0_REGNUM, initial_adjust); ++ else ++ aarch64_sub_sp (IP0_REGNUM, initial_adjust, true); + + if (callee_adjust != 0) + aarch64_push_regs (reg1, reg2, callee_adjust); +@@ -3613,7 +3786,30 @@ aarch64_expand_prologue (void) + callee_adjust != 0 || frame_pointer_needed); + aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM, + callee_adjust != 0 || frame_pointer_needed); +- aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed); ++ ++ /* We may need to probe the final adjustment as well. */ ++ if (flag_stack_clash_protection && final_adjust != 0) ++ { ++ /* First probe if the final adjustment is larger than the guard size ++ less the amount of the guard reserved for use by the caller's ++ outgoing args. 
*/ ++ if (final_adjust >= guard_size - guard_used_by_caller) ++ aarch64_allocate_and_probe_stack_space (IP1_REGNUM, final_adjust); ++ else ++ aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed); ++ ++ /* We must also probe if the final adjustment is larger than the guard ++ that is assumed used by the caller. This may be sub-optimal. */ ++ if (final_adjust >= guard_used_by_caller) ++ { ++ if (dump_file) ++ fprintf (dump_file, ++ "Stack clash aarch64 large outgoing arg, probing\n"); ++ emit_stack_probe (stack_pointer_rtx); ++ } ++ } ++ else ++ aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed); + } + + /* Return TRUE if we can use a simple_return insn. +@@ -3679,7 +3875,11 @@ aarch64_expand_epilogue (bool for_sibcall) + RTX_FRAME_RELATED_P (insn) = callee_adjust == 0; + } + else +- aarch64_add_sp (IP1_REGNUM, final_adjust, df_regs_ever_live_p (IP1_REGNUM)); ++ aarch64_add_sp (IP1_REGNUM, final_adjust, ++ /* A stack clash protection prologue may not have ++ left IP1_REGNUM in a usable state. */ ++ (flag_stack_clash_protection ++ || df_regs_ever_live_p (IP1_REGNUM))); + + aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM, + callee_adjust != 0, &cfi_ops); +@@ -3702,7 +3902,11 @@ aarch64_expand_epilogue (bool for_sibcall) + cfi_ops = NULL; + } + +- aarch64_add_sp (IP0_REGNUM, initial_adjust, df_regs_ever_live_p (IP0_REGNUM)); ++ /* A stack clash protection prologue may not have left IP0_REGNUM ++ in a usable state. */ ++ aarch64_add_sp (IP0_REGNUM, initial_adjust, ++ (flag_stack_clash_protection ++ || df_regs_ever_live_p (IP0_REGNUM))); + + if (cfi_ops) + { +@@ -8696,6 +8900,12 @@ aarch64_override_options_internal (struct gcc_options *opts) + opts->x_param_values, + global_options_set.x_param_values); + ++ /* We assume the guard page is 64k. 
*/ ++ maybe_set_param_value (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE, ++ 16, ++ opts->x_param_values, ++ global_options_set.x_param_values); ++ + aarch64_override_options_after_change_1 (opts); + } + +@@ -14726,6 +14936,28 @@ aarch64_sched_can_speculate_insn (rtx_insn *insn) + } + } + ++/* It has been decided that to allow up to 1kb of outgoing argument ++ space to be allocated w/o probing. If more than 1kb of outgoing ++ argment space is allocated, then it must be probed and the last ++ probe must occur no more than 1kbyte away from the end of the ++ allocated space. ++ ++ This implies that the residual part of an alloca allocation may ++ need probing in cases where the generic code might not otherwise ++ think a probe is needed. ++ ++ This target hook returns TRUE when allocating RESIDUAL bytes of ++ alloca space requires an additional probe, otherwise FALSE is ++ returned. */ ++ ++static bool ++aarch64_stack_clash_protection_final_dynamic_probe (rtx residual) ++{ ++ return (residual == CONST0_RTX (Pmode) ++ || GET_CODE (residual) != CONST_INT ++ || INTVAL (residual) >= 1024); ++} ++ + /* Target-specific selftests. 
*/ + + #if CHECKING_P +@@ -15154,6 +15386,10 @@ aarch64_libgcc_floating_mode_supported_p + #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS + #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 4 + ++#undef TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE ++#define TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE \ ++ aarch64_stack_clash_protection_final_dynamic_probe ++ + #if CHECKING_P + #undef TARGET_RUN_TARGET_SELFTESTS + #define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 51368e29f2d..09b353d7470 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -5413,7 +5413,7 @@ + ) + + (define_insn "probe_stack_range" +- [(set (match_operand:DI 0 "register_operand" "=r") ++ [(set (match_operand:DI 0 "register_operand" "=rk") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "register_operand" "r")] + UNSPECV_PROBE_STACK_RANGE))] +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-12.c b/gcc/testsuite/gcc.target/aarch64/stack-check-12.c +new file mode 100644 +index 00000000000..2ce38483b6b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-12.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++extern void arf (unsigned long int *, unsigned long int *); ++void ++frob () ++{ ++ unsigned long int num[1000]; ++ unsigned long int den[1000]; ++ arf (den, num); ++} ++ ++/* This verifies that the scheduler did not break the dependencies ++ by adjusting the offsets within the probe and that the scheduler ++ did not reorder around the stack probes. */ ++/* { dg-final { scan-assembler-times "sub\\tsp, sp, #4096\\n\\tstr\\txzr, .sp, 4088." 
3 } } */ ++ ++ ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-13.c b/gcc/testsuite/gcc.target/aarch64/stack-check-13.c +new file mode 100644 +index 00000000000..d8886835989 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-13.c +@@ -0,0 +1,28 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++#define ARG32(X) X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X ++#define ARG192(X) ARG32(X),ARG32(X),ARG32(X),ARG32(X),ARG32(X),ARG32(X) ++void out1(ARG192(__int128)); ++int t1(int); ++ ++int t3(int x) ++{ ++ if (x < 1000) ++ return t1 (x) + 1; ++ ++ out1 (ARG192(1)); ++ return 0; ++} ++ ++ ++ ++/* This test creates a large (> 1k) outgoing argument area that needs ++ to be probed. We don't test the exact size of the space or the ++ exact offset to make the test a little less sensitive to trivial ++ output changes. */ ++/* { dg-final { scan-assembler-times "sub\\tsp, sp, #....\\n\\tstr\\txzr, \\\[sp" 1 } } */ ++ ++ ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-14.c b/gcc/testsuite/gcc.target/aarch64/stack-check-14.c +new file mode 100644 +index 00000000000..59ffe01376d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-14.c +@@ -0,0 +1,25 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++int t1(int); ++ ++int t2(int x) ++{ ++ char *p = __builtin_alloca (4050); ++ x = t1 (x); ++ return p[x]; ++} ++ ++ ++/* This test has a constant sized alloca that is smaller than the ++ probe interval. But it actually requires two probes instead ++ of one because of the optimistic assumptions we made in the ++ aarch64 prologue code WRT probing state. 
++ ++ The form can change quite a bit so we just check for two ++ probes without looking at the actual address. */ ++/* { dg-final { scan-assembler-times "str\\txzr," 2 } } */ ++ ++ ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-15.c b/gcc/testsuite/gcc.target/aarch64/stack-check-15.c +new file mode 100644 +index 00000000000..e06db6dc2f0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-15.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++int t1(int); ++ ++int t2(int x) ++{ ++ char *p = __builtin_alloca (x); ++ x = t1 (x); ++ return p[x]; ++} ++ ++ ++/* This test has a variable sized alloca. It requires 3 probes. ++ One in the loop, one for the residual and at the end of the ++ alloca area. ++ ++ The form can change quite a bit so we just check for two ++ probes without looking at the actual address. */ ++/* { dg-final { scan-assembler-times "str\\txzr," 3 } } */ ++ ++ ++ +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index 02eb2066393..5431c236aa1 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -8384,14 +8384,9 @@ proc check_effective_target_arm_coproc4_ok { } { + # + proc check_effective_target_supports_stack_clash_protection { } { + +- # Temporary until the target bits are fully ACK'd. 
+-# if { [istarget aarch*-*-*] } { +-# return 1 +-# } +- + if { [istarget x86_64-*-*] || [istarget i?86-*-*] + || [istarget powerpc*-*-*] || [istarget rs6000*-*-*] +- || [istarget s390*-*-*] } { ++ || [istarget aarch64*-**] || [istarget s390*-*-*] } { + return 1 + } + return 0 diff --git a/gcc7-rh1512529-17.patch b/gcc7-rh1512529-17.patch new file mode 100644 index 0000000..e3b813c --- /dev/null +++ b/gcc7-rh1512529-17.patch @@ -0,0 +1,40 @@ +commit 5ed5d56d36fc50627edf537c75afa0fdf6ba4f5d +Author: Jeff Law +Date: Wed Nov 22 12:02:50 2017 -0700 + + 2017-10-31 Segher Boessenkool + + PR target/82674 + * config/rs6000/rs6000.md (allocate_stack): Force update interval + into a register if it does not fit into an immediate offset field. + +diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md +index 843148e9703..4b9732a78ec 100644 +--- a/gcc/config/rs6000/rs6000.md ++++ b/gcc/config/rs6000/rs6000.md +@@ -10450,6 +10450,9 @@ + { + rtx loop_lab, end_loop; + bool rotated = CONST_INT_P (rounded_size); ++ rtx update = GEN_INT (-probe_interval); ++ if (probe_interval > 32768) ++ update = force_reg (Pmode, update); + + emit_stack_clash_protection_probe_loop_start (&loop_lab, &end_loop, + last_addr, rotated); +@@ -10457,13 +10460,11 @@ + if (Pmode == SImode) + emit_insn (gen_movsi_update_stack (stack_pointer_rtx, + stack_pointer_rtx, +- GEN_INT (-probe_interval), +- chain)); ++ update, chain)); + else + emit_insn (gen_movdi_di_update_stack (stack_pointer_rtx, + stack_pointer_rtx, +- GEN_INT (-probe_interval), +- chain)); ++ update, chain)); + emit_stack_clash_protection_probe_loop_end (loop_lab, end_loop, + last_addr, rotated); + } diff --git a/gcc7-rh1512529-18.patch b/gcc7-rh1512529-18.patch new file mode 100644 index 0000000..0aaf01f --- /dev/null +++ b/gcc7-rh1512529-18.patch @@ -0,0 +1,13 @@ +diff --git a/libsanitizer/sanitizer_common/sanitizer_linux.cc b/libsanitizer/sanitizer_common/sanitizer_linux.cc +index 5b6f18602e7..b5e96a1a0e4 100644 +--- 
a/libsanitizer/sanitizer_common/sanitizer_linux.cc ++++ b/libsanitizer/sanitizer_common/sanitizer_linux.cc +@@ -1262,7 +1262,7 @@ struct __sanitizer_esr_context { + + static bool Aarch64GetESR(ucontext_t *ucontext, u64 *esr) { + static const u32 kEsrMagic = 0x45535201; +- u8 *aux = ucontext->uc_mcontext.__reserved; ++ u8 *aux = ucontext->uc_mcontext.__glibc_reserved1; + while (true) { + _aarch64_ctx *ctx = (_aarch64_ctx *)aux; + if (ctx->size == 0) break; diff --git a/gcc7-rh1512529-2.patch b/gcc7-rh1512529-2.patch new file mode 100644 index 0000000..e334b5c --- /dev/null +++ b/gcc7-rh1512529-2.patch @@ -0,0 +1,735 @@ +commit bed27844b80e17ad786028a0a82c7d47990d15bb +Author: law +Date: Wed Sep 20 05:05:12 2017 +0000 + + 2017-09-18 Jeff Law + + * explow.c: Include "params.h". + (anti_adjust_stack_and_probe_stack_clash): New function. + (get_stack_check_protect): Likewise. + (compute_stack_clash_protection_loop_data): Likewise. + (emit_stack_clash_protection_loop_start): Likewise. + (emit_stack_clash_protection_loop_end): Likewise. + (allocate_dynamic_stack_space): Use get_stack_check_protect. + Use anti_adjust_stack_and_probe_stack_clash. + * explow.h (compute_stack_clash_protection_loop_data): Prototype. + (emit_stack_clash_protection_loop_start): Likewise. + (emit_stack_clash_protection_loop_end): Likewise. + * rtl.h (get_stack_check_protect): Prototype. + * target.def (stack_clash_protection_final_dynamic_probe): New hook. + * targhooks.c (default_stack_clash_protection_final_dynamic_probe): New. + * targhooks.h (default_stack_clash_protection_final_dynamic_probe): + Prototype. + * doc/tm.texi.in (TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE): + Add @hook. + * doc/tm.texi: Rebuilt. + * config/aarch64/aarch64.c (aarch64_expand_prologue): Use + get_stack_check_protect. + * config/alpha/alpha.c (alpha_expand_prologue): Likewise. + * config/arm/arm.c (arm_expand_prologue): Likewise. + (arm_frame_pointer_required): Likewise. 
+ * config/i386/i386.c (ix86_expand_prologue): Likewise. + * config/ia64/ia64.c (ia64_expand_prologue): Likewise. + * config/mips/mips.c (mips_expand_prologue): Likewise. + * config/powerpcspe/powerpcspe.c (rs6000_emit_prologue): Likewise. + * config/rs6000/rs6000.c (rs6000_emit_prologue): Likewise. + * config/sparc/sparc.c (sparc_expand_prologue): Likewise. + (sparc_flat_expand_prologue): Likewise. + + * gcc.dg/stack-check-3.c: New test. + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@252995 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +index 1bd010be756..75c12f1de65 100644 +--- a/gcc/config/aarch64/aarch64.c ++++ b/gcc/config/aarch64/aarch64.c +@@ -3582,12 +3582,14 @@ aarch64_expand_prologue (void) + { + if (crtl->is_leaf && !cfun->calls_alloca) + { +- if (frame_size > PROBE_INTERVAL && frame_size > STACK_CHECK_PROTECT) +- aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, +- frame_size - STACK_CHECK_PROTECT); ++ if (frame_size > PROBE_INTERVAL ++ && frame_size > get_stack_check_protect ()) ++ aarch64_emit_probe_stack_range (get_stack_check_protect (), ++ (frame_size ++ - get_stack_check_protect ())); + } + else if (frame_size > 0) +- aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size); ++ aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size); + } + + aarch64_sub_sp (IP0_REGNUM, initial_adjust, true); +diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c +index 00a69c1a08d..91f3d7cfbeb 100644 +--- a/gcc/config/alpha/alpha.c ++++ b/gcc/config/alpha/alpha.c +@@ -7741,7 +7741,7 @@ alpha_expand_prologue (void) + + probed_size = frame_size; + if (flag_stack_check) +- probed_size += STACK_CHECK_PROTECT; ++ probed_size += get_stack_check_protect (); + + if (probed_size <= 32768) + { +diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c +index 1ded0d2a17d..242d7c792fe 100644 +--- a/gcc/config/arm/arm.c ++++ b/gcc/config/arm/arm.c +@@ -21604,13 
+21604,13 @@ arm_expand_prologue (void) + + if (crtl->is_leaf && !cfun->calls_alloca) + { +- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT) +- arm_emit_probe_stack_range (STACK_CHECK_PROTECT, +- size - STACK_CHECK_PROTECT, ++ if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) ++ arm_emit_probe_stack_range (get_stack_check_protect (), ++ size - get_stack_check_protect (), + regno, live_regs_mask); + } + else if (size > 0) +- arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size, ++ arm_emit_probe_stack_range (get_stack_check_protect (), size, + regno, live_regs_mask); + } + +@@ -27751,7 +27751,7 @@ arm_frame_pointer_required (void) + { + /* We don't have the final size of the frame so adjust. */ + size += 32 * UNITS_PER_WORD; +- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT) ++ if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) + return true; + } + else +diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +index 2d163797ba0..60bab9a26a6 100644 +--- a/gcc/config/i386/i386.c ++++ b/gcc/config/i386/i386.c +@@ -14037,7 +14037,7 @@ ix86_expand_prologue (void) + HOST_WIDE_INT size = allocate; + + if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000)) +- size = 0x80000000 - STACK_CHECK_PROTECT - 1; ++ size = 0x80000000 - get_stack_check_protect () - 1; + + if (TARGET_STACK_PROBE) + { +@@ -14047,18 +14047,20 @@ ix86_expand_prologue (void) + ix86_emit_probe_stack_range (0, size); + } + else +- ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT); ++ ix86_emit_probe_stack_range (0, ++ size + get_stack_check_protect ()); + } + else + { + if (crtl->is_leaf && !cfun->calls_alloca) + { +- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT) +- ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, +- size - STACK_CHECK_PROTECT); ++ if (size > PROBE_INTERVAL ++ && size > get_stack_check_protect ()) ++ ix86_emit_probe_stack_range (get_stack_check_protect (), ++ size - get_stack_check_protect ()); + } + else +- 
ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size); ++ ix86_emit_probe_stack_range (get_stack_check_protect (), size); + } + } + } +diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c +index a9f479b6e5c..17d6b3318a6 100644 +--- a/gcc/config/ia64/ia64.c ++++ b/gcc/config/ia64/ia64.c +@@ -3476,15 +3476,16 @@ ia64_expand_prologue (void) + + if (crtl->is_leaf && !cfun->calls_alloca) + { +- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT) +- ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, +- size - STACK_CHECK_PROTECT, ++ if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) ++ ia64_emit_probe_stack_range (get_stack_check_protect (), ++ size - get_stack_check_protect (), + bs_size); +- else if (size + bs_size > STACK_CHECK_PROTECT) +- ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 0, bs_size); ++ else if (size + bs_size > get_stack_check_protect ()) ++ ia64_emit_probe_stack_range (get_stack_check_protect (), ++ 0, bs_size); + } + else if (size + bs_size > 0) +- ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, size, bs_size); ++ ia64_emit_probe_stack_range (get_stack_check_protect (), size, bs_size); + } + + if (dump_file) +diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c +index 6bfd86a07af..7d85ce7055a 100644 +--- a/gcc/config/mips/mips.c ++++ b/gcc/config/mips/mips.c +@@ -12081,12 +12081,12 @@ mips_expand_prologue (void) + { + if (crtl->is_leaf && !cfun->calls_alloca) + { +- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT) +- mips_emit_probe_stack_range (STACK_CHECK_PROTECT, +- size - STACK_CHECK_PROTECT); ++ if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) ++ mips_emit_probe_stack_range (get_stack_check_protect (), ++ size - get_stack_check_protect ()); + } + else if (size > 0) +- mips_emit_probe_stack_range (STACK_CHECK_PROTECT, size); ++ mips_emit_probe_stack_range (get_stack_check_protect (), size); + } + + /* Save the registers. 
Allocate up to MIPS_MAX_FIRST_STACK_STEP +diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c +index 66496a1f80f..1836e1d147d 100644 +--- a/gcc/config/rs6000/rs6000.c ++++ b/gcc/config/rs6000/rs6000.c +@@ -29294,12 +29294,12 @@ rs6000_emit_prologue (void) + + if (crtl->is_leaf && !cfun->calls_alloca) + { +- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT) +- rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, +- size - STACK_CHECK_PROTECT); ++ if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) ++ rs6000_emit_probe_stack_range (get_stack_check_protect (), ++ size - get_stack_check_protect ()); + } + else if (size > 0) +- rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size); ++ rs6000_emit_probe_stack_range (get_stack_check_protect (), size); + } + + if (TARGET_FIX_AND_CONTINUE) +diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c +index 83ca1dcc617..b32bb9859e1 100644 +--- a/gcc/config/sparc/sparc.c ++++ b/gcc/config/sparc/sparc.c +@@ -5695,12 +5695,12 @@ sparc_expand_prologue (void) + { + if (crtl->is_leaf && !cfun->calls_alloca) + { +- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT) +- sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, +- size - STACK_CHECK_PROTECT); ++ if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) ++ sparc_emit_probe_stack_range (get_stack_check_protect (), ++ size - get_stack_check_protect ()); + } + else if (size > 0) +- sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size); ++ sparc_emit_probe_stack_range (get_stack_check_protect (), size); + } + + if (size == 0) +@@ -5806,12 +5806,12 @@ sparc_flat_expand_prologue (void) + { + if (crtl->is_leaf && !cfun->calls_alloca) + { +- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT) +- sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, +- size - STACK_CHECK_PROTECT); ++ if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) ++ sparc_emit_probe_stack_range (get_stack_check_protect (), ++ size - 
get_stack_check_protect ()); + } + else if (size > 0) +- sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size); ++ sparc_emit_probe_stack_range (get_stack_check_protect (), size); + } + + if (sparc_save_local_in_regs_p) +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index c4f2c893c8e..0ba2925a1bd 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -3419,6 +3419,10 @@ GCC computed the default from the values of the above macros and you will + normally not need to override that default. + @end defmac + ++@deftypefn {Target Hook} bool TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE (rtx @var{residual}) ++Some targets make optimistic assumptions about the state of stack probing when they emit their prologues. On such targets a probe into the end of any dynamically allocated space is likely required for safety against stack clash style attacks. Define this variable to return nonzero if such a probe is required or zero otherwise. You need not define this macro if it would always have the value zero. ++@end deftypefn ++ + @need 2000 + @node Frame Registers + @subsection Registers That Address the Stack Frame +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index 1c471d8da35..d31255e86cb 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -2999,6 +2999,8 @@ GCC computed the default from the values of the above macros and you will + normally not need to override that default. + @end defmac + ++@hook TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE ++ + @need 2000 + @node Frame Registers + @subsection Registers That Address the Stack Frame +diff --git a/gcc/explow.c b/gcc/explow.c +index 67cb6ff1513..e7db438b1ec 100644 +--- a/gcc/explow.c ++++ b/gcc/explow.c +@@ -39,8 +39,11 @@ along with GCC; see the file COPYING3. 
If not see + #include "expr.h" + #include "common/common-target.h" + #include "output.h" ++#include "params.h" ++#include "dumpfile.h" + + static rtx break_out_memory_refs (rtx); ++static void anti_adjust_stack_and_probe_stack_clash (rtx); + + + /* Truncate and perhaps sign-extend C as appropriate for MODE. */ +@@ -1271,6 +1274,29 @@ get_dynamic_stack_size (rtx *psize, unsigned size_align, + *psize = size; + } + ++/* Return the number of bytes to "protect" on the stack for -fstack-check. ++ ++ "protect" in the context of -fstack-check means how many bytes we ++ should always ensure are available on the stack. More importantly ++ this is how many bytes are skipped when probing the stack. ++ ++ On some targets we want to reuse the -fstack-check prologue support ++ to give a degree of protection against stack clashing style attacks. ++ ++ In that scenario we do not want to skip bytes before probing as that ++ would render the stack clash protections useless. ++ ++ So we never use STACK_CHECK_PROTECT directly. Instead we indirect though ++ this helper which allows us to provide different values for ++ -fstack-check and -fstack-clash-protection. */ ++HOST_WIDE_INT ++get_stack_check_protect (void) ++{ ++ if (flag_stack_clash_protection) ++ return 0; ++ return STACK_CHECK_PROTECT; ++} ++ + /* Return an rtx representing the address of an area of memory dynamically + pushed on the stack. + +@@ -1429,7 +1455,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, + probe_stack_range (STACK_OLD_CHECK_PROTECT + STACK_CHECK_MAX_FRAME_SIZE, + size); + else if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) +- probe_stack_range (STACK_CHECK_PROTECT, size); ++ probe_stack_range (get_stack_check_protect (), size); + + /* Don't let anti_adjust_stack emit notes. 
*/ + suppress_reg_args_size = true; +@@ -1482,6 +1508,8 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, + + if (flag_stack_check && STACK_CHECK_MOVING_SP) + anti_adjust_stack_and_probe (size, false); ++ else if (flag_stack_clash_protection) ++ anti_adjust_stack_and_probe_stack_clash (size); + else + anti_adjust_stack (size); + +@@ -1757,6 +1785,219 @@ probe_stack_range (HOST_WIDE_INT first, rtx size) + emit_insn (gen_blockage ()); + } + ++/* Compute parameters for stack clash probing a dynamic stack ++ allocation of SIZE bytes. ++ ++ We compute ROUNDED_SIZE, LAST_ADDR, RESIDUAL and PROBE_INTERVAL. ++ ++ Additionally we conditionally dump the type of probing that will ++ be needed given the values computed. */ ++ ++void ++compute_stack_clash_protection_loop_data (rtx *rounded_size, rtx *last_addr, ++ rtx *residual, ++ HOST_WIDE_INT *probe_interval, ++ rtx size) ++{ ++ /* Round SIZE down to STACK_CLASH_PROTECTION_PROBE_INTERVAL */ ++ *probe_interval ++ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL); ++ *rounded_size = simplify_gen_binary (AND, Pmode, size, ++ GEN_INT (-*probe_interval)); ++ ++ /* Compute the value of the stack pointer for the last iteration. ++ It's just SP + ROUNDED_SIZE. */ ++ rtx rounded_size_op = force_operand (*rounded_size, NULL_RTX); ++ *last_addr = force_operand (gen_rtx_fmt_ee (STACK_GROW_OP, Pmode, ++ stack_pointer_rtx, ++ rounded_size_op), ++ NULL_RTX); ++ ++ /* Compute any residuals not allocated by the loop above. Residuals ++ are just the ROUNDED_SIZE - SIZE. */ ++ *residual = simplify_gen_binary (MINUS, Pmode, size, *rounded_size); ++ ++ /* Dump key information to make writing tests easy. 
*/ ++ if (dump_file) ++ { ++ if (*rounded_size == CONST0_RTX (Pmode)) ++ fprintf (dump_file, ++ "Stack clash skipped dynamic allocation and probing loop.\n"); ++ else if (GET_CODE (*rounded_size) == CONST_INT ++ && INTVAL (*rounded_size) <= 4 * *probe_interval) ++ fprintf (dump_file, ++ "Stack clash dynamic allocation and probing inline.\n"); ++ else if (GET_CODE (*rounded_size) == CONST_INT) ++ fprintf (dump_file, ++ "Stack clash dynamic allocation and probing in " ++ "rotated loop.\n"); ++ else ++ fprintf (dump_file, ++ "Stack clash dynamic allocation and probing in loop.\n"); ++ ++ if (*residual != CONST0_RTX (Pmode)) ++ fprintf (dump_file, ++ "Stack clash dynamic allocation and probing residuals.\n"); ++ else ++ fprintf (dump_file, ++ "Stack clash skipped dynamic allocation and " ++ "probing residuals.\n"); ++ } ++} ++ ++/* Emit the start of an allocate/probe loop for stack ++ clash protection. ++ ++ LOOP_LAB and END_LAB are returned for use when we emit the ++ end of the loop. ++ ++ LAST addr is the value for SP which stops the loop. */ ++void ++emit_stack_clash_protection_probe_loop_start (rtx *loop_lab, ++ rtx *end_lab, ++ rtx last_addr, ++ bool rotated) ++{ ++ /* Essentially we want to emit any setup code, the top of loop ++ label and the comparison at the top of the loop. */ ++ *loop_lab = gen_label_rtx (); ++ *end_lab = gen_label_rtx (); ++ ++ emit_label (*loop_lab); ++ if (!rotated) ++ emit_cmp_and_jump_insns (stack_pointer_rtx, last_addr, EQ, NULL_RTX, ++ Pmode, 1, *end_lab); ++} ++ ++/* Emit the end of a stack clash probing loop. ++ ++ This consists of just the jump back to LOOP_LAB and ++ emitting END_LOOP after the loop. 
*/ ++ ++void ++emit_stack_clash_protection_probe_loop_end (rtx loop_lab, rtx end_loop, ++ rtx last_addr, bool rotated) ++{ ++ if (rotated) ++ emit_cmp_and_jump_insns (stack_pointer_rtx, last_addr, NE, NULL_RTX, ++ Pmode, 1, loop_lab); ++ else ++ emit_jump (loop_lab); ++ ++ emit_label (end_loop); ++ ++} ++ ++/* Adjust the stack pointer by minus SIZE (an rtx for a number of bytes) ++ while probing it. This pushes when SIZE is positive. SIZE need not ++ be constant. ++ ++ This is subtly different than anti_adjust_stack_and_probe to try and ++ prevent stack-clash attacks ++ ++ 1. It must assume no knowledge of the probing state, any allocation ++ must probe. ++ ++ Consider the case of a 1 byte alloca in a loop. If the sum of the ++ allocations is large, then this could be used to jump the guard if ++ probes were not emitted. ++ ++ 2. It never skips probes, whereas anti_adjust_stack_and_probe will ++ skip probes on the first couple PROBE_INTERVALs on the assumption ++ they're done elsewhere. ++ ++ 3. It only allocates and probes SIZE bytes, it does not need to ++ allocate/probe beyond that because this probing style does not ++ guarantee signal handling capability if the guard is hit. */ ++ ++static void ++anti_adjust_stack_and_probe_stack_clash (rtx size) ++{ ++ /* First ensure SIZE is Pmode. */ ++ if (GET_MODE (size) != VOIDmode && GET_MODE (size) != Pmode) ++ size = convert_to_mode (Pmode, size, 1); ++ ++ /* We can get here with a constant size on some targets. */ ++ rtx rounded_size, last_addr, residual; ++ HOST_WIDE_INT probe_interval; ++ compute_stack_clash_protection_loop_data (&rounded_size, &last_addr, ++ &residual, &probe_interval, size); ++ ++ if (rounded_size != CONST0_RTX (Pmode)) ++ { ++ if (INTVAL (rounded_size) <= 4 * probe_interval) ++ { ++ for (HOST_WIDE_INT i = 0; ++ i < INTVAL (rounded_size); ++ i += probe_interval) ++ { ++ anti_adjust_stack (GEN_INT (probe_interval)); ++ ++ /* The prologue does not probe residuals. 
Thus the offset ++ here to probe just beyond what the prologue had already ++ allocated. */ ++ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, ++ (probe_interval ++ - GET_MODE_SIZE (word_mode)))); ++ emit_insn (gen_blockage ()); ++ } ++ } ++ else ++ { ++ rtx loop_lab, end_loop; ++ bool rotate_loop = GET_CODE (rounded_size) == CONST_INT; ++ emit_stack_clash_protection_probe_loop_start (&loop_lab, &end_loop, ++ last_addr, rotate_loop); ++ ++ anti_adjust_stack (GEN_INT (probe_interval)); ++ ++ /* The prologue does not probe residuals. Thus the offset here ++ to probe just beyond what the prologue had already allocated. */ ++ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, ++ (probe_interval ++ - GET_MODE_SIZE (word_mode)))); ++ ++ emit_stack_clash_protection_probe_loop_end (loop_lab, end_loop, ++ last_addr, rotate_loop); ++ emit_insn (gen_blockage ()); ++ } ++ } ++ ++ if (residual != CONST0_RTX (Pmode)) ++ { ++ rtx x = force_reg (Pmode, plus_constant (Pmode, residual, ++ -GET_MODE_SIZE (word_mode))); ++ anti_adjust_stack (residual); ++ emit_stack_probe (gen_rtx_PLUS (Pmode, stack_pointer_rtx, x)); ++ emit_insn (gen_blockage ()); ++ } ++ ++ /* Some targets make optimistic assumptions in their prologues about ++ how the caller may have probed the stack. Make sure we honor ++ those assumptions when needed. */ ++ if (size != CONST0_RTX (Pmode) ++ && targetm.stack_clash_protection_final_dynamic_probe (residual)) ++ { ++ /* Ideally we would just probe at *sp. However, if SIZE is not ++ a compile-time constant, but is zero at runtime, then *sp ++ might hold live data. So probe at *sp if we know that ++ an allocation was made, otherwise probe into the red zone ++ which is obviously undesirable. 
*/ ++ if (GET_CODE (size) == CONST_INT) ++ { ++ emit_stack_probe (stack_pointer_rtx); ++ emit_insn (gen_blockage ()); ++ } ++ else ++ { ++ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, ++ -GET_MODE_SIZE (word_mode))); ++ emit_insn (gen_blockage ()); ++ } ++ } ++} ++ ++ + /* Adjust the stack pointer by minus SIZE (an rtx for a number of bytes) + while probing it. This pushes when SIZE is positive. SIZE need not + be constant. If ADJUST_BACK is true, adjust back the stack pointer +diff --git a/gcc/explow.h b/gcc/explow.h +index 217a3226adb..b85c051e8ce 100644 +--- a/gcc/explow.h ++++ b/gcc/explow.h +@@ -69,6 +69,15 @@ extern void anti_adjust_stack (rtx); + /* Add some bytes to the stack while probing it. An rtx says how many. */ + extern void anti_adjust_stack_and_probe (rtx, bool); + ++/* Support for building allocation/probing loops for stack-clash ++ protection of dyamically allocated stack space. */ ++extern void compute_stack_clash_protection_loop_data (rtx *, rtx *, rtx *, ++ HOST_WIDE_INT *, rtx); ++extern void emit_stack_clash_protection_probe_loop_start (rtx *, rtx *, ++ rtx, bool); ++extern void emit_stack_clash_protection_probe_loop_end (rtx, rtx, ++ rtx, bool); ++ + /* This enum is used for the following two functions. 
*/ + enum save_level {SAVE_BLOCK, SAVE_FUNCTION, SAVE_NONLOCAL}; + +diff --git a/gcc/rtl.h b/gcc/rtl.h +index 93330425c00..98f993a95a5 100644 +--- a/gcc/rtl.h ++++ b/gcc/rtl.h +@@ -2707,6 +2707,7 @@ get_full_set_src_cost (rtx x, machine_mode mode, struct full_rtx_costs *c) + /* In explow.c */ + extern HOST_WIDE_INT trunc_int_for_mode (HOST_WIDE_INT, machine_mode); + extern rtx plus_constant (machine_mode, rtx, HOST_WIDE_INT, bool = false); ++extern HOST_WIDE_INT get_stack_check_protect (void); + + /* In rtl.c */ + extern rtx rtx_alloc_stat (RTX_CODE MEM_STAT_DECL); +diff --git a/gcc/target.def b/gcc/target.def +index 6bebfd5b9d6..8b8ad1f4938 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -5473,6 +5473,13 @@ these registers when the target switches are opposed to them.)", + void, (void), + hook_void_void) + ++DEFHOOK ++(stack_clash_protection_final_dynamic_probe, ++ "Some targets make optimistic assumptions about the state of stack probing when they emit their prologues. On such targets a probe into the end of any dynamically allocated space is likely required for safety against stack clash style attacks. Define this variable to return nonzero if such a probe is required or zero otherwise. You need not define this macro if it would always have the value zero.", ++ bool, (rtx residual), ++ default_stack_clash_protection_final_dynamic_probe) ++ ++ + /* Functions specific to the C family of frontends. 
*/ + #undef HOOK_PREFIX + #define HOOK_PREFIX "TARGET_C_" +diff --git a/gcc/targhooks.c b/gcc/targhooks.c +index 1cdec068ed8..225048e7518 100644 +--- a/gcc/targhooks.c ++++ b/gcc/targhooks.c +@@ -2107,4 +2107,10 @@ default_excess_precision (enum excess_precision_type ATTRIBUTE_UNUSED) + return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT; + } + ++HOST_WIDE_INT ++default_stack_clash_protection_final_dynamic_probe (rtx residual ATTRIBUTE_UNUSED) ++{ ++ return 0; ++} ++ + #include "gt-targhooks.h" +diff --git a/gcc/targhooks.h b/gcc/targhooks.h +index 18070df7839..32ae9d88050 100644 +--- a/gcc/targhooks.h ++++ b/gcc/targhooks.h +@@ -263,5 +263,6 @@ extern unsigned int default_min_arithmetic_precision (void); + + extern enum flt_eval_method + default_excess_precision (enum excess_precision_type ATTRIBUTE_UNUSED); ++extern bool default_stack_clash_protection_final_dynamic_probe (rtx); + + #endif /* GCC_TARGHOOKS_H */ +diff --git a/gcc/testsuite/gcc.dg/stack-check-3.c b/gcc/testsuite/gcc.dg/stack-check-3.c +new file mode 100644 +index 00000000000..58fb65649ee +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/stack-check-3.c +@@ -0,0 +1,86 @@ ++/* The goal here is to ensure that dynamic allocations via vlas or ++ alloca calls receive probing. ++ ++ Scanning the RTL or assembly code seems like insanity here as does ++ checking for particular allocation sizes and probe offsets. For ++ now we just verify that there's an allocation + probe loop and ++ residual allocation + probe for f?. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-expand -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=4096 --param stack-clash-protection-guard-size=4096" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++__attribute__((noinline, noclone)) void ++foo (char *p) ++{ ++ asm volatile ("" : : "r" (p) : "memory"); ++} ++ ++/* Simple VLA, no other locals. 
*/ ++__attribute__((noinline, noclone)) void ++f0 (int x) ++{ ++ char vla[x]; ++ foo (vla); ++} ++ ++/* Simple VLA, small local frame. */ ++__attribute__((noinline, noclone)) void ++f1 (int x) ++{ ++ char locals[128]; ++ char vla[x]; ++ foo (vla); ++} ++ ++/* Small constant alloca, no other locals. */ ++__attribute__((noinline, noclone)) void ++f2 (int x) ++{ ++ char *vla = __builtin_alloca (128); ++ foo (vla); ++} ++ ++/* Big constant alloca, small local frame. */ ++__attribute__((noinline, noclone)) void ++f3 (int x) ++{ ++ char locals[128]; ++ char *vla = __builtin_alloca (16384); ++ foo (vla); ++} ++ ++/* Big constant alloca, small local frame. */ ++__attribute__((noinline, noclone)) void ++f3a (int x) ++{ ++ char locals[128]; ++ char *vla = __builtin_alloca (32768); ++ foo (vla); ++} ++ ++/* Nonconstant alloca, no other locals. */ ++__attribute__((noinline, noclone)) void ++f4 (int x) ++{ ++ char *vla = __builtin_alloca (x); ++ foo (vla); ++} ++ ++/* Nonconstant alloca, small local frame. */ ++__attribute__((noinline, noclone)) void ++f5 (int x) ++{ ++ char locals[128]; ++ char *vla = __builtin_alloca (x); ++ foo (vla); ++} ++ ++/* { dg-final { scan-rtl-dump-times "allocation and probing residuals" 7 "expand" } } */ ++ ++ ++/* { dg-final { scan-rtl-dump-times "allocation and probing in loop" 7 "expand" { target callee_realigns_stack } } } */ ++/* { dg-final { scan-rtl-dump-times "allocation and probing in loop" 4 "expand" { target { ! callee_realigns_stack } } } } */ ++/* { dg-final { scan-rtl-dump-times "allocation and probing in rotated loop" 1 "expand" { target { ! callee_realigns_stack } } } } */ ++/* { dg-final { scan-rtl-dump-times "allocation and probing inline" 1 "expand" { target { ! callee_realigns_stack } } } } */ ++/* { dg-final { scan-rtl-dump-times "skipped dynamic allocation and probing loop" 1 "expand" { target { ! 
callee_realigns_stack } } } } */ diff --git a/gcc7-rh1512529-3.patch b/gcc7-rh1512529-3.patch new file mode 100644 index 0000000..112cf25 --- /dev/null +++ b/gcc7-rh1512529-3.patch @@ -0,0 +1,199 @@ +commit 640ca5ef08a3ade8fe3f32d8c412008410de989c +Author: law +Date: Wed Sep 20 05:21:09 2017 +0000 + + * config/alpha/alpha.c (alpha_expand_prologue): Also check + flag_stack_clash_protection. + * config/arm/arm.c (arm_compute_static_chain_stack_bytes): Likewise. + (arm_expand_prologue, thumb1_expand_prologue): Likewise. + (arm_frame_pointer_required): Likewise. + * config/ia64/ia64.c (ia64_compute_frame_size): Likewise. + (ia64_expand_prologue): Likewise. + * config/mips/mips.c (mips_expand_prologue): Likewise. + * config/powerpcspe/powerpcspe.c (rs6000_expand_prologue): Likewise. + * config/sparc/sparc.c (sparc_expand_prologue): Likewise. + (sparc_flat_expand_prologue): Likewise. + * config/spu/spu.c (spu_expand_prologue): Likewise. + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@252996 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c +index 91f3d7cfbeb..36e78a0caf7 100644 +--- a/gcc/config/alpha/alpha.c ++++ b/gcc/config/alpha/alpha.c +@@ -7740,7 +7740,7 @@ alpha_expand_prologue (void) + Note that we are only allowed to adjust sp once in the prologue. */ + + probed_size = frame_size; +- if (flag_stack_check) ++ if (flag_stack_check || flag_stack_clash_protection) + probed_size += get_stack_check_protect (); + + if (probed_size <= 32768) +@@ -7755,7 +7755,7 @@ alpha_expand_prologue (void) + /* We only have to do this probe if we aren't saving registers or + if we are probing beyond the frame because of -fstack-check. 
*/ + if ((sa_size == 0 && probed_size > probed - 4096) +- || flag_stack_check) ++ || flag_stack_check || flag_stack_clash_protection) + emit_insn (gen_probe_stack (GEN_INT (-probed_size))); + } + +@@ -7785,7 +7785,8 @@ alpha_expand_prologue (void) + late in the compilation, generate the loop as a single insn. */ + emit_insn (gen_prologue_stack_probe_loop (count, ptr)); + +- if ((leftover > 4096 && sa_size == 0) || flag_stack_check) ++ if ((leftover > 4096 && sa_size == 0) ++ || flag_stack_check || flag_stack_clash_protection) + { + rtx last = gen_rtx_MEM (DImode, + plus_constant (Pmode, ptr, -leftover)); +@@ -7793,7 +7794,7 @@ alpha_expand_prologue (void) + emit_move_insn (last, const0_rtx); + } + +- if (flag_stack_check) ++ if (flag_stack_check || flag_stack_clash_protection) + { + /* If -fstack-check is specified we have to load the entire + constant into a register and subtract from the sp in one go, +diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c +index 242d7c792fe..4f85a061753 100644 +--- a/gcc/config/arm/arm.c ++++ b/gcc/config/arm/arm.c +@@ -19083,7 +19083,8 @@ arm_compute_static_chain_stack_bytes (void) + /* See the defining assertion in arm_expand_prologue. */ + if (IS_NESTED (arm_current_func_type ()) + && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM) +- || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK ++ || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK ++ || flag_stack_clash_protection) + && !df_regs_ever_live_p (LR_REGNUM))) + && arm_r3_live_at_start_p () + && crtl->args.pretend_args_size == 0) +@@ -21377,7 +21378,8 @@ arm_expand_prologue (void) + clobbered when creating the frame, we need to save and restore it. 
*/ + clobber_ip = IS_NESTED (func_type) + && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM) +- || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK ++ || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK ++ || flag_stack_clash_protection) + && !df_regs_ever_live_p (LR_REGNUM) + && arm_r3_live_at_start_p ())); + +@@ -21591,7 +21593,8 @@ arm_expand_prologue (void) + stack checking. We use IP as the first scratch register, except for the + non-APCS nested functions if LR or r3 are available (see clobber_ip). */ + if (!IS_INTERRUPT (func_type) +- && flag_stack_check == STATIC_BUILTIN_STACK_CHECK) ++ && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK ++ || flag_stack_clash_protection)) + { + unsigned int regno; + +@@ -24875,7 +24878,9 @@ thumb1_expand_prologue (void) + current_function_static_stack_size = size; + + /* If we have a frame, then do stack checking. FIXME: not implemented. */ +- if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size) ++ if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK ++ || flag_stack_clash_protection) ++ && size) + sorry ("-fstack-check=specific for Thumb-1"); + + amount = offsets->outgoing_args - offsets->saved_regs; +@@ -27736,7 +27741,8 @@ arm_frame_pointer_required (void) + instruction prior to the stack adjustment and this requires a frame + pointer if we want to catch the exception using the EABI unwinder. 
*/ + if (!IS_INTERRUPT (arm_current_func_type ()) +- && flag_stack_check == STATIC_BUILTIN_STACK_CHECK ++ && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK ++ || flag_stack_clash_protection) + && arm_except_unwind_info (&global_options) == UI_TARGET + && cfun->can_throw_non_call_exceptions) + { +diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c +index 17d6b3318a6..8acb92fd32b 100644 +--- a/gcc/config/ia64/ia64.c ++++ b/gcc/config/ia64/ia64.c +@@ -2685,7 +2685,8 @@ ia64_compute_frame_size (HOST_WIDE_INT size) + mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL); + + /* Static stack checking uses r2 and r3. */ +- if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) ++ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK ++ || flag_stack_clash_protection) + current_frame_info.gr_used_mask |= 0xc; + + /* Find the size of the register stack frame. We have only 80 local +@@ -3468,7 +3469,8 @@ ia64_expand_prologue (void) + if (flag_stack_usage_info) + current_function_static_stack_size = current_frame_info.total_size; + +- if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) ++ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK ++ || flag_stack_clash_protection) + { + HOST_WIDE_INT size = current_frame_info.total_size; + int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs +diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c +index 7d85ce7055a..b2d3c737b34 100644 +--- a/gcc/config/mips/mips.c ++++ b/gcc/config/mips/mips.c +@@ -12077,7 +12077,8 @@ mips_expand_prologue (void) + if (flag_stack_usage_info) + current_function_static_stack_size = size; + +- if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) ++ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK ++ || flag_stack_clash_protection) + { + if (crtl->is_leaf && !cfun->calls_alloca) + { +diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c +index b32bb9859e1..498bd04f5c2 100644 +--- a/gcc/config/sparc/sparc.c ++++ b/gcc/config/sparc/sparc.c +@@ -5691,7 +5691,8 
@@ sparc_expand_prologue (void) + if (flag_stack_usage_info) + current_function_static_stack_size = size; + +- if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) ++ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK ++ || flag_stack_clash_protection) + { + if (crtl->is_leaf && !cfun->calls_alloca) + { +@@ -5802,7 +5803,8 @@ sparc_flat_expand_prologue (void) + if (flag_stack_usage_info) + current_function_static_stack_size = size; + +- if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) ++ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK ++ || flag_stack_clash_protection) + { + if (crtl->is_leaf && !cfun->calls_alloca) + { +diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c +index fcb85c0ee16..22745fa32c3 100644 +--- a/gcc/config/spu/spu.c ++++ b/gcc/config/spu/spu.c +@@ -1760,7 +1760,7 @@ spu_expand_prologue (void) + + if (total_size > 0) + { +- if (flag_stack_check) ++ if (flag_stack_check || flag_stack_clash_protection) + { + /* We compare against total_size-1 because + ($sp >= total_size) <=> ($sp > total_size-1) */ +@@ -5364,7 +5364,7 @@ spu_allocate_stack (rtx op0, rtx op1) + emit_insn (gen_spu_convert (sp, stack_pointer_rtx)); + emit_insn (gen_subv4si3 (sp, sp, splatted)); + +- if (flag_stack_check) ++ if (flag_stack_check || flag_stack_clash_protection) + { + rtx avail = gen_reg_rtx(SImode); + rtx result = gen_reg_rtx(SImode); diff --git a/gcc7-rh1512529-4.patch b/gcc7-rh1512529-4.patch new file mode 100644 index 0000000..9c953ec --- /dev/null +++ b/gcc7-rh1512529-4.patch @@ -0,0 +1,92 @@ +commit 7f0d332f992c3b51c8aff3ed79e2233ed2498863 +Author: law +Date: Wed Sep 20 05:23:51 2017 +0000 + + * function.c (dump_stack_clash_frame_info): New function. + * function.h (dump_stack_clash_frame_info): Prototype. + (enum stack_clash_probes): New enum. 
+ + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@252997 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/function.c b/gcc/function.c +index 21eb586a8f7..6d0f462a262 100644 +--- a/gcc/function.c ++++ b/gcc/function.c +@@ -5695,6 +5695,58 @@ get_arg_pointer_save_area (void) + return ret; + } + ++ ++/* If debugging dumps are requested, dump information about how the ++ target handled -fstack-check=clash for the prologue. ++ ++ PROBES describes what if any probes were emitted. ++ ++ RESIDUALS indicates if the prologue had any residual allocation ++ (i.e. total allocation was not a multiple of PROBE_INTERVAL). */ ++ ++void ++dump_stack_clash_frame_info (enum stack_clash_probes probes, bool residuals) ++{ ++ if (!dump_file) ++ return; ++ ++ switch (probes) ++ { ++ case NO_PROBE_NO_FRAME: ++ fprintf (dump_file, ++ "Stack clash no probe no stack adjustment in prologue.\n"); ++ break; ++ case NO_PROBE_SMALL_FRAME: ++ fprintf (dump_file, ++ "Stack clash no probe small stack adjustment in prologue.\n"); ++ break; ++ case PROBE_INLINE: ++ fprintf (dump_file, "Stack clash inline probes in prologue.\n"); ++ break; ++ case PROBE_LOOP: ++ fprintf (dump_file, "Stack clash probe loop in prologue.\n"); ++ break; ++ } ++ ++ if (residuals) ++ fprintf (dump_file, "Stack clash residual allocation in prologue.\n"); ++ else ++ fprintf (dump_file, "Stack clash no residual allocation in prologue.\n"); ++ ++ if (frame_pointer_needed) ++ fprintf (dump_file, "Stack clash frame pointer needed.\n"); ++ else ++ fprintf (dump_file, "Stack clash no frame pointer needed.\n"); ++ ++ if (TREE_THIS_VOLATILE (cfun->decl)) ++ fprintf (dump_file, ++ "Stack clash noreturn prologue, assuming no implicit" ++ " probes in caller.\n"); ++ else ++ fprintf (dump_file, ++ "Stack clash not noreturn prologue.\n"); ++} ++ + /* Add a list of INSNS to the hash HASHP, possibly allocating HASHP + for the first time. 
*/ + +diff --git a/gcc/function.h b/gcc/function.h +index 0f34bcd6123..87dac803622 100644 +--- a/gcc/function.h ++++ b/gcc/function.h +@@ -553,6 +553,14 @@ do { \ + ((TARGET_PTRMEMFUNC_VBIT_LOCATION == ptrmemfunc_vbit_in_pfn) \ + ? MAX (FUNCTION_BOUNDARY, 2 * BITS_PER_UNIT) : FUNCTION_BOUNDARY) + ++enum stack_clash_probes { ++ NO_PROBE_NO_FRAME, ++ NO_PROBE_SMALL_FRAME, ++ PROBE_INLINE, ++ PROBE_LOOP ++}; ++ ++extern void dump_stack_clash_frame_info (enum stack_clash_probes, bool); + + + extern void push_function_context (void); diff --git a/gcc7-rh1512529-5.patch b/gcc7-rh1512529-5.patch new file mode 100644 index 0000000..1b6d4ba --- /dev/null +++ b/gcc7-rh1512529-5.patch @@ -0,0 +1,2705 @@ +commit ded495f992254176bd1d08d4f200a7b508a33b9a +Author: law +Date: Wed Sep 20 05:35:07 2017 +0000 + + * config/i386/i386.c (ix86_adjust_stack_and_probe_stack_clash): New. + (ix86_expand_prologue): Dump stack clash info as needed. + Call ix86_adjust_stack_and_probe_stack_clash as needed. + + * gcc.dg/stack-check-4.c: New test. + * gcc.dg/stack-check-5.c: New test. + * gcc.dg/stack-check-6.c: New test. + * gcc.dg/stack-check-6a.c: New test. + * gcc.dg/stack-check-7.c: New test. + * gcc.dg/stack-check-8.c: New test. + * gcc.dg/stack-check-9.c: New test. + * gcc.dg/stack-check-10.c: New test. + * lib/target-supports.exp + (check_effective_target_supports_stack_clash_protection): Enable for + x86 and x86_64 targets. + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@252998 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +index 60bab9a26a6..088dca2001f 100644 +--- a/gcc/config/i386/i386.c ++++ b/gcc/config/i386/i386.c +@@ -13270,6 +13270,147 @@ release_scratch_register_on_entry (struct scratch_reg *sr) + + #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) + ++/* Emit code to adjust the stack pointer by SIZE bytes while probing it. 
++ ++ This differs from the next routine in that it tries hard to prevent ++ attacks that jump the stack guard. Thus it is never allowed to allocate ++ more than PROBE_INTERVAL bytes of stack space without a suitable ++ probe. */ ++ ++static void ++ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size) ++{ ++ struct machine_function *m = cfun->machine; ++ ++ /* If this function does not statically allocate stack space, then ++ no probes are needed. */ ++ if (!size) ++ { ++ dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false); ++ return; ++ } ++ ++ /* If we are a noreturn function, then we have to consider the ++ possibility that we're called via a jump rather than a call. ++ ++ Thus we don't have the implicit probe generated by saving the ++ return address into the stack at the call. Thus, the stack ++ pointer could be anywhere in the guard page. The safe thing ++ to do is emit a probe now. ++ ++ ?!? This should be revamped to work like aarch64 and s390 where ++ we track the offset from the most recent probe. Normally that ++ offset would be zero. For a non-return function we would reset ++ it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then ++ we just probe when we cross PROBE_INTERVAL. */ ++ if (TREE_THIS_VOLATILE (cfun->decl)) ++ { ++ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, ++ -GET_MODE_SIZE (word_mode))); ++ emit_insn (gen_blockage ()); ++ } ++ ++ /* If we allocate less than the size of the guard statically, ++ then no probing is necessary, but we do need to allocate ++ the stack. */ ++ if (size < (1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE))) ++ { ++ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (-size), -1, ++ m->fs.cfa_reg == stack_pointer_rtx); ++ dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); ++ return; ++ } ++ ++ /* We're allocating a large enough stack frame that we need to ++ emit probes. Either emit them inline or in a loop depending ++ on the size. 
*/ ++ HOST_WIDE_INT probe_interval ++ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL); ++ if (size <= 4 * probe_interval) ++ { ++ HOST_WIDE_INT i; ++ for (i = probe_interval; i <= size; i += probe_interval) ++ { ++ /* Allocate PROBE_INTERVAL bytes. */ ++ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (-probe_interval), -1, ++ m->fs.cfa_reg == stack_pointer_rtx); ++ ++ /* And probe at *sp. */ ++ emit_stack_probe (stack_pointer_rtx); ++ emit_insn (gen_blockage ()); ++ } ++ ++ /* We need to allocate space for the residual, but we do not need ++ to probe the residual. */ ++ HOST_WIDE_INT residual = (i - probe_interval - size); ++ if (residual) ++ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (residual), -1, ++ m->fs.cfa_reg == stack_pointer_rtx); ++ dump_stack_clash_frame_info (PROBE_INLINE, residual != 0); ++ } ++ else ++ { ++ struct scratch_reg sr; ++ get_scratch_register_on_entry (&sr); ++ ++ /* Step 1: round SIZE down to a multiple of the interval. */ ++ HOST_WIDE_INT rounded_size = size & -probe_interval; ++ ++ /* Step 2: compute final value of the loop counter. Use lea if ++ possible. */ ++ rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size); ++ rtx insn; ++ if (address_no_seg_operand (addr, Pmode)) ++ insn = emit_insn (gen_rtx_SET (sr.reg, addr)); ++ else ++ { ++ emit_move_insn (sr.reg, GEN_INT (-rounded_size)); ++ insn = emit_insn (gen_rtx_SET (sr.reg, ++ gen_rtx_PLUS (Pmode, sr.reg, ++ stack_pointer_rtx))); ++ } ++ if (m->fs.cfa_reg == stack_pointer_rtx) ++ { ++ add_reg_note (insn, REG_CFA_DEF_CFA, ++ plus_constant (Pmode, sr.reg, ++ m->fs.cfa_offset + rounded_size)); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } ++ ++ /* Step 3: the loop. 
*/ ++ rtx size_rtx = GEN_INT (rounded_size); ++ insn = emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, ++ size_rtx)); ++ if (m->fs.cfa_reg == stack_pointer_rtx) ++ { ++ m->fs.cfa_offset += rounded_size; ++ add_reg_note (insn, REG_CFA_DEF_CFA, ++ plus_constant (Pmode, stack_pointer_rtx, ++ m->fs.cfa_offset)); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } ++ m->fs.sp_offset += rounded_size; ++ emit_insn (gen_blockage ()); ++ ++ /* Step 4: adjust SP if we cannot assert at compile-time that SIZE ++ is equal to ROUNDED_SIZE. */ ++ ++ if (size != rounded_size) ++ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (rounded_size - size), -1, ++ m->fs.cfa_reg == stack_pointer_rtx); ++ dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size); ++ ++ release_scratch_register_on_entry (&sr); ++ } ++ ++ /* Make sure nothing is scheduled before we are done. */ ++ emit_insn (gen_blockage ()); ++} ++ + /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */ + + static void +@@ -14018,12 +14159,19 @@ ix86_expand_prologue (void) + + /* The stack has already been decremented by the instruction calling us + so probe if the size is non-negative to preserve the protection area. */ +- if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK) ++ if (allocate >= 0 ++ && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK ++ || flag_stack_clash_protection)) + { + /* We expect the registers to be saved when probes are used. 
*/ + gcc_assert (int_registers_saved); + +- if (STACK_CHECK_MOVING_SP) ++ if (flag_stack_clash_protection) ++ { ++ ix86_adjust_stack_and_probe_stack_clash (allocate); ++ allocate = 0; ++ } ++ else if (STACK_CHECK_MOVING_SP) + { + if (!(crtl->is_leaf && !cfun->calls_alloca + && allocate <= PROBE_INTERVAL)) +diff --git a/gcc/testsuite/gcc.dg/stack-check-10.c b/gcc/testsuite/gcc.dg/stack-check-10.c +new file mode 100644 +index 00000000000..a86956ad692 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/stack-check-10.c +@@ -0,0 +1,41 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++int f (int *); ++ ++int ++g (int a) ++{ ++ return f (&a); ++} ++ ++int f1 (void); ++int f2 (int); ++ ++int ++f3 (void) ++{ ++ return f2 (f1 ()); ++} ++ ++ ++/* If we have caller implicit probes, then we should not need probes in either callee. ++ Else callees may need probes, particularly if non-leaf functions require a ++ frame/frame pointer. */ ++/* { dg-final { scan-rtl-dump-times "Stack clash no probe" 2 "pro_and_epilogue" { target caller_implicit_probes } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash inline probe" 1 "pro_and_epilogue" { target { ! caller_implicit_probes } } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash no probe" 1 "pro_and_epilogue" { target { ! caller_implicit_probes } } } } */ ++ ++/* Neither of these functions are a nonreturn function. */ ++/* { dg-final { scan-rtl-dump-times "Stack clash not noreturn" 2 "pro_and_epilogue" } } */ ++ ++/* If the callee realigns the stack or has a mandatory frame, then both functions ++ have a residual allocation. Else just g() has a residual allocation. 
*/ ++/* { dg-final { scan-rtl-dump-times "Stack clash residual allocation in prologue" 2 "pro_and_epilogue" } } */ ++ ++ ++/* If the target has frame pointers for non-leafs, then both functions will ++ need a frame pointer. Otherwise neither should. */ ++/* { dg-final { scan-rtl-dump-times "Stack clash no frame pointer needed" 2 "pro_and_epilogue" { target { ! frame_pointer_for_non_leaf } } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash frame pointer needed" 2 "pro_and_epilogue" { target { frame_pointer_for_non_leaf } } } } */ +diff --git a/gcc/testsuite/gcc.dg/stack-check-3.c b/gcc/testsuite/gcc.dg/stack-check-3.c +index 58fb65649ee..f0bf7c71a5b 100644 +--- a/gcc/testsuite/gcc.dg/stack-check-3.c ++++ b/gcc/testsuite/gcc.dg/stack-check-3.c +@@ -7,7 +7,7 @@ + residual allocation + probe for f?. */ + + /* { dg-do compile } */ +-/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-expand -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=4096 --param stack-clash-protection-guard-size=4096" } */ ++/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-expand -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */ + /* { dg-require-effective-target supports_stack_clash_protection } */ + + __attribute__((noinline, noclone)) void +diff --git a/gcc/testsuite/gcc.dg/stack-check-4.c b/gcc/testsuite/gcc.dg/stack-check-4.c +new file mode 100644 +index 00000000000..b0c5c61972f +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/stack-check-4.c +@@ -0,0 +1,42 @@ ++/* On targets where the call instruction is an implicit probe of *sp, we ++ elide stack probes as long as the size of the local stack is less than ++ PROBE_INTERVAL. ++ ++ But if the caller were to transform a tail call into a direct jump ++ we do not have that implicit probe. This normally isn't a problem as ++ the caller must not have a local frame for that optimization to apply. 
++ ++ However, a sufficiently smart compiler could realize that the caller's ++ local stack need not be torn down and thus could transform a call into ++ a jump if the target is a noreturn function, even if the caller has ++ a local frame. ++ ++ To guard against that, targets that depend on *sp being probed by the ++ call itself must emit a probe if the target function is a noreturn ++ function, even if they just allocate a small amount of stack space. ++ ++ Rather than try to parse RTL or assembly code, we instead require the ++ prologue code to emit information into the dump file that we can ++ scan for. We scan for both the positive and negative cases. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++extern void arf (char *); ++ ++__attribute__ ((noreturn)) void foo1 () ++{ ++ char x[10]; ++ while (1) ++ arf (x); ++} ++ ++void foo2 () ++{ ++ char x[10]; ++ arf (x); ++} ++/* { dg-final { scan-rtl-dump-times "Stack clash noreturn" 1 "pro_and_epilogue" } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash not noreturn" 1 "pro_and_epilogue" } } */ ++ +diff --git a/gcc/testsuite/gcc.dg/stack-check-5.c b/gcc/testsuite/gcc.dg/stack-check-5.c +new file mode 100644 +index 00000000000..2171d9b6c23 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/stack-check-5.c +@@ -0,0 +1,74 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++ ++extern void foo (char *); ++extern void bar (void); ++ ++/* This function allocates no local stack and is a leaf. It should have no ++ probes on any target and should not require a frame pointer. 
*/ ++int ++f0 (int x, int y) ++{ ++ asm volatile ("" : : : "memory"); ++ return x + y; ++} ++ ++/* This function allocates no local stack, but is not a leaf. Ideally it ++ should not need probing and no frame pointer. */ ++int ++f1 (int x, int y) ++{ ++ asm volatile ("" : : : "memory"); ++ bar (); ++} ++ ++/* This is a leaf with a small frame. On targets with implicit probes in ++ the caller, this should not need probing. On targets with no implicit ++ probes in the caller, it may require probes. Ideally it should need no ++ frame pointer. */ ++void ++f2 (void) ++{ ++ char buf[512]; ++ asm volatile ("" : : "g" (&buf) : "memory"); ++} ++ ++/* This is a non-leaf with a small frame. On targets with implicit probes in ++ the caller, this should not need probing. On targets with no implicit ++ probes in the caller, it may require probes. It should need no frame ++ pointer. */ ++void ++f3 (void) ++{ ++ char buf[512]; ++ foo (buf); ++} ++ ++/* If we have caller implicit probes, then we should not need probes. ++ Else callees may need probes, particularly if non-leaf functions require a ++ frame/frame pointer. */ ++/* { dg-final { scan-rtl-dump-times "Stack clash no probe" 4 "pro_and_epilogue" { target caller_implicit_probes } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash no probe" 2 "pro_and_epilogue" { target { ! caller_implicit_probes } } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash inline probes " 2 "pro_and_epilogue" { target { ! caller_implicit_probes } } } } */ ++ ++/* None of these functions are marked with the noreturn attribute. */ ++/* { dg-final { scan-rtl-dump-times "Stack clash not noreturn" 4 "pro_and_epilogue" } } */ ++ ++/* Two functions are leafs, two are not. Verify the target identified them ++ appropriately. */ ++/* { dg-final { scan-rtl-dump-times "Stack clash no frame pointer needed" 4 "pro_and_epilogue" { target { ! 
frame_pointer_for_non_leaf } } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash no frame pointer needed" 2 "pro_and_epilogue" { target { frame_pointer_for_non_leaf } } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash frame pointer needed" 2 "pro_and_epilogue" { target { frame_pointer_for_non_leaf } } } } */ ++ ++ ++/* We have selected the size of the array in f2/f3 to be large enough ++ to not live in the red zone on targets that support it. ++ ++ That allows simplification of this test considerably. ++ f1() should not require any allocations, thus no residuals. ++ All the rest of the functions require some kind of allocation, ++ either for the saved fp/rp or the array. */ ++/* { dg-final { scan-rtl-dump-times "Stack clash no residual allocation in prologue" 1 "pro_and_epilogue" } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash residual allocation in prologue" 3 "pro_and_epilogue" } } */ +diff --git a/gcc/testsuite/gcc.dg/stack-check-6.c b/gcc/testsuite/gcc.dg/stack-check-6.c +new file mode 100644 +index 00000000000..ad2021c9037 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/stack-check-6.c +@@ -0,0 +1,55 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++ ++extern void foo (char *); ++extern void bar (void); ++ ++ ++/* This is a leaf with a frame that is large enough to require probing with ++ a residual allocation, but small enough to probe inline. */ ++void ++f4 (void) ++{ ++ char buf[4096 + 512]; ++ asm volatile ("" : : "g" (&buf) : "memory"); ++} ++ ++ ++/* This is a non-leaf with a frame large enough to require probing and ++ a residual allocation, but small enough to probe inline. 
*/ ++void ++f5 (void) ++{ ++ char buf[4096 + 512]; ++ foo (buf); ++} ++ ++/* This is a leaf with a frame that is large enough to require probing with ++ a loop plus a residual allocation. */ ++void ++f6 (void) ++{ ++ char buf[4096 * 10 + 512]; ++ asm volatile ("" : : "g" (&buf) : "memory"); ++} ++ ++ ++/* This is a non-leaf with a frame large enough to require probing with ++ a loop plus a residual allocation. */ ++void ++f7 (void) ++{ ++ char buf[4096 * 10 + 512]; ++ foo (buf); ++} ++ ++/* { dg-final { scan-rtl-dump-times "Stack clash inline probes" 2 "pro_and_epilogue" } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash probe loop" 2 "pro_and_epilogue" } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash residual allocation in prologue" 4 "pro_and_epilogue" } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash not noreturn" 4 "pro_and_epilogue" } } */ ++ ++/* { dg-final { scan-rtl-dump-times "Stack clash no frame pointer needed" 4 "pro_and_epilogue" { target { ! frame_pointer_for_non_leaf } } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash no frame pointer needed" 2 "pro_and_epilogue" { target { frame_pointer_for_non_leaf } } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash frame pointer needed" 2 "pro_and_epilogue" { target { frame_pointer_for_non_leaf } } } } */ +diff --git a/gcc/testsuite/gcc.dg/stack-check-6a.c b/gcc/testsuite/gcc.dg/stack-check-6a.c +new file mode 100644 +index 00000000000..6f8e7128921 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/stack-check-6a.c +@@ -0,0 +1,17 @@ ++/* The goal here is to verify that increasing the size of the guard allows ++ elimination of all probing on the relevant targets. 
*/ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=16" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++#include "stack-check-6.c" ++ ++/* { dg-final { scan-rtl-dump-times "Stack clash inline probes" 0 "pro_and_epilogue" } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash probe loop" 0 "pro_and_epilogue" } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash residual allocation in prologue" 4 "pro_and_epilogue" } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash not noreturn" 4 "pro_and_epilogue" } } */ ++ ++/* { dg-final { scan-rtl-dump-times "Stack clash no frame pointer needed" 4 "pro_and_epilogue" { target { ! frame_pointer_for_non_leaf } } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash no frame pointer needed" 2 "pro_and_epilogue" { target { frame_pointer_for_non_leaf } } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash frame pointer needed" 2 "pro_and_epilogue" { target { frame_pointer_for_non_leaf } } } } */ +diff --git a/gcc/testsuite/gcc.dg/stack-check-7.c b/gcc/testsuite/gcc.dg/stack-check-7.c +new file mode 100644 +index 00000000000..b963a2881dc +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/stack-check-7.c +@@ -0,0 +1,36 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -fstack-clash-protection -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++/* For further testing, this can be run under valgrind where it's crashed ++ on aarch64 and ppc64le with -fstack-check=specific. 
*/ ++ ++ ++__attribute__((noinline, noclone)) void ++foo (char *p) ++{ ++ asm volatile ("" : : "r" (p) : "memory"); ++} ++ ++__attribute__((noinline, noclone)) void ++bar (void) ++{ ++ char buf[131072]; ++ foo (buf); ++} ++ ++__attribute__((noinline, noclone)) void ++baz (void) ++{ ++ char buf[12000]; ++ foo (buf); ++} ++ ++int ++main () ++{ ++ bar (); ++ baz (); ++ return 0; ++} ++ +diff --git a/gcc/testsuite/gcc.dg/stack-check-8.c b/gcc/testsuite/gcc.dg/stack-check-8.c +new file mode 100644 +index 00000000000..84d5adef154 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/stack-check-8.c +@@ -0,0 +1,139 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -fstack-clash-protection -Wno-psabi -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++ ++typedef float V __attribute__((vector_size (32))); ++ ++__attribute__((noinline, noclone)) void ++foo (char *p) ++{ ++ asm volatile ("" : : "r" (p) : "memory"); ++} ++ ++__attribute__((noinline, noclone)) int ++f0 (int x, int y) ++{ ++ asm volatile ("" : : : "memory"); ++ return x + y; ++} ++ ++__attribute__((noinline, noclone)) void ++f1 (void) ++{ ++ char buf[64]; ++ foo (buf); ++} ++ ++__attribute__((noinline, noclone)) void ++f2 (void) ++{ ++ char buf[12000]; ++ foo (buf); ++} ++ ++__attribute__((noinline, noclone)) void ++f3 (void) ++{ ++ char buf[131072]; ++ foo (buf); ++} ++ ++__attribute__((noinline, noclone)) void ++f4 (int x) ++{ ++ char vla[x]; ++ foo (vla); ++} ++ ++__attribute__((noinline, noclone)) void ++f5 (int x) ++{ ++ char buf[12000]; ++ foo (buf); ++ { ++ char vla[x]; ++ foo (vla); ++ } ++ { ++ char vla[x]; ++ foo (vla); ++ } ++} ++ ++V v; ++ ++__attribute__((noinline, noclone)) int ++f6 (int x, int y, V a, V b, V c) ++{ ++ asm volatile ("" : : : "memory"); ++ v = a + b + c; ++ return x + y; ++} ++ ++__attribute__((noinline, noclone)) void ++f7 (V a, V b, V c) ++{ 
++ char buf[64]; ++ foo (buf); ++ v = a + b + c; ++} ++ ++__attribute__((noinline, noclone)) void ++f8 (V a, V b, V c) ++{ ++ char buf[12000]; ++ foo (buf); ++ v = a + b + c; ++} ++ ++__attribute__((noinline, noclone)) void ++f9 (V a, V b, V c) ++{ ++ char buf[131072]; ++ foo (buf); ++ v = a + b + c; ++} ++ ++__attribute__((noinline, noclone)) void ++f10 (int x, V a, V b, V c) ++{ ++ char vla[x]; ++ foo (vla); ++ v = a + b + c; ++} ++ ++__attribute__((noinline, noclone)) void ++f11 (int x, V a, V b, V c) ++{ ++ char buf[12000]; ++ foo (buf); ++ v = a + b + c; ++ { ++ char vla[x]; ++ foo (vla); ++ } ++ { ++ char vla[x]; ++ foo (vla); ++ } ++} ++ ++int ++main () ++{ ++ f0 (2, 3); ++ f1 (); ++ f2 (); ++ f3 (); ++ f4 (12000); ++ f5 (12000); ++ f6 (2, 3, v, v, v); ++ f7 (v, v, v); ++ f8 (v, v, v); ++ f9 (v, v, v); ++ f10 (12000, v, v, v); ++ f11 (12000, v, v, v); ++ return 0; ++} ++ +diff --git a/gcc/testsuite/gcc.dg/stack-check-9.c b/gcc/testsuite/gcc.dg/stack-check-9.c +new file mode 100644 +index 00000000000..b84075b9b43 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/stack-check-9.c +@@ -0,0 +1,2022 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection -fdump-rtl-pro_and_epilogue -fno-optimize-sibling-calls --param stack-clash-protection-probe-interval=12 --param stack-clash-protection-guard-size=12" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++double f1 (void); ++double f2 (double, double); ++ ++double ++f3 (void) ++{ ++ double d000 = f1 (); ++ double d001 = f1 (); ++ double d002 = f1 (); ++ double d003 = f1 (); ++ double d004 = f1 (); ++ double d005 = f1 (); ++ double d006 = f1 (); ++ double d007 = f1 (); ++ double d008 = f1 (); ++ double d009 = f1 (); ++ double d010 = f1 (); ++ double d011 = f1 (); ++ double d012 = f1 (); ++ double d013 = f1 (); ++ double d014 = f1 (); ++ double d015 = f1 (); ++ double d016 = f1 (); ++ double d017 = f1 (); ++ double d018 = f1 (); ++ double d019 = f1 (); ++ double d020 = f1 
(); ++ double d021 = f1 (); ++ double d022 = f1 (); ++ double d023 = f1 (); ++ double d024 = f1 (); ++ double d025 = f1 (); ++ double d026 = f1 (); ++ double d027 = f1 (); ++ double d028 = f1 (); ++ double d029 = f1 (); ++ double d030 = f1 (); ++ double d031 = f1 (); ++ double d032 = f1 (); ++ double d033 = f1 (); ++ double d034 = f1 (); ++ double d035 = f1 (); ++ double d036 = f1 (); ++ double d037 = f1 (); ++ double d038 = f1 (); ++ double d039 = f1 (); ++ double d040 = f1 (); ++ double d041 = f1 (); ++ double d042 = f1 (); ++ double d043 = f1 (); ++ double d044 = f1 (); ++ double d045 = f1 (); ++ double d046 = f1 (); ++ double d047 = f1 (); ++ double d048 = f1 (); ++ double d049 = f1 (); ++ double d050 = f1 (); ++ double d051 = f1 (); ++ double d052 = f1 (); ++ double d053 = f1 (); ++ double d054 = f1 (); ++ double d055 = f1 (); ++ double d056 = f1 (); ++ double d057 = f1 (); ++ double d058 = f1 (); ++ double d059 = f1 (); ++ double d060 = f1 (); ++ double d061 = f1 (); ++ double d062 = f1 (); ++ double d063 = f1 (); ++ double d064 = f1 (); ++ double d065 = f1 (); ++ double d066 = f1 (); ++ double d067 = f1 (); ++ double d068 = f1 (); ++ double d069 = f1 (); ++ double d070 = f1 (); ++ double d071 = f1 (); ++ double d072 = f1 (); ++ double d073 = f1 (); ++ double d074 = f1 (); ++ double d075 = f1 (); ++ double d076 = f1 (); ++ double d077 = f1 (); ++ double d078 = f1 (); ++ double d079 = f1 (); ++ double d080 = f1 (); ++ double d081 = f1 (); ++ double d082 = f1 (); ++ double d083 = f1 (); ++ double d084 = f1 (); ++ double d085 = f1 (); ++ double d086 = f1 (); ++ double d087 = f1 (); ++ double d088 = f1 (); ++ double d089 = f1 (); ++ double d090 = f1 (); ++ double d091 = f1 (); ++ double d092 = f1 (); ++ double d093 = f1 (); ++ double d094 = f1 (); ++ double d095 = f1 (); ++ double d096 = f1 (); ++ double d097 = f1 (); ++ double d098 = f1 (); ++ double d099 = f1 (); ++ double d100 = f1 (); ++ double d101 = f1 (); ++ double d102 = f1 (); ++ double d103 = f1 (); ++ 
double d104 = f1 (); ++ double d105 = f1 (); ++ double d106 = f1 (); ++ double d107 = f1 (); ++ double d108 = f1 (); ++ double d109 = f1 (); ++ double d110 = f1 (); ++ double d111 = f1 (); ++ double d112 = f1 (); ++ double d113 = f1 (); ++ double d114 = f1 (); ++ double d115 = f1 (); ++ double d116 = f1 (); ++ double d117 = f1 (); ++ double d118 = f1 (); ++ double d119 = f1 (); ++ double d120 = f1 (); ++ double d121 = f1 (); ++ double d122 = f1 (); ++ double d123 = f1 (); ++ double d124 = f1 (); ++ double d125 = f1 (); ++ double d126 = f1 (); ++ double d127 = f1 (); ++ double d128 = f1 (); ++ double d129 = f1 (); ++ double d130 = f1 (); ++ double d131 = f1 (); ++ double d132 = f1 (); ++ double d133 = f1 (); ++ double d134 = f1 (); ++ double d135 = f1 (); ++ double d136 = f1 (); ++ double d137 = f1 (); ++ double d138 = f1 (); ++ double d139 = f1 (); ++ double d140 = f1 (); ++ double d141 = f1 (); ++ double d142 = f1 (); ++ double d143 = f1 (); ++ double d144 = f1 (); ++ double d145 = f1 (); ++ double d146 = f1 (); ++ double d147 = f1 (); ++ double d148 = f1 (); ++ double d149 = f1 (); ++ double d150 = f1 (); ++ double d151 = f1 (); ++ double d152 = f1 (); ++ double d153 = f1 (); ++ double d154 = f1 (); ++ double d155 = f1 (); ++ double d156 = f1 (); ++ double d157 = f1 (); ++ double d158 = f1 (); ++ double d159 = f1 (); ++ double d160 = f1 (); ++ double d161 = f1 (); ++ double d162 = f1 (); ++ double d163 = f1 (); ++ double d164 = f1 (); ++ double d165 = f1 (); ++ double d166 = f1 (); ++ double d167 = f1 (); ++ double d168 = f1 (); ++ double d169 = f1 (); ++ double d170 = f1 (); ++ double d171 = f1 (); ++ double d172 = f1 (); ++ double d173 = f1 (); ++ double d174 = f1 (); ++ double d175 = f1 (); ++ double d176 = f1 (); ++ double d177 = f1 (); ++ double d178 = f1 (); ++ double d179 = f1 (); ++ double d180 = f1 (); ++ double d181 = f1 (); ++ double d182 = f1 (); ++ double d183 = f1 (); ++ double d184 = f1 (); ++ double d185 = f1 (); ++ double d186 = f1 (); ++ double 
d187 = f1 (); ++ double d188 = f1 (); ++ double d189 = f1 (); ++ double d190 = f1 (); ++ double d191 = f1 (); ++ double d192 = f1 (); ++ double d193 = f1 (); ++ double d194 = f1 (); ++ double d195 = f1 (); ++ double d196 = f1 (); ++ double d197 = f1 (); ++ double d198 = f1 (); ++ double d199 = f1 (); ++ double d200 = f1 (); ++ double d201 = f1 (); ++ double d202 = f1 (); ++ double d203 = f1 (); ++ double d204 = f1 (); ++ double d205 = f1 (); ++ double d206 = f1 (); ++ double d207 = f1 (); ++ double d208 = f1 (); ++ double d209 = f1 (); ++ double d210 = f1 (); ++ double d211 = f1 (); ++ double d212 = f1 (); ++ double d213 = f1 (); ++ double d214 = f1 (); ++ double d215 = f1 (); ++ double d216 = f1 (); ++ double d217 = f1 (); ++ double d218 = f1 (); ++ double d219 = f1 (); ++ double d220 = f1 (); ++ double d221 = f1 (); ++ double d222 = f1 (); ++ double d223 = f1 (); ++ double d224 = f1 (); ++ double d225 = f1 (); ++ double d226 = f1 (); ++ double d227 = f1 (); ++ double d228 = f1 (); ++ double d229 = f1 (); ++ double d230 = f1 (); ++ double d231 = f1 (); ++ double d232 = f1 (); ++ double d233 = f1 (); ++ double d234 = f1 (); ++ double d235 = f1 (); ++ double d236 = f1 (); ++ double d237 = f1 (); ++ double d238 = f1 (); ++ double d239 = f1 (); ++ double d240 = f1 (); ++ double d241 = f1 (); ++ double d242 = f1 (); ++ double d243 = f1 (); ++ double d244 = f1 (); ++ double d245 = f1 (); ++ double d246 = f1 (); ++ double d247 = f1 (); ++ double d248 = f1 (); ++ double d249 = f1 (); ++ double d250 = f1 (); ++ double d251 = f1 (); ++ double d252 = f1 (); ++ double d253 = f1 (); ++ double d254 = f1 (); ++ double d255 = f1 (); ++ double d256 = f1 (); ++ double d257 = f1 (); ++ double d258 = f1 (); ++ double d259 = f1 (); ++ double d260 = f1 (); ++ double d261 = f1 (); ++ double d262 = f1 (); ++ double d263 = f1 (); ++ double d264 = f1 (); ++ double d265 = f1 (); ++ double d266 = f1 (); ++ double d267 = f1 (); ++ double d268 = f1 (); ++ double d269 = f1 (); ++ double d270 = 
f1 (); ++ double d271 = f1 (); ++ double d272 = f1 (); ++ double d273 = f1 (); ++ double d274 = f1 (); ++ double d275 = f1 (); ++ double d276 = f1 (); ++ double d277 = f1 (); ++ double d278 = f1 (); ++ double d279 = f1 (); ++ double d280 = f1 (); ++ double d281 = f1 (); ++ double d282 = f1 (); ++ double d283 = f1 (); ++ double d284 = f1 (); ++ double d285 = f1 (); ++ double d286 = f1 (); ++ double d287 = f1 (); ++ double d288 = f1 (); ++ double d289 = f1 (); ++ double d290 = f1 (); ++ double d291 = f1 (); ++ double d292 = f1 (); ++ double d293 = f1 (); ++ double d294 = f1 (); ++ double d295 = f1 (); ++ double d296 = f1 (); ++ double d297 = f1 (); ++ double d298 = f1 (); ++ double d299 = f1 (); ++ double d300 = f1 (); ++ double d301 = f1 (); ++ double d302 = f1 (); ++ double d303 = f1 (); ++ double d304 = f1 (); ++ double d305 = f1 (); ++ double d306 = f1 (); ++ double d307 = f1 (); ++ double d308 = f1 (); ++ double d309 = f1 (); ++ double d310 = f1 (); ++ double d311 = f1 (); ++ double d312 = f1 (); ++ double d313 = f1 (); ++ double d314 = f1 (); ++ double d315 = f1 (); ++ double d316 = f1 (); ++ double d317 = f1 (); ++ double d318 = f1 (); ++ double d319 = f1 (); ++ double d320 = f1 (); ++ double d321 = f1 (); ++ double d322 = f1 (); ++ double d323 = f1 (); ++ double d324 = f1 (); ++ double d325 = f1 (); ++ double d326 = f1 (); ++ double d327 = f1 (); ++ double d328 = f1 (); ++ double d329 = f1 (); ++ double d330 = f1 (); ++ double d331 = f1 (); ++ double d332 = f1 (); ++ double d333 = f1 (); ++ double d334 = f1 (); ++ double d335 = f1 (); ++ double d336 = f1 (); ++ double d337 = f1 (); ++ double d338 = f1 (); ++ double d339 = f1 (); ++ double d340 = f1 (); ++ double d341 = f1 (); ++ double d342 = f1 (); ++ double d343 = f1 (); ++ double d344 = f1 (); ++ double d345 = f1 (); ++ double d346 = f1 (); ++ double d347 = f1 (); ++ double d348 = f1 (); ++ double d349 = f1 (); ++ double d350 = f1 (); ++ double d351 = f1 (); ++ double d352 = f1 (); ++ double d353 = f1 (); 
++ double d354 = f1 (); ++ double d355 = f1 (); ++ double d356 = f1 (); ++ double d357 = f1 (); ++ double d358 = f1 (); ++ double d359 = f1 (); ++ double d360 = f1 (); ++ double d361 = f1 (); ++ double d362 = f1 (); ++ double d363 = f1 (); ++ double d364 = f1 (); ++ double d365 = f1 (); ++ double d366 = f1 (); ++ double d367 = f1 (); ++ double d368 = f1 (); ++ double d369 = f1 (); ++ double d370 = f1 (); ++ double d371 = f1 (); ++ double d372 = f1 (); ++ double d373 = f1 (); ++ double d374 = f1 (); ++ double d375 = f1 (); ++ double d376 = f1 (); ++ double d377 = f1 (); ++ double d378 = f1 (); ++ double d379 = f1 (); ++ double d380 = f1 (); ++ double d381 = f1 (); ++ double d382 = f1 (); ++ double d383 = f1 (); ++ double d384 = f1 (); ++ double d385 = f1 (); ++ double d386 = f1 (); ++ double d387 = f1 (); ++ double d388 = f1 (); ++ double d389 = f1 (); ++ double d390 = f1 (); ++ double d391 = f1 (); ++ double d392 = f1 (); ++ double d393 = f1 (); ++ double d394 = f1 (); ++ double d395 = f1 (); ++ double d396 = f1 (); ++ double d397 = f1 (); ++ double d398 = f1 (); ++ double d399 = f1 (); ++ double d400 = f1 (); ++ double d401 = f1 (); ++ double d402 = f1 (); ++ double d403 = f1 (); ++ double d404 = f1 (); ++ double d405 = f1 (); ++ double d406 = f1 (); ++ double d407 = f1 (); ++ double d408 = f1 (); ++ double d409 = f1 (); ++ double d410 = f1 (); ++ double d411 = f1 (); ++ double d412 = f1 (); ++ double d413 = f1 (); ++ double d414 = f1 (); ++ double d415 = f1 (); ++ double d416 = f1 (); ++ double d417 = f1 (); ++ double d418 = f1 (); ++ double d419 = f1 (); ++ double d420 = f1 (); ++ double d421 = f1 (); ++ double d422 = f1 (); ++ double d423 = f1 (); ++ double d424 = f1 (); ++ double d425 = f1 (); ++ double d426 = f1 (); ++ double d427 = f1 (); ++ double d428 = f1 (); ++ double d429 = f1 (); ++ double d430 = f1 (); ++ double d431 = f1 (); ++ double d432 = f1 (); ++ double d433 = f1 (); ++ double d434 = f1 (); ++ double d435 = f1 (); ++ double d436 = f1 (); ++ 
double d437 = f1 (); ++ double d438 = f1 (); ++ double d439 = f1 (); ++ double d440 = f1 (); ++ double d441 = f1 (); ++ double d442 = f1 (); ++ double d443 = f1 (); ++ double d444 = f1 (); ++ double d445 = f1 (); ++ double d446 = f1 (); ++ double d447 = f1 (); ++ double d448 = f1 (); ++ double d449 = f1 (); ++ double d450 = f1 (); ++ double d451 = f1 (); ++ double d452 = f1 (); ++ double d453 = f1 (); ++ double d454 = f1 (); ++ double d455 = f1 (); ++ double d456 = f1 (); ++ double d457 = f1 (); ++ double d458 = f1 (); ++ double d459 = f1 (); ++ double d460 = f1 (); ++ double d461 = f1 (); ++ double d462 = f1 (); ++ double d463 = f1 (); ++ double d464 = f1 (); ++ double d465 = f1 (); ++ double d466 = f1 (); ++ double d467 = f1 (); ++ double d468 = f1 (); ++ double d469 = f1 (); ++ double d470 = f1 (); ++ double d471 = f1 (); ++ double d472 = f1 (); ++ double d473 = f1 (); ++ double d474 = f1 (); ++ double d475 = f1 (); ++ double d476 = f1 (); ++ double d477 = f1 (); ++ double d478 = f1 (); ++ double d479 = f1 (); ++ double d480 = f1 (); ++ double d481 = f1 (); ++ double d482 = f1 (); ++ double d483 = f1 (); ++ double d484 = f1 (); ++ double d485 = f1 (); ++ double d486 = f1 (); ++ double d487 = f1 (); ++ double d488 = f1 (); ++ double d489 = f1 (); ++ double d490 = f1 (); ++ double d491 = f1 (); ++ double d492 = f1 (); ++ double d493 = f1 (); ++ double d494 = f1 (); ++ double d495 = f1 (); ++ double d496 = f1 (); ++ double d497 = f1 (); ++ double d498 = f1 (); ++ double d499 = f1 (); ++ double d500 = f1 (); ++ double d501 = f1 (); ++ double d502 = f1 (); ++ double d503 = f1 (); ++ double d504 = f1 (); ++ double d505 = f1 (); ++ double d506 = f1 (); ++ double d507 = f1 (); ++ double d508 = f1 (); ++ double d509 = f1 (); ++ double d510 = f1 (); ++ double d511 = f1 (); ++ double d512 = f1 (); ++ double d513 = f1 (); ++ double d514 = f1 (); ++ double d515 = f1 (); ++ double d516 = f1 (); ++ double d517 = f1 (); ++ double d518 = f1 (); ++ double d519 = f1 (); ++ double 
d520 = f1 (); ++ double d521 = f1 (); ++ double d522 = f1 (); ++ double d523 = f1 (); ++ double d524 = f1 (); ++ double d525 = f1 (); ++ double d526 = f1 (); ++ double d527 = f1 (); ++ double d528 = f1 (); ++ double d529 = f1 (); ++ double d530 = f1 (); ++ double d531 = f1 (); ++ double d532 = f1 (); ++ double d533 = f1 (); ++ double d534 = f1 (); ++ double d535 = f1 (); ++ double d536 = f1 (); ++ double d537 = f1 (); ++ double d538 = f1 (); ++ double d539 = f1 (); ++ double d540 = f1 (); ++ double d541 = f1 (); ++ double d542 = f1 (); ++ double d543 = f1 (); ++ double d544 = f1 (); ++ double d545 = f1 (); ++ double d546 = f1 (); ++ double d547 = f1 (); ++ double d548 = f1 (); ++ double d549 = f1 (); ++ double d550 = f1 (); ++ double d551 = f1 (); ++ double d552 = f1 (); ++ double d553 = f1 (); ++ double d554 = f1 (); ++ double d555 = f1 (); ++ double d556 = f1 (); ++ double d557 = f1 (); ++ double d558 = f1 (); ++ double d559 = f1 (); ++ double d560 = f1 (); ++ double d561 = f1 (); ++ double d562 = f1 (); ++ double d563 = f1 (); ++ double d564 = f1 (); ++ double d565 = f1 (); ++ double d566 = f1 (); ++ double d567 = f1 (); ++ double d568 = f1 (); ++ double d569 = f1 (); ++ double d570 = f1 (); ++ double d571 = f1 (); ++ double d572 = f1 (); ++ double d573 = f1 (); ++ double d574 = f1 (); ++ double d575 = f1 (); ++ double d576 = f1 (); ++ double d577 = f1 (); ++ double d578 = f1 (); ++ double d579 = f1 (); ++ double d580 = f1 (); ++ double d581 = f1 (); ++ double d582 = f1 (); ++ double d583 = f1 (); ++ double d584 = f1 (); ++ double d585 = f1 (); ++ double d586 = f1 (); ++ double d587 = f1 (); ++ double d588 = f1 (); ++ double d589 = f1 (); ++ double d590 = f1 (); ++ double d591 = f1 (); ++ double d592 = f1 (); ++ double d593 = f1 (); ++ double d594 = f1 (); ++ double d595 = f1 (); ++ double d596 = f1 (); ++ double d597 = f1 (); ++ double d598 = f1 (); ++ double d599 = f1 (); ++ double d600 = f1 (); ++ double d601 = f1 (); ++ double d602 = f1 (); ++ double d603 = 
f1 (); ++ double d604 = f1 (); ++ double d605 = f1 (); ++ double d606 = f1 (); ++ double d607 = f1 (); ++ double d608 = f1 (); ++ double d609 = f1 (); ++ double d610 = f1 (); ++ double d611 = f1 (); ++ double d612 = f1 (); ++ double d613 = f1 (); ++ double d614 = f1 (); ++ double d615 = f1 (); ++ double d616 = f1 (); ++ double d617 = f1 (); ++ double d618 = f1 (); ++ double d619 = f1 (); ++ double d620 = f1 (); ++ double d621 = f1 (); ++ double d622 = f1 (); ++ double d623 = f1 (); ++ double d624 = f1 (); ++ double d625 = f1 (); ++ double d626 = f1 (); ++ double d627 = f1 (); ++ double d628 = f1 (); ++ double d629 = f1 (); ++ double d630 = f1 (); ++ double d631 = f1 (); ++ double d632 = f1 (); ++ double d633 = f1 (); ++ double d634 = f1 (); ++ double d635 = f1 (); ++ double d636 = f1 (); ++ double d637 = f1 (); ++ double d638 = f1 (); ++ double d639 = f1 (); ++ double d640 = f1 (); ++ double d641 = f1 (); ++ double d642 = f1 (); ++ double d643 = f1 (); ++ double d644 = f1 (); ++ double d645 = f1 (); ++ double d646 = f1 (); ++ double d647 = f1 (); ++ double d648 = f1 (); ++ double d649 = f1 (); ++ double d650 = f1 (); ++ double d651 = f1 (); ++ double d652 = f1 (); ++ double d653 = f1 (); ++ double d654 = f1 (); ++ double d655 = f1 (); ++ double d656 = f1 (); ++ double d657 = f1 (); ++ double d658 = f1 (); ++ double d659 = f1 (); ++ double d660 = f1 (); ++ double d661 = f1 (); ++ double d662 = f1 (); ++ double d663 = f1 (); ++ double d664 = f1 (); ++ double d665 = f1 (); ++ double d666 = f1 (); ++ double d667 = f1 (); ++ double d668 = f1 (); ++ double d669 = f1 (); ++ double d670 = f1 (); ++ double d671 = f1 (); ++ double d672 = f1 (); ++ double d673 = f1 (); ++ double d674 = f1 (); ++ double d675 = f1 (); ++ double d676 = f1 (); ++ double d677 = f1 (); ++ double d678 = f1 (); ++ double d679 = f1 (); ++ double d680 = f1 (); ++ double d681 = f1 (); ++ double d682 = f1 (); ++ double d683 = f1 (); ++ double d684 = f1 (); ++ double d685 = f1 (); ++ double d686 = f1 (); 
++ double d687 = f1 (); ++ double d688 = f1 (); ++ double d689 = f1 (); ++ double d690 = f1 (); ++ double d691 = f1 (); ++ double d692 = f1 (); ++ double d693 = f1 (); ++ double d694 = f1 (); ++ double d695 = f1 (); ++ double d696 = f1 (); ++ double d697 = f1 (); ++ double d698 = f1 (); ++ double d699 = f1 (); ++ double d700 = f1 (); ++ double d701 = f1 (); ++ double d702 = f1 (); ++ double d703 = f1 (); ++ double d704 = f1 (); ++ double d705 = f1 (); ++ double d706 = f1 (); ++ double d707 = f1 (); ++ double d708 = f1 (); ++ double d709 = f1 (); ++ double d710 = f1 (); ++ double d711 = f1 (); ++ double d712 = f1 (); ++ double d713 = f1 (); ++ double d714 = f1 (); ++ double d715 = f1 (); ++ double d716 = f1 (); ++ double d717 = f1 (); ++ double d718 = f1 (); ++ double d719 = f1 (); ++ double d720 = f1 (); ++ double d721 = f1 (); ++ double d722 = f1 (); ++ double d723 = f1 (); ++ double d724 = f1 (); ++ double d725 = f1 (); ++ double d726 = f1 (); ++ double d727 = f1 (); ++ double d728 = f1 (); ++ double d729 = f1 (); ++ double d730 = f1 (); ++ double d731 = f1 (); ++ double d732 = f1 (); ++ double d733 = f1 (); ++ double d734 = f1 (); ++ double d735 = f1 (); ++ double d736 = f1 (); ++ double d737 = f1 (); ++ double d738 = f1 (); ++ double d739 = f1 (); ++ double d740 = f1 (); ++ double d741 = f1 (); ++ double d742 = f1 (); ++ double d743 = f1 (); ++ double d744 = f1 (); ++ double d745 = f1 (); ++ double d746 = f1 (); ++ double d747 = f1 (); ++ double d748 = f1 (); ++ double d749 = f1 (); ++ double d750 = f1 (); ++ double d751 = f1 (); ++ double d752 = f1 (); ++ double d753 = f1 (); ++ double d754 = f1 (); ++ double d755 = f1 (); ++ double d756 = f1 (); ++ double d757 = f1 (); ++ double d758 = f1 (); ++ double d759 = f1 (); ++ double d760 = f1 (); ++ double d761 = f1 (); ++ double d762 = f1 (); ++ double d763 = f1 (); ++ double d764 = f1 (); ++ double d765 = f1 (); ++ double d766 = f1 (); ++ double d767 = f1 (); ++ double d768 = f1 (); ++ double d769 = f1 (); ++ 
double d770 = f1 (); ++ double d771 = f1 (); ++ double d772 = f1 (); ++ double d773 = f1 (); ++ double d774 = f1 (); ++ double d775 = f1 (); ++ double d776 = f1 (); ++ double d777 = f1 (); ++ double d778 = f1 (); ++ double d779 = f1 (); ++ double d780 = f1 (); ++ double d781 = f1 (); ++ double d782 = f1 (); ++ double d783 = f1 (); ++ double d784 = f1 (); ++ double d785 = f1 (); ++ double d786 = f1 (); ++ double d787 = f1 (); ++ double d788 = f1 (); ++ double d789 = f1 (); ++ double d790 = f1 (); ++ double d791 = f1 (); ++ double d792 = f1 (); ++ double d793 = f1 (); ++ double d794 = f1 (); ++ double d795 = f1 (); ++ double d796 = f1 (); ++ double d797 = f1 (); ++ double d798 = f1 (); ++ double d799 = f1 (); ++ double d800 = f1 (); ++ double d801 = f1 (); ++ double d802 = f1 (); ++ double d803 = f1 (); ++ double d804 = f1 (); ++ double d805 = f1 (); ++ double d806 = f1 (); ++ double d807 = f1 (); ++ double d808 = f1 (); ++ double d809 = f1 (); ++ double d810 = f1 (); ++ double d811 = f1 (); ++ double d812 = f1 (); ++ double d813 = f1 (); ++ double d814 = f1 (); ++ double d815 = f1 (); ++ double d816 = f1 (); ++ double d817 = f1 (); ++ double d818 = f1 (); ++ double d819 = f1 (); ++ double d820 = f1 (); ++ double d821 = f1 (); ++ double d822 = f1 (); ++ double d823 = f1 (); ++ double d824 = f1 (); ++ double d825 = f1 (); ++ double d826 = f1 (); ++ double d827 = f1 (); ++ double d828 = f1 (); ++ double d829 = f1 (); ++ double d830 = f1 (); ++ double d831 = f1 (); ++ double d832 = f1 (); ++ double d833 = f1 (); ++ double d834 = f1 (); ++ double d835 = f1 (); ++ double d836 = f1 (); ++ double d837 = f1 (); ++ double d838 = f1 (); ++ double d839 = f1 (); ++ double d840 = f1 (); ++ double d841 = f1 (); ++ double d842 = f1 (); ++ double d843 = f1 (); ++ double d844 = f1 (); ++ double d845 = f1 (); ++ double d846 = f1 (); ++ double d847 = f1 (); ++ double d848 = f1 (); ++ double d849 = f1 (); ++ double d850 = f1 (); ++ double d851 = f1 (); ++ double d852 = f1 (); ++ double 
d853 = f1 (); ++ double d854 = f1 (); ++ double d855 = f1 (); ++ double d856 = f1 (); ++ double d857 = f1 (); ++ double d858 = f1 (); ++ double d859 = f1 (); ++ double d860 = f1 (); ++ double d861 = f1 (); ++ double d862 = f1 (); ++ double d863 = f1 (); ++ double d864 = f1 (); ++ double d865 = f1 (); ++ double d866 = f1 (); ++ double d867 = f1 (); ++ double d868 = f1 (); ++ double d869 = f1 (); ++ double d870 = f1 (); ++ double d871 = f1 (); ++ double d872 = f1 (); ++ double d873 = f1 (); ++ double d874 = f1 (); ++ double d875 = f1 (); ++ double d876 = f1 (); ++ double d877 = f1 (); ++ double d878 = f1 (); ++ double d879 = f1 (); ++ double d880 = f1 (); ++ double d881 = f1 (); ++ double d882 = f1 (); ++ double d883 = f1 (); ++ double d884 = f1 (); ++ double d885 = f1 (); ++ double d886 = f1 (); ++ double d887 = f1 (); ++ double d888 = f1 (); ++ double d889 = f1 (); ++ double d890 = f1 (); ++ double d891 = f1 (); ++ double d892 = f1 (); ++ double d893 = f1 (); ++ double d894 = f1 (); ++ double d895 = f1 (); ++ double d896 = f1 (); ++ double d897 = f1 (); ++ double d898 = f1 (); ++ double d899 = f1 (); ++ double d900 = f1 (); ++ double d901 = f1 (); ++ double d902 = f1 (); ++ double d903 = f1 (); ++ double d904 = f1 (); ++ double d905 = f1 (); ++ double d906 = f1 (); ++ double d907 = f1 (); ++ double d908 = f1 (); ++ double d909 = f1 (); ++ double d910 = f1 (); ++ double d911 = f1 (); ++ double d912 = f1 (); ++ double d913 = f1 (); ++ double d914 = f1 (); ++ double d915 = f1 (); ++ double d916 = f1 (); ++ double d917 = f1 (); ++ double d918 = f1 (); ++ double d919 = f1 (); ++ double d920 = f1 (); ++ double d921 = f1 (); ++ double d922 = f1 (); ++ double d923 = f1 (); ++ double d924 = f1 (); ++ double d925 = f1 (); ++ double d926 = f1 (); ++ double d927 = f1 (); ++ double d928 = f1 (); ++ double d929 = f1 (); ++ double d930 = f1 (); ++ double d931 = f1 (); ++ double d932 = f1 (); ++ double d933 = f1 (); ++ double d934 = f1 (); ++ double d935 = f1 (); ++ double d936 = 
f1 (); ++ double d937 = f1 (); ++ double d938 = f1 (); ++ double d939 = f1 (); ++ double d940 = f1 (); ++ double d941 = f1 (); ++ double d942 = f1 (); ++ double d943 = f1 (); ++ double d944 = f1 (); ++ double d945 = f1 (); ++ double d946 = f1 (); ++ double d947 = f1 (); ++ double d948 = f1 (); ++ double d949 = f1 (); ++ double d950 = f1 (); ++ double d951 = f1 (); ++ double d952 = f1 (); ++ double d953 = f1 (); ++ double d954 = f1 (); ++ double d955 = f1 (); ++ double d956 = f1 (); ++ double d957 = f1 (); ++ double d958 = f1 (); ++ double d959 = f1 (); ++ double d960 = f1 (); ++ double d961 = f1 (); ++ double d962 = f1 (); ++ double d963 = f1 (); ++ double d964 = f1 (); ++ double d965 = f1 (); ++ double d966 = f1 (); ++ double d967 = f1 (); ++ double d968 = f1 (); ++ double d969 = f1 (); ++ double d970 = f1 (); ++ double d971 = f1 (); ++ double d972 = f1 (); ++ double d973 = f1 (); ++ double d974 = f1 (); ++ double d975 = f1 (); ++ double d976 = f1 (); ++ double d977 = f1 (); ++ double d978 = f1 (); ++ double d979 = f1 (); ++ double d980 = f1 (); ++ double d981 = f1 (); ++ double d982 = f1 (); ++ double d983 = f1 (); ++ double d984 = f1 (); ++ double d985 = f1 (); ++ double d986 = f1 (); ++ double d987 = f1 (); ++ double d988 = f1 (); ++ double d989 = f1 (); ++ double d990 = f1 (); ++ double d991 = f1 (); ++ double d992 = f1 (); ++ double d993 = f1 (); ++ double d994 = f1 (); ++ double d995 = f1 (); ++ double d996 = f1 (); ++ double d997 = f1 (); ++ double d998 = f1 (); ++ double d999 = f1 (); ++ ++ double x = 0; ++ x = f2 (x, d000); ++ x = f2 (x, d001); ++ x = f2 (x, d002); ++ x = f2 (x, d003); ++ x = f2 (x, d004); ++ x = f2 (x, d005); ++ x = f2 (x, d006); ++ x = f2 (x, d007); ++ x = f2 (x, d008); ++ x = f2 (x, d009); ++ x = f2 (x, d010); ++ x = f2 (x, d011); ++ x = f2 (x, d012); ++ x = f2 (x, d013); ++ x = f2 (x, d014); ++ x = f2 (x, d015); ++ x = f2 (x, d016); ++ x = f2 (x, d017); ++ x = f2 (x, d018); ++ x = f2 (x, d019); ++ x = f2 (x, d020); ++ x = f2 (x, 
d021); ++ x = f2 (x, d022); ++ x = f2 (x, d023); ++ x = f2 (x, d024); ++ x = f2 (x, d025); ++ x = f2 (x, d026); ++ x = f2 (x, d027); ++ x = f2 (x, d028); ++ x = f2 (x, d029); ++ x = f2 (x, d030); ++ x = f2 (x, d031); ++ x = f2 (x, d032); ++ x = f2 (x, d033); ++ x = f2 (x, d034); ++ x = f2 (x, d035); ++ x = f2 (x, d036); ++ x = f2 (x, d037); ++ x = f2 (x, d038); ++ x = f2 (x, d039); ++ x = f2 (x, d040); ++ x = f2 (x, d041); ++ x = f2 (x, d042); ++ x = f2 (x, d043); ++ x = f2 (x, d044); ++ x = f2 (x, d045); ++ x = f2 (x, d046); ++ x = f2 (x, d047); ++ x = f2 (x, d048); ++ x = f2 (x, d049); ++ x = f2 (x, d050); ++ x = f2 (x, d051); ++ x = f2 (x, d052); ++ x = f2 (x, d053); ++ x = f2 (x, d054); ++ x = f2 (x, d055); ++ x = f2 (x, d056); ++ x = f2 (x, d057); ++ x = f2 (x, d058); ++ x = f2 (x, d059); ++ x = f2 (x, d060); ++ x = f2 (x, d061); ++ x = f2 (x, d062); ++ x = f2 (x, d063); ++ x = f2 (x, d064); ++ x = f2 (x, d065); ++ x = f2 (x, d066); ++ x = f2 (x, d067); ++ x = f2 (x, d068); ++ x = f2 (x, d069); ++ x = f2 (x, d070); ++ x = f2 (x, d071); ++ x = f2 (x, d072); ++ x = f2 (x, d073); ++ x = f2 (x, d074); ++ x = f2 (x, d075); ++ x = f2 (x, d076); ++ x = f2 (x, d077); ++ x = f2 (x, d078); ++ x = f2 (x, d079); ++ x = f2 (x, d080); ++ x = f2 (x, d081); ++ x = f2 (x, d082); ++ x = f2 (x, d083); ++ x = f2 (x, d084); ++ x = f2 (x, d085); ++ x = f2 (x, d086); ++ x = f2 (x, d087); ++ x = f2 (x, d088); ++ x = f2 (x, d089); ++ x = f2 (x, d090); ++ x = f2 (x, d091); ++ x = f2 (x, d092); ++ x = f2 (x, d093); ++ x = f2 (x, d094); ++ x = f2 (x, d095); ++ x = f2 (x, d096); ++ x = f2 (x, d097); ++ x = f2 (x, d098); ++ x = f2 (x, d099); ++ x = f2 (x, d100); ++ x = f2 (x, d101); ++ x = f2 (x, d102); ++ x = f2 (x, d103); ++ x = f2 (x, d104); ++ x = f2 (x, d105); ++ x = f2 (x, d106); ++ x = f2 (x, d107); ++ x = f2 (x, d108); ++ x = f2 (x, d109); ++ x = f2 (x, d110); ++ x = f2 (x, d111); ++ x = f2 (x, d112); ++ x = f2 (x, d113); ++ x = f2 (x, d114); ++ x = f2 (x, d115); ++ x = f2 (x, 
d116); ++ x = f2 (x, d117); ++ x = f2 (x, d118); ++ x = f2 (x, d119); ++ x = f2 (x, d120); ++ x = f2 (x, d121); ++ x = f2 (x, d122); ++ x = f2 (x, d123); ++ x = f2 (x, d124); ++ x = f2 (x, d125); ++ x = f2 (x, d126); ++ x = f2 (x, d127); ++ x = f2 (x, d128); ++ x = f2 (x, d129); ++ x = f2 (x, d130); ++ x = f2 (x, d131); ++ x = f2 (x, d132); ++ x = f2 (x, d133); ++ x = f2 (x, d134); ++ x = f2 (x, d135); ++ x = f2 (x, d136); ++ x = f2 (x, d137); ++ x = f2 (x, d138); ++ x = f2 (x, d139); ++ x = f2 (x, d140); ++ x = f2 (x, d141); ++ x = f2 (x, d142); ++ x = f2 (x, d143); ++ x = f2 (x, d144); ++ x = f2 (x, d145); ++ x = f2 (x, d146); ++ x = f2 (x, d147); ++ x = f2 (x, d148); ++ x = f2 (x, d149); ++ x = f2 (x, d150); ++ x = f2 (x, d151); ++ x = f2 (x, d152); ++ x = f2 (x, d153); ++ x = f2 (x, d154); ++ x = f2 (x, d155); ++ x = f2 (x, d156); ++ x = f2 (x, d157); ++ x = f2 (x, d158); ++ x = f2 (x, d159); ++ x = f2 (x, d160); ++ x = f2 (x, d161); ++ x = f2 (x, d162); ++ x = f2 (x, d163); ++ x = f2 (x, d164); ++ x = f2 (x, d165); ++ x = f2 (x, d166); ++ x = f2 (x, d167); ++ x = f2 (x, d168); ++ x = f2 (x, d169); ++ x = f2 (x, d170); ++ x = f2 (x, d171); ++ x = f2 (x, d172); ++ x = f2 (x, d173); ++ x = f2 (x, d174); ++ x = f2 (x, d175); ++ x = f2 (x, d176); ++ x = f2 (x, d177); ++ x = f2 (x, d178); ++ x = f2 (x, d179); ++ x = f2 (x, d180); ++ x = f2 (x, d181); ++ x = f2 (x, d182); ++ x = f2 (x, d183); ++ x = f2 (x, d184); ++ x = f2 (x, d185); ++ x = f2 (x, d186); ++ x = f2 (x, d187); ++ x = f2 (x, d188); ++ x = f2 (x, d189); ++ x = f2 (x, d190); ++ x = f2 (x, d191); ++ x = f2 (x, d192); ++ x = f2 (x, d193); ++ x = f2 (x, d194); ++ x = f2 (x, d195); ++ x = f2 (x, d196); ++ x = f2 (x, d197); ++ x = f2 (x, d198); ++ x = f2 (x, d199); ++ x = f2 (x, d200); ++ x = f2 (x, d201); ++ x = f2 (x, d202); ++ x = f2 (x, d203); ++ x = f2 (x, d204); ++ x = f2 (x, d205); ++ x = f2 (x, d206); ++ x = f2 (x, d207); ++ x = f2 (x, d208); ++ x = f2 (x, d209); ++ x = f2 (x, d210); ++ x = f2 (x, 
d211); ++ x = f2 (x, d212); ++ x = f2 (x, d213); ++ x = f2 (x, d214); ++ x = f2 (x, d215); ++ x = f2 (x, d216); ++ x = f2 (x, d217); ++ x = f2 (x, d218); ++ x = f2 (x, d219); ++ x = f2 (x, d220); ++ x = f2 (x, d221); ++ x = f2 (x, d222); ++ x = f2 (x, d223); ++ x = f2 (x, d224); ++ x = f2 (x, d225); ++ x = f2 (x, d226); ++ x = f2 (x, d227); ++ x = f2 (x, d228); ++ x = f2 (x, d229); ++ x = f2 (x, d230); ++ x = f2 (x, d231); ++ x = f2 (x, d232); ++ x = f2 (x, d233); ++ x = f2 (x, d234); ++ x = f2 (x, d235); ++ x = f2 (x, d236); ++ x = f2 (x, d237); ++ x = f2 (x, d238); ++ x = f2 (x, d239); ++ x = f2 (x, d240); ++ x = f2 (x, d241); ++ x = f2 (x, d242); ++ x = f2 (x, d243); ++ x = f2 (x, d244); ++ x = f2 (x, d245); ++ x = f2 (x, d246); ++ x = f2 (x, d247); ++ x = f2 (x, d248); ++ x = f2 (x, d249); ++ x = f2 (x, d250); ++ x = f2 (x, d251); ++ x = f2 (x, d252); ++ x = f2 (x, d253); ++ x = f2 (x, d254); ++ x = f2 (x, d255); ++ x = f2 (x, d256); ++ x = f2 (x, d257); ++ x = f2 (x, d258); ++ x = f2 (x, d259); ++ x = f2 (x, d260); ++ x = f2 (x, d261); ++ x = f2 (x, d262); ++ x = f2 (x, d263); ++ x = f2 (x, d264); ++ x = f2 (x, d265); ++ x = f2 (x, d266); ++ x = f2 (x, d267); ++ x = f2 (x, d268); ++ x = f2 (x, d269); ++ x = f2 (x, d270); ++ x = f2 (x, d271); ++ x = f2 (x, d272); ++ x = f2 (x, d273); ++ x = f2 (x, d274); ++ x = f2 (x, d275); ++ x = f2 (x, d276); ++ x = f2 (x, d277); ++ x = f2 (x, d278); ++ x = f2 (x, d279); ++ x = f2 (x, d280); ++ x = f2 (x, d281); ++ x = f2 (x, d282); ++ x = f2 (x, d283); ++ x = f2 (x, d284); ++ x = f2 (x, d285); ++ x = f2 (x, d286); ++ x = f2 (x, d287); ++ x = f2 (x, d288); ++ x = f2 (x, d289); ++ x = f2 (x, d290); ++ x = f2 (x, d291); ++ x = f2 (x, d292); ++ x = f2 (x, d293); ++ x = f2 (x, d294); ++ x = f2 (x, d295); ++ x = f2 (x, d296); ++ x = f2 (x, d297); ++ x = f2 (x, d298); ++ x = f2 (x, d299); ++ x = f2 (x, d300); ++ x = f2 (x, d301); ++ x = f2 (x, d302); ++ x = f2 (x, d303); ++ x = f2 (x, d304); ++ x = f2 (x, d305); ++ x = f2 (x, 
d306); ++ x = f2 (x, d307); ++ x = f2 (x, d308); ++ x = f2 (x, d309); ++ x = f2 (x, d310); ++ x = f2 (x, d311); ++ x = f2 (x, d312); ++ x = f2 (x, d313); ++ x = f2 (x, d314); ++ x = f2 (x, d315); ++ x = f2 (x, d316); ++ x = f2 (x, d317); ++ x = f2 (x, d318); ++ x = f2 (x, d319); ++ x = f2 (x, d320); ++ x = f2 (x, d321); ++ x = f2 (x, d322); ++ x = f2 (x, d323); ++ x = f2 (x, d324); ++ x = f2 (x, d325); ++ x = f2 (x, d326); ++ x = f2 (x, d327); ++ x = f2 (x, d328); ++ x = f2 (x, d329); ++ x = f2 (x, d330); ++ x = f2 (x, d331); ++ x = f2 (x, d332); ++ x = f2 (x, d333); ++ x = f2 (x, d334); ++ x = f2 (x, d335); ++ x = f2 (x, d336); ++ x = f2 (x, d337); ++ x = f2 (x, d338); ++ x = f2 (x, d339); ++ x = f2 (x, d340); ++ x = f2 (x, d341); ++ x = f2 (x, d342); ++ x = f2 (x, d343); ++ x = f2 (x, d344); ++ x = f2 (x, d345); ++ x = f2 (x, d346); ++ x = f2 (x, d347); ++ x = f2 (x, d348); ++ x = f2 (x, d349); ++ x = f2 (x, d350); ++ x = f2 (x, d351); ++ x = f2 (x, d352); ++ x = f2 (x, d353); ++ x = f2 (x, d354); ++ x = f2 (x, d355); ++ x = f2 (x, d356); ++ x = f2 (x, d357); ++ x = f2 (x, d358); ++ x = f2 (x, d359); ++ x = f2 (x, d360); ++ x = f2 (x, d361); ++ x = f2 (x, d362); ++ x = f2 (x, d363); ++ x = f2 (x, d364); ++ x = f2 (x, d365); ++ x = f2 (x, d366); ++ x = f2 (x, d367); ++ x = f2 (x, d368); ++ x = f2 (x, d369); ++ x = f2 (x, d370); ++ x = f2 (x, d371); ++ x = f2 (x, d372); ++ x = f2 (x, d373); ++ x = f2 (x, d374); ++ x = f2 (x, d375); ++ x = f2 (x, d376); ++ x = f2 (x, d377); ++ x = f2 (x, d378); ++ x = f2 (x, d379); ++ x = f2 (x, d380); ++ x = f2 (x, d381); ++ x = f2 (x, d382); ++ x = f2 (x, d383); ++ x = f2 (x, d384); ++ x = f2 (x, d385); ++ x = f2 (x, d386); ++ x = f2 (x, d387); ++ x = f2 (x, d388); ++ x = f2 (x, d389); ++ x = f2 (x, d390); ++ x = f2 (x, d391); ++ x = f2 (x, d392); ++ x = f2 (x, d393); ++ x = f2 (x, d394); ++ x = f2 (x, d395); ++ x = f2 (x, d396); ++ x = f2 (x, d397); ++ x = f2 (x, d398); ++ x = f2 (x, d399); ++ x = f2 (x, d400); ++ x = f2 (x, 
d401); ++ x = f2 (x, d402); ++ x = f2 (x, d403); ++ x = f2 (x, d404); ++ x = f2 (x, d405); ++ x = f2 (x, d406); ++ x = f2 (x, d407); ++ x = f2 (x, d408); ++ x = f2 (x, d409); ++ x = f2 (x, d410); ++ x = f2 (x, d411); ++ x = f2 (x, d412); ++ x = f2 (x, d413); ++ x = f2 (x, d414); ++ x = f2 (x, d415); ++ x = f2 (x, d416); ++ x = f2 (x, d417); ++ x = f2 (x, d418); ++ x = f2 (x, d419); ++ x = f2 (x, d420); ++ x = f2 (x, d421); ++ x = f2 (x, d422); ++ x = f2 (x, d423); ++ x = f2 (x, d424); ++ x = f2 (x, d425); ++ x = f2 (x, d426); ++ x = f2 (x, d427); ++ x = f2 (x, d428); ++ x = f2 (x, d429); ++ x = f2 (x, d430); ++ x = f2 (x, d431); ++ x = f2 (x, d432); ++ x = f2 (x, d433); ++ x = f2 (x, d434); ++ x = f2 (x, d435); ++ x = f2 (x, d436); ++ x = f2 (x, d437); ++ x = f2 (x, d438); ++ x = f2 (x, d439); ++ x = f2 (x, d440); ++ x = f2 (x, d441); ++ x = f2 (x, d442); ++ x = f2 (x, d443); ++ x = f2 (x, d444); ++ x = f2 (x, d445); ++ x = f2 (x, d446); ++ x = f2 (x, d447); ++ x = f2 (x, d448); ++ x = f2 (x, d449); ++ x = f2 (x, d450); ++ x = f2 (x, d451); ++ x = f2 (x, d452); ++ x = f2 (x, d453); ++ x = f2 (x, d454); ++ x = f2 (x, d455); ++ x = f2 (x, d456); ++ x = f2 (x, d457); ++ x = f2 (x, d458); ++ x = f2 (x, d459); ++ x = f2 (x, d460); ++ x = f2 (x, d461); ++ x = f2 (x, d462); ++ x = f2 (x, d463); ++ x = f2 (x, d464); ++ x = f2 (x, d465); ++ x = f2 (x, d466); ++ x = f2 (x, d467); ++ x = f2 (x, d468); ++ x = f2 (x, d469); ++ x = f2 (x, d470); ++ x = f2 (x, d471); ++ x = f2 (x, d472); ++ x = f2 (x, d473); ++ x = f2 (x, d474); ++ x = f2 (x, d475); ++ x = f2 (x, d476); ++ x = f2 (x, d477); ++ x = f2 (x, d478); ++ x = f2 (x, d479); ++ x = f2 (x, d480); ++ x = f2 (x, d481); ++ x = f2 (x, d482); ++ x = f2 (x, d483); ++ x = f2 (x, d484); ++ x = f2 (x, d485); ++ x = f2 (x, d486); ++ x = f2 (x, d487); ++ x = f2 (x, d488); ++ x = f2 (x, d489); ++ x = f2 (x, d490); ++ x = f2 (x, d491); ++ x = f2 (x, d492); ++ x = f2 (x, d493); ++ x = f2 (x, d494); ++ x = f2 (x, d495); ++ x = f2 (x, 
d496); ++ x = f2 (x, d497); ++ x = f2 (x, d498); ++ x = f2 (x, d499); ++ x = f2 (x, d500); ++ x = f2 (x, d501); ++ x = f2 (x, d502); ++ x = f2 (x, d503); ++ x = f2 (x, d504); ++ x = f2 (x, d505); ++ x = f2 (x, d506); ++ x = f2 (x, d507); ++ x = f2 (x, d508); ++ x = f2 (x, d509); ++ x = f2 (x, d510); ++ x = f2 (x, d511); ++ x = f2 (x, d512); ++ x = f2 (x, d513); ++ x = f2 (x, d514); ++ x = f2 (x, d515); ++ x = f2 (x, d516); ++ x = f2 (x, d517); ++ x = f2 (x, d518); ++ x = f2 (x, d519); ++ x = f2 (x, d520); ++ x = f2 (x, d521); ++ x = f2 (x, d522); ++ x = f2 (x, d523); ++ x = f2 (x, d524); ++ x = f2 (x, d525); ++ x = f2 (x, d526); ++ x = f2 (x, d527); ++ x = f2 (x, d528); ++ x = f2 (x, d529); ++ x = f2 (x, d530); ++ x = f2 (x, d531); ++ x = f2 (x, d532); ++ x = f2 (x, d533); ++ x = f2 (x, d534); ++ x = f2 (x, d535); ++ x = f2 (x, d536); ++ x = f2 (x, d537); ++ x = f2 (x, d538); ++ x = f2 (x, d539); ++ x = f2 (x, d540); ++ x = f2 (x, d541); ++ x = f2 (x, d542); ++ x = f2 (x, d543); ++ x = f2 (x, d544); ++ x = f2 (x, d545); ++ x = f2 (x, d546); ++ x = f2 (x, d547); ++ x = f2 (x, d548); ++ x = f2 (x, d549); ++ x = f2 (x, d550); ++ x = f2 (x, d551); ++ x = f2 (x, d552); ++ x = f2 (x, d553); ++ x = f2 (x, d554); ++ x = f2 (x, d555); ++ x = f2 (x, d556); ++ x = f2 (x, d557); ++ x = f2 (x, d558); ++ x = f2 (x, d559); ++ x = f2 (x, d560); ++ x = f2 (x, d561); ++ x = f2 (x, d562); ++ x = f2 (x, d563); ++ x = f2 (x, d564); ++ x = f2 (x, d565); ++ x = f2 (x, d566); ++ x = f2 (x, d567); ++ x = f2 (x, d568); ++ x = f2 (x, d569); ++ x = f2 (x, d570); ++ x = f2 (x, d571); ++ x = f2 (x, d572); ++ x = f2 (x, d573); ++ x = f2 (x, d574); ++ x = f2 (x, d575); ++ x = f2 (x, d576); ++ x = f2 (x, d577); ++ x = f2 (x, d578); ++ x = f2 (x, d579); ++ x = f2 (x, d580); ++ x = f2 (x, d581); ++ x = f2 (x, d582); ++ x = f2 (x, d583); ++ x = f2 (x, d584); ++ x = f2 (x, d585); ++ x = f2 (x, d586); ++ x = f2 (x, d587); ++ x = f2 (x, d588); ++ x = f2 (x, d589); ++ x = f2 (x, d590); ++ x = f2 (x, 
d591); ++ x = f2 (x, d592); ++ x = f2 (x, d593); ++ x = f2 (x, d594); ++ x = f2 (x, d595); ++ x = f2 (x, d596); ++ x = f2 (x, d597); ++ x = f2 (x, d598); ++ x = f2 (x, d599); ++ x = f2 (x, d600); ++ x = f2 (x, d601); ++ x = f2 (x, d602); ++ x = f2 (x, d603); ++ x = f2 (x, d604); ++ x = f2 (x, d605); ++ x = f2 (x, d606); ++ x = f2 (x, d607); ++ x = f2 (x, d608); ++ x = f2 (x, d609); ++ x = f2 (x, d610); ++ x = f2 (x, d611); ++ x = f2 (x, d612); ++ x = f2 (x, d613); ++ x = f2 (x, d614); ++ x = f2 (x, d615); ++ x = f2 (x, d616); ++ x = f2 (x, d617); ++ x = f2 (x, d618); ++ x = f2 (x, d619); ++ x = f2 (x, d620); ++ x = f2 (x, d621); ++ x = f2 (x, d622); ++ x = f2 (x, d623); ++ x = f2 (x, d624); ++ x = f2 (x, d625); ++ x = f2 (x, d626); ++ x = f2 (x, d627); ++ x = f2 (x, d628); ++ x = f2 (x, d629); ++ x = f2 (x, d630); ++ x = f2 (x, d631); ++ x = f2 (x, d632); ++ x = f2 (x, d633); ++ x = f2 (x, d634); ++ x = f2 (x, d635); ++ x = f2 (x, d636); ++ x = f2 (x, d637); ++ x = f2 (x, d638); ++ x = f2 (x, d639); ++ x = f2 (x, d640); ++ x = f2 (x, d641); ++ x = f2 (x, d642); ++ x = f2 (x, d643); ++ x = f2 (x, d644); ++ x = f2 (x, d645); ++ x = f2 (x, d646); ++ x = f2 (x, d647); ++ x = f2 (x, d648); ++ x = f2 (x, d649); ++ x = f2 (x, d650); ++ x = f2 (x, d651); ++ x = f2 (x, d652); ++ x = f2 (x, d653); ++ x = f2 (x, d654); ++ x = f2 (x, d655); ++ x = f2 (x, d656); ++ x = f2 (x, d657); ++ x = f2 (x, d658); ++ x = f2 (x, d659); ++ x = f2 (x, d660); ++ x = f2 (x, d661); ++ x = f2 (x, d662); ++ x = f2 (x, d663); ++ x = f2 (x, d664); ++ x = f2 (x, d665); ++ x = f2 (x, d666); ++ x = f2 (x, d667); ++ x = f2 (x, d668); ++ x = f2 (x, d669); ++ x = f2 (x, d670); ++ x = f2 (x, d671); ++ x = f2 (x, d672); ++ x = f2 (x, d673); ++ x = f2 (x, d674); ++ x = f2 (x, d675); ++ x = f2 (x, d676); ++ x = f2 (x, d677); ++ x = f2 (x, d678); ++ x = f2 (x, d679); ++ x = f2 (x, d680); ++ x = f2 (x, d681); ++ x = f2 (x, d682); ++ x = f2 (x, d683); ++ x = f2 (x, d684); ++ x = f2 (x, d685); ++ x = f2 (x, 
d686); ++ x = f2 (x, d687); ++ x = f2 (x, d688); ++ x = f2 (x, d689); ++ x = f2 (x, d690); ++ x = f2 (x, d691); ++ x = f2 (x, d692); ++ x = f2 (x, d693); ++ x = f2 (x, d694); ++ x = f2 (x, d695); ++ x = f2 (x, d696); ++ x = f2 (x, d697); ++ x = f2 (x, d698); ++ x = f2 (x, d699); ++ x = f2 (x, d700); ++ x = f2 (x, d701); ++ x = f2 (x, d702); ++ x = f2 (x, d703); ++ x = f2 (x, d704); ++ x = f2 (x, d705); ++ x = f2 (x, d706); ++ x = f2 (x, d707); ++ x = f2 (x, d708); ++ x = f2 (x, d709); ++ x = f2 (x, d710); ++ x = f2 (x, d711); ++ x = f2 (x, d712); ++ x = f2 (x, d713); ++ x = f2 (x, d714); ++ x = f2 (x, d715); ++ x = f2 (x, d716); ++ x = f2 (x, d717); ++ x = f2 (x, d718); ++ x = f2 (x, d719); ++ x = f2 (x, d720); ++ x = f2 (x, d721); ++ x = f2 (x, d722); ++ x = f2 (x, d723); ++ x = f2 (x, d724); ++ x = f2 (x, d725); ++ x = f2 (x, d726); ++ x = f2 (x, d727); ++ x = f2 (x, d728); ++ x = f2 (x, d729); ++ x = f2 (x, d730); ++ x = f2 (x, d731); ++ x = f2 (x, d732); ++ x = f2 (x, d733); ++ x = f2 (x, d734); ++ x = f2 (x, d735); ++ x = f2 (x, d736); ++ x = f2 (x, d737); ++ x = f2 (x, d738); ++ x = f2 (x, d739); ++ x = f2 (x, d740); ++ x = f2 (x, d741); ++ x = f2 (x, d742); ++ x = f2 (x, d743); ++ x = f2 (x, d744); ++ x = f2 (x, d745); ++ x = f2 (x, d746); ++ x = f2 (x, d747); ++ x = f2 (x, d748); ++ x = f2 (x, d749); ++ x = f2 (x, d750); ++ x = f2 (x, d751); ++ x = f2 (x, d752); ++ x = f2 (x, d753); ++ x = f2 (x, d754); ++ x = f2 (x, d755); ++ x = f2 (x, d756); ++ x = f2 (x, d757); ++ x = f2 (x, d758); ++ x = f2 (x, d759); ++ x = f2 (x, d760); ++ x = f2 (x, d761); ++ x = f2 (x, d762); ++ x = f2 (x, d763); ++ x = f2 (x, d764); ++ x = f2 (x, d765); ++ x = f2 (x, d766); ++ x = f2 (x, d767); ++ x = f2 (x, d768); ++ x = f2 (x, d769); ++ x = f2 (x, d770); ++ x = f2 (x, d771); ++ x = f2 (x, d772); ++ x = f2 (x, d773); ++ x = f2 (x, d774); ++ x = f2 (x, d775); ++ x = f2 (x, d776); ++ x = f2 (x, d777); ++ x = f2 (x, d778); ++ x = f2 (x, d779); ++ x = f2 (x, d780); ++ x = f2 (x, 
d781); ++ x = f2 (x, d782); ++ x = f2 (x, d783); ++ x = f2 (x, d784); ++ x = f2 (x, d785); ++ x = f2 (x, d786); ++ x = f2 (x, d787); ++ x = f2 (x, d788); ++ x = f2 (x, d789); ++ x = f2 (x, d790); ++ x = f2 (x, d791); ++ x = f2 (x, d792); ++ x = f2 (x, d793); ++ x = f2 (x, d794); ++ x = f2 (x, d795); ++ x = f2 (x, d796); ++ x = f2 (x, d797); ++ x = f2 (x, d798); ++ x = f2 (x, d799); ++ x = f2 (x, d800); ++ x = f2 (x, d801); ++ x = f2 (x, d802); ++ x = f2 (x, d803); ++ x = f2 (x, d804); ++ x = f2 (x, d805); ++ x = f2 (x, d806); ++ x = f2 (x, d807); ++ x = f2 (x, d808); ++ x = f2 (x, d809); ++ x = f2 (x, d810); ++ x = f2 (x, d811); ++ x = f2 (x, d812); ++ x = f2 (x, d813); ++ x = f2 (x, d814); ++ x = f2 (x, d815); ++ x = f2 (x, d816); ++ x = f2 (x, d817); ++ x = f2 (x, d818); ++ x = f2 (x, d819); ++ x = f2 (x, d820); ++ x = f2 (x, d821); ++ x = f2 (x, d822); ++ x = f2 (x, d823); ++ x = f2 (x, d824); ++ x = f2 (x, d825); ++ x = f2 (x, d826); ++ x = f2 (x, d827); ++ x = f2 (x, d828); ++ x = f2 (x, d829); ++ x = f2 (x, d830); ++ x = f2 (x, d831); ++ x = f2 (x, d832); ++ x = f2 (x, d833); ++ x = f2 (x, d834); ++ x = f2 (x, d835); ++ x = f2 (x, d836); ++ x = f2 (x, d837); ++ x = f2 (x, d838); ++ x = f2 (x, d839); ++ x = f2 (x, d840); ++ x = f2 (x, d841); ++ x = f2 (x, d842); ++ x = f2 (x, d843); ++ x = f2 (x, d844); ++ x = f2 (x, d845); ++ x = f2 (x, d846); ++ x = f2 (x, d847); ++ x = f2 (x, d848); ++ x = f2 (x, d849); ++ x = f2 (x, d850); ++ x = f2 (x, d851); ++ x = f2 (x, d852); ++ x = f2 (x, d853); ++ x = f2 (x, d854); ++ x = f2 (x, d855); ++ x = f2 (x, d856); ++ x = f2 (x, d857); ++ x = f2 (x, d858); ++ x = f2 (x, d859); ++ x = f2 (x, d860); ++ x = f2 (x, d861); ++ x = f2 (x, d862); ++ x = f2 (x, d863); ++ x = f2 (x, d864); ++ x = f2 (x, d865); ++ x = f2 (x, d866); ++ x = f2 (x, d867); ++ x = f2 (x, d868); ++ x = f2 (x, d869); ++ x = f2 (x, d870); ++ x = f2 (x, d871); ++ x = f2 (x, d872); ++ x = f2 (x, d873); ++ x = f2 (x, d874); ++ x = f2 (x, d875); ++ x = f2 (x, 
d876); ++ x = f2 (x, d877); ++ x = f2 (x, d878); ++ x = f2 (x, d879); ++ x = f2 (x, d880); ++ x = f2 (x, d881); ++ x = f2 (x, d882); ++ x = f2 (x, d883); ++ x = f2 (x, d884); ++ x = f2 (x, d885); ++ x = f2 (x, d886); ++ x = f2 (x, d887); ++ x = f2 (x, d888); ++ x = f2 (x, d889); ++ x = f2 (x, d890); ++ x = f2 (x, d891); ++ x = f2 (x, d892); ++ x = f2 (x, d893); ++ x = f2 (x, d894); ++ x = f2 (x, d895); ++ x = f2 (x, d896); ++ x = f2 (x, d897); ++ x = f2 (x, d898); ++ x = f2 (x, d899); ++ x = f2 (x, d900); ++ x = f2 (x, d901); ++ x = f2 (x, d902); ++ x = f2 (x, d903); ++ x = f2 (x, d904); ++ x = f2 (x, d905); ++ x = f2 (x, d906); ++ x = f2 (x, d907); ++ x = f2 (x, d908); ++ x = f2 (x, d909); ++ x = f2 (x, d910); ++ x = f2 (x, d911); ++ x = f2 (x, d912); ++ x = f2 (x, d913); ++ x = f2 (x, d914); ++ x = f2 (x, d915); ++ x = f2 (x, d916); ++ x = f2 (x, d917); ++ x = f2 (x, d918); ++ x = f2 (x, d919); ++ x = f2 (x, d920); ++ x = f2 (x, d921); ++ x = f2 (x, d922); ++ x = f2 (x, d923); ++ x = f2 (x, d924); ++ x = f2 (x, d925); ++ x = f2 (x, d926); ++ x = f2 (x, d927); ++ x = f2 (x, d928); ++ x = f2 (x, d929); ++ x = f2 (x, d930); ++ x = f2 (x, d931); ++ x = f2 (x, d932); ++ x = f2 (x, d933); ++ x = f2 (x, d934); ++ x = f2 (x, d935); ++ x = f2 (x, d936); ++ x = f2 (x, d937); ++ x = f2 (x, d938); ++ x = f2 (x, d939); ++ x = f2 (x, d940); ++ x = f2 (x, d941); ++ x = f2 (x, d942); ++ x = f2 (x, d943); ++ x = f2 (x, d944); ++ x = f2 (x, d945); ++ x = f2 (x, d946); ++ x = f2 (x, d947); ++ x = f2 (x, d948); ++ x = f2 (x, d949); ++ x = f2 (x, d950); ++ x = f2 (x, d951); ++ x = f2 (x, d952); ++ x = f2 (x, d953); ++ x = f2 (x, d954); ++ x = f2 (x, d955); ++ x = f2 (x, d956); ++ x = f2 (x, d957); ++ x = f2 (x, d958); ++ x = f2 (x, d959); ++ x = f2 (x, d960); ++ x = f2 (x, d961); ++ x = f2 (x, d962); ++ x = f2 (x, d963); ++ x = f2 (x, d964); ++ x = f2 (x, d965); ++ x = f2 (x, d966); ++ x = f2 (x, d967); ++ x = f2 (x, d968); ++ x = f2 (x, d969); ++ x = f2 (x, d970); ++ x = f2 (x, 
d971); ++ x = f2 (x, d972); ++ x = f2 (x, d973); ++ x = f2 (x, d974); ++ x = f2 (x, d975); ++ x = f2 (x, d976); ++ x = f2 (x, d977); ++ x = f2 (x, d978); ++ x = f2 (x, d979); ++ x = f2 (x, d980); ++ x = f2 (x, d981); ++ x = f2 (x, d982); ++ x = f2 (x, d983); ++ x = f2 (x, d984); ++ x = f2 (x, d985); ++ x = f2 (x, d986); ++ x = f2 (x, d987); ++ x = f2 (x, d988); ++ x = f2 (x, d989); ++ x = f2 (x, d990); ++ x = f2 (x, d991); ++ x = f2 (x, d992); ++ x = f2 (x, d993); ++ x = f2 (x, d994); ++ x = f2 (x, d995); ++ x = f2 (x, d996); ++ x = f2 (x, d997); ++ x = f2 (x, d998); ++ x = f2 (x, d999); ++ return x; ++} ++ ++/* { dg-final { scan-rtl-dump-times "Stack clash inline probes" 1 "pro_and_epilogue" } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash residual allocation in prologue" 1 "pro_and_epilogue" } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash not noreturn" 1 "pro_and_epilogue" } } */ ++ ++/* f3 is not a leaf ++/* { dg-final { scan-rtl-dump-times "Stack clash no frame pointer needed" 1 "pro_and_epilogue" { target { ! frame_pointer_for_non_leaf } } } } */ ++/* { dg-final { scan-rtl-dump-times "Stack clash frame pointer needed" 1 "pro_and_epilogue" { target { frame_pointer_for_non_leaf } } } } */ +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index 1000f15358b..8dbec663b18 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -8385,11 +8385,15 @@ proc check_effective_target_arm_coproc4_ok { } { + proc check_effective_target_supports_stack_clash_protection { } { + + # Temporary until the target bits are fully ACK'd. 
+-# if { [istarget aarch*-*-*] || [istarget x86_64-*-*] +-# || [istarget i?86-*-*] || [istarget s390*-*-*] ++# if { [istarget aarch*-*-*] ++# || [istarget s390*-*-*] + # || [istarget powerpc*-*-*] || [istarget rs6000*-*-*] } { + # return 1 + # } ++ ++ if { [istarget x86_64-*-*] || [istarget i?86-*-*] } { ++ return 1 ++ } + return 0 + } + diff --git a/gcc7-rh1512529-6.patch b/gcc7-rh1512529-6.patch new file mode 100644 index 0000000..dd13635 --- /dev/null +++ b/gcc7-rh1512529-6.patch @@ -0,0 +1,118 @@ +commit e7519110827a59eb7275591db640bdbdfd31ba15 +Author: law +Date: Wed Sep 20 05:43:28 2017 +0000 + + * combine-stack-adj.c (combine_stack_adjustments_for_block): Do + nothing for stack adjustments with REG_STACK_CHECK. + * sched-deps.c (parse_add_or_inc): Reject insns with + REG_STACK_CHECK from dependency breaking. + * config/i386/i386.c (pro_epilogue_adjust_stack): Return insn. + (ix86_adjust_satck_and_probe_stack_clash): Add REG_STACK_NOTEs. + * reg-notes.def (STACK_CHECK): New note. + + * gcc.target/i386/stack-check-11.c: New test. + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@252999 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/combine-stack-adj.c b/gcc/combine-stack-adj.c +index 9ec14a3e443..82d6dba856f 100644 +--- a/gcc/combine-stack-adj.c ++++ b/gcc/combine-stack-adj.c +@@ -508,6 +508,8 @@ combine_stack_adjustments_for_block (basic_block bb) + continue; + + set = single_set_for_csa (insn); ++ if (set && find_reg_note (insn, REG_STACK_CHECK, NULL_RTX)) ++ set = NULL_RTX; + if (set) + { + rtx dest = SET_DEST (set); +diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +index 088dca2001f..5aff9b9d113 100644 +--- a/gcc/config/i386/i386.c ++++ b/gcc/config/i386/i386.c +@@ -12900,7 +12900,7 @@ ix86_add_queued_cfa_restore_notes (rtx insn) + zero if %r11 register is live and cannot be freely used and positive + otherwise. 
*/ + +-static void ++static rtx + pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, + int style, bool set_cfa) + { +@@ -12987,6 +12987,7 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, + m->fs.sp_offset = ooffset - INTVAL (offset); + m->fs.sp_valid = valid; + } ++ return insn; + } + + /* Find an available register to be used as dynamic realign argument +@@ -13333,9 +13334,11 @@ ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size) + for (i = probe_interval; i <= size; i += probe_interval) + { + /* Allocate PROBE_INTERVAL bytes. */ +- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (-probe_interval), -1, +- m->fs.cfa_reg == stack_pointer_rtx); ++ rtx insn ++ = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (-PROBE_INTERVAL), -1, ++ m->fs.cfa_reg == stack_pointer_rtx); ++ add_reg_note (insn, REG_STACK_CHECK, const0_rtx); + + /* And probe at *sp. */ + emit_stack_probe (stack_pointer_rtx); +diff --git a/gcc/reg-notes.def b/gcc/reg-notes.def +index 8734d26e5b4..18cf7e3cfc0 100644 +--- a/gcc/reg-notes.def ++++ b/gcc/reg-notes.def +@@ -223,6 +223,10 @@ REG_NOTE (ARGS_SIZE) + pseudo reg. */ + REG_NOTE (RETURNED) + ++/* Indicates the instruction is a stack check probe that should not ++ be combined with other stack adjustments. */ ++REG_NOTE (STACK_CHECK) ++ + /* Used to mark a call with the function decl called by the call. + The decl might not be available in the call due to splitting of the call + insn. This note is a SYMBOL_REF. */ +diff --git a/gcc/sched-deps.c b/gcc/sched-deps.c +index b2393bf28fb..564f3fb1559 100644 +--- a/gcc/sched-deps.c ++++ b/gcc/sched-deps.c +@@ -4717,6 +4717,11 @@ parse_add_or_inc (struct mem_inc_info *mii, rtx_insn *insn, bool before_mem) + if (RTX_FRAME_RELATED_P (insn) || !pat) + return false; + ++ /* Do not allow breaking data dependencies for insns that are marked ++ with REG_STACK_CHECK. 
*/ ++ if (find_reg_note (insn, REG_STACK_CHECK, NULL)) ++ return false; ++ + /* Result must be single reg. */ + if (!REG_P (SET_DEST (pat))) + return false; +diff --git a/gcc/testsuite/gcc.target/i386/stack-check-11.c b/gcc/testsuite/gcc.target/i386/stack-check-11.c +new file mode 100644 +index 00000000000..183103f01e5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/stack-check-11.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-clash-protection" } */ ++/* { dg-require-effective-target supports_stack_clash_protection } */ ++ ++extern void arf (unsigned long int *, unsigned long int *); ++void ++frob () ++{ ++ unsigned long int num[859]; ++ unsigned long int den[859]; ++ arf (den, num); ++} ++ ++/* { dg-final { scan-assembler-times "subq" 4 } } */ ++/* { dg-final { scan-assembler-times "orq" 3 } } */ ++ diff --git a/gcc7-rh1512529-7.patch b/gcc7-rh1512529-7.patch new file mode 100644 index 0000000..0510dcd --- /dev/null +++ b/gcc7-rh1512529-7.patch @@ -0,0 +1,88 @@ +commit 919262bedfe78b0d4f9fddbd980b85243a83bf28 +Author: law +Date: Wed Sep 20 21:59:50 2017 +0000 + + * explow.c (compute_stack_clash_protection_loop_data): Use + CONST_INT_P instead of explicit test. Verify object is a + CONST_INT_P before looking at INTVAL. + (anti_adjust_stack_and_probe_stack_clash): Use CONST_INT_P + instead of explicit test. + + * gcc.target/i386/stack-check-11.c: Update test and regexp + so that it works for both i?86 and x86_64. 
+ + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@253034 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/explow.c b/gcc/explow.c +index 2a73414ab22..6af6355fe30 100644 +--- a/gcc/explow.c ++++ b/gcc/explow.c +@@ -1822,11 +1822,11 @@ compute_stack_clash_protection_loop_data (rtx *rounded_size, rtx *last_addr, + if (*rounded_size == CONST0_RTX (Pmode)) + fprintf (dump_file, + "Stack clash skipped dynamic allocation and probing loop.\n"); +- else if (GET_CODE (*rounded_size) == CONST_INT ++ else if (CONST_INT_P (*rounded_size) + && INTVAL (*rounded_size) <= 4 * *probe_interval) + fprintf (dump_file, + "Stack clash dynamic allocation and probing inline.\n"); +- else if (GET_CODE (*rounded_size) == CONST_INT) ++ else if (CONST_INT_P (*rounded_size)) + fprintf (dump_file, + "Stack clash dynamic allocation and probing in " + "rotated loop.\n"); +@@ -1924,7 +1924,8 @@ anti_adjust_stack_and_probe_stack_clash (rtx size) + + if (rounded_size != CONST0_RTX (Pmode)) + { +- if (INTVAL (rounded_size) <= 4 * probe_interval) ++ if (CONST_INT_P (rounded_size) ++ && INTVAL (rounded_size) <= 4 * probe_interval) + { + for (HOST_WIDE_INT i = 0; + i < INTVAL (rounded_size); +@@ -1944,7 +1945,7 @@ anti_adjust_stack_and_probe_stack_clash (rtx size) + else + { + rtx loop_lab, end_loop; +- bool rotate_loop = GET_CODE (rounded_size) == CONST_INT; ++ bool rotate_loop = CONST_INT_P (rounded_size); + emit_stack_clash_protection_probe_loop_start (&loop_lab, &end_loop, + last_addr, rotate_loop); + +@@ -1982,7 +1983,7 @@ anti_adjust_stack_and_probe_stack_clash (rtx size) + might hold live data. So probe at *sp if we know that + an allocation was made, otherwise probe into the red zone + which is obviously undesirable. 
*/ +- if (GET_CODE (size) == CONST_INT) ++ if (CONST_INT_P (size)) + { + emit_stack_probe (stack_pointer_rtx); + emit_insn (gen_blockage ()); +diff --git a/gcc/testsuite/gcc.target/i386/stack-check-11.c b/gcc/testsuite/gcc.target/i386/stack-check-11.c +index 183103f01e5..fe5b2c2b844 100644 +--- a/gcc/testsuite/gcc.target/i386/stack-check-11.c ++++ b/gcc/testsuite/gcc.target/i386/stack-check-11.c +@@ -2,15 +2,17 @@ + /* { dg-options "-O2 -fstack-clash-protection" } */ + /* { dg-require-effective-target supports_stack_clash_protection } */ + +-extern void arf (unsigned long int *, unsigned long int *); ++#include <stdint.h> ++ ++extern void arf (uint64_t *, uint64_t *); + void + frob () + { +- unsigned long int num[859]; +- unsigned long int den[859]; ++ uint64_t num[859]; ++ uint64_t den[859]; + arf (den, num); + } + +-/* { dg-final { scan-assembler-times "subq" 4 } } */ +-/* { dg-final { scan-assembler-times "orq" 3 } } */ ++/* { dg-final { scan-assembler-times "sub\[ql\]" 4 } } */ ++/* { dg-final { scan-assembler-times "or\[ql\]" 3 } } */ + diff --git a/gcc7-rh1512529-8.patch b/gcc7-rh1512529-8.patch new file mode 100644 index 0000000..a59a8ee --- /dev/null +++ b/gcc7-rh1512529-8.patch @@ -0,0 +1,388 @@ +commit a63af926db1d6109011e4dd81c750ecf784ab6c1 +Author: law +Date: Thu Sep 21 04:30:16 2017 +0000 + + * config/s390/s390.c (MIN_UNROLL_PROBES): Define. + (allocate_stack_space): New function, partially extracted from + s390_emit_prologue. + (s390_emit_prologue): Track offset to most recent stack probe. + Code to allocate space moved into allocate_stack_space. + Dump actions when no stack is allocated. + (s390_prologue_plus_offset): New function. + (s390_emit_stack_probe): Likewise. + + * gcc.dg/stack-check-5.c: Add argument for s390. + * lib/target-supports.exp: + (check_effective_target_supports_stack_clash_protection): Enable for + s390/s390x targets.
+ + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@253049 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c +index c408d59efd2..c78f1456daa 100644 +--- a/gcc/config/s390/s390.c ++++ b/gcc/config/s390/s390.c +@@ -10974,6 +10974,183 @@ pass_s390_early_mach::execute (function *fun) + + } // anon namespace + ++/* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it. ++ - push too big immediates to the literal pool and annotate the refs ++ - emit frame related notes for stack pointer changes. */ ++ ++static rtx ++s390_prologue_plus_offset (rtx target, rtx reg, rtx offset, bool frame_related_p) ++{ ++ rtx insn; ++ rtx orig_offset = offset; ++ ++ gcc_assert (REG_P (target)); ++ gcc_assert (REG_P (reg)); ++ gcc_assert (CONST_INT_P (offset)); ++ ++ if (offset == const0_rtx) /* lr/lgr */ ++ { ++ insn = emit_move_insn (target, reg); ++ } ++ else if (DISP_IN_RANGE (INTVAL (offset))) /* la */ ++ { ++ insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg, ++ offset)); ++ } ++ else ++ { ++ if (!satisfies_constraint_K (offset) /* ahi/aghi */ ++ && (!TARGET_EXTIMM ++ || (!satisfies_constraint_Op (offset) /* alfi/algfi */ ++ && !satisfies_constraint_On (offset)))) /* slfi/slgfi */ ++ offset = force_const_mem (Pmode, offset); ++ ++ if (target != reg) ++ { ++ insn = emit_move_insn (target, reg); ++ RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0; ++ } ++ ++ insn = emit_insn (gen_add2_insn (target, offset)); ++ ++ if (!CONST_INT_P (offset)) ++ { ++ annotate_constant_pool_refs (&PATTERN (insn)); ++ ++ if (frame_related_p) ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, ++ gen_rtx_SET (target, ++ gen_rtx_PLUS (Pmode, target, ++ orig_offset))); ++ } ++ } ++ ++ RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0; ++ ++ /* If this is a stack adjustment and we are generating a stack clash ++ prologue, then add a REG_STACK_CHECK note to signal that this insn ++ should be left alone. 
*/ ++ if (flag_stack_clash_protection && target == stack_pointer_rtx) ++ add_reg_note (insn, REG_STACK_CHECK, const0_rtx); ++ ++ return insn; ++} ++ ++/* Emit a compare instruction with a volatile memory access as stack ++ probe. It does not waste store tags and does not clobber any ++ registers apart from the condition code. */ ++static void ++s390_emit_stack_probe (rtx addr) ++{ ++ rtx tmp = gen_rtx_MEM (Pmode, addr); ++ MEM_VOLATILE_P (tmp) = 1; ++ s390_emit_compare (EQ, gen_rtx_REG (Pmode, 0), tmp); ++ emit_insn (gen_blockage ()); ++} ++ ++/* Use a runtime loop if we have to emit more probes than this. */ ++#define MIN_UNROLL_PROBES 3 ++ ++/* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary ++ if necessary. LAST_PROBE_OFFSET contains the offset of the closest ++ probe relative to the stack pointer. ++ ++ Note that SIZE is negative. ++ ++ The return value is true if TEMP_REG has been clobbered. */ ++static bool ++allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset, ++ rtx temp_reg) ++{ ++ bool temp_reg_clobbered_p = false; ++ HOST_WIDE_INT probe_interval ++ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL); ++ HOST_WIDE_INT guard_size ++ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE); ++ ++ if (flag_stack_clash_protection) ++ { ++ if (last_probe_offset + -INTVAL (size) < guard_size) ++ dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); ++ else ++ { ++ rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG); ++ HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval; ++ HOST_WIDE_INT num_probes = rounded_size / probe_interval; ++ HOST_WIDE_INT residual = -INTVAL (size) - rounded_size; ++ ++ if (num_probes < MIN_UNROLL_PROBES) ++ { ++ /* Emit unrolled probe statements. 
*/ ++ ++ for (unsigned int i = 0; i < num_probes; i++) ++ { ++ s390_prologue_plus_offset (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (-probe_interval), true); ++ s390_emit_stack_probe (gen_rtx_PLUS (Pmode, ++ stack_pointer_rtx, ++ offset)); ++ } ++ dump_stack_clash_frame_info (PROBE_INLINE, residual != 0); ++ } ++ else ++ { ++ /* Emit a loop probing the pages. */ ++ ++ rtx_code_label *loop_start_label = gen_label_rtx (); ++ ++ /* From now on temp_reg will be the CFA register. */ ++ s390_prologue_plus_offset (temp_reg, stack_pointer_rtx, ++ GEN_INT (-rounded_size), true); ++ emit_label (loop_start_label); ++ ++ s390_prologue_plus_offset (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (-probe_interval), false); ++ s390_emit_stack_probe (gen_rtx_PLUS (Pmode, ++ stack_pointer_rtx, ++ offset)); ++ emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg, ++ GT, NULL_RTX, ++ Pmode, 1, loop_start_label); ++ ++ /* Without this make_edges ICEes. */ ++ JUMP_LABEL (get_last_insn ()) = loop_start_label; ++ LABEL_NUSES (loop_start_label) = 1; ++ ++ /* That's going to be a NOP since stack pointer and ++ temp_reg are supposed to be the same here. We just ++ emit it to set the CFA reg back to r15. */ ++ s390_prologue_plus_offset (stack_pointer_rtx, temp_reg, ++ const0_rtx, true); ++ temp_reg_clobbered_p = true; ++ dump_stack_clash_frame_info (PROBE_LOOP, residual != 0); ++ } ++ ++ /* Handle any residual allocation request. */ ++ s390_prologue_plus_offset (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (-residual), true); ++ last_probe_offset += residual; ++ if (last_probe_offset >= probe_interval) ++ s390_emit_stack_probe (gen_rtx_PLUS (Pmode, ++ stack_pointer_rtx, ++ GEN_INT (residual ++ - UNITS_PER_LONG))); ++ ++ return temp_reg_clobbered_p; ++ } ++ } ++ ++ /* Subtract frame size from stack pointer. 
*/ ++ s390_prologue_plus_offset (stack_pointer_rtx, ++ stack_pointer_rtx, ++ size, true); ++ ++ return temp_reg_clobbered_p; ++} ++ + /* Expand the prologue into a bunch of separate insns. */ + + void +@@ -10998,6 +11175,19 @@ s390_emit_prologue (void) + else + temp_reg = gen_rtx_REG (Pmode, 1); + ++ /* When probing for stack-clash mitigation, we have to track the distance ++ between the stack pointer and closest known reference. ++ ++ Most of the time we have to make a worst cast assumption. The ++ only exception is when TARGET_BACKCHAIN is active, in which case ++ we know *sp (offset 0) was written. */ ++ HOST_WIDE_INT probe_interval ++ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL); ++ HOST_WIDE_INT last_probe_offset ++ = (TARGET_BACKCHAIN ++ ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0) ++ : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD)); ++ + s390_save_gprs_to_fprs (); + + /* Save call saved gprs. */ +@@ -11009,6 +11199,14 @@ s390_emit_prologue (void) + - cfun_frame_layout.first_save_gpr_slot), + cfun_frame_layout.first_save_gpr, + cfun_frame_layout.last_save_gpr); ++ ++ /* This is not 100% correct. If we have more than one register saved, ++ then LAST_PROBE_OFFSET can move even closer to sp. 
*/ ++ last_probe_offset ++ = (cfun_frame_layout.gprs_offset + ++ UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr ++ - cfun_frame_layout.first_save_gpr_slot)); ++ + emit_insn (insn); + } + +@@ -11025,6 +11223,8 @@ s390_emit_prologue (void) + if (cfun_fpr_save_p (i)) + { + save_fpr (stack_pointer_rtx, offset, i); ++ if (offset < last_probe_offset) ++ last_probe_offset = offset; + offset += 8; + } + else if (!TARGET_PACKED_STACK || cfun->stdarg) +@@ -11038,6 +11238,8 @@ s390_emit_prologue (void) + if (cfun_fpr_save_p (i)) + { + insn = save_fpr (stack_pointer_rtx, offset, i); ++ if (offset < last_probe_offset) ++ last_probe_offset = offset; + offset += 8; + + /* If f4 and f6 are call clobbered they are saved due to +@@ -11060,6 +11262,8 @@ s390_emit_prologue (void) + if (cfun_fpr_save_p (i)) + { + insn = save_fpr (stack_pointer_rtx, offset, i); ++ if (offset < last_probe_offset) ++ last_probe_offset = offset; + + RTX_FRAME_RELATED_P (insn) = 1; + offset -= 8; +@@ -11079,10 +11283,11 @@ s390_emit_prologue (void) + if (cfun_frame_layout.frame_size > 0) + { + rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size); +- rtx real_frame_off; ++ rtx_insn *stack_pointer_backup_loc; ++ bool temp_reg_clobbered_p; + + if (s390_stack_size) +- { ++ { + HOST_WIDE_INT stack_guard; + + if (s390_stack_guard) +@@ -11148,35 +11353,36 @@ s390_emit_prologue (void) + if (s390_warn_dynamicstack_p && cfun->calls_alloca) + warning (0, "%qs uses dynamic stack allocation", current_function_name ()); + +- /* Save incoming stack pointer into temp reg. */ +- if (TARGET_BACKCHAIN || next_fpr) +- insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx)); ++ /* Save the location where we could backup the incoming stack ++ pointer. */ ++ stack_pointer_backup_loc = get_last_insn (); + +- /* Subtract frame size from stack pointer. 
*/ ++ temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset, ++ temp_reg); + +- if (DISP_IN_RANGE (INTVAL (frame_off))) +- { +- insn = gen_rtx_SET (stack_pointer_rtx, +- gen_rtx_PLUS (Pmode, stack_pointer_rtx, +- frame_off)); +- insn = emit_insn (insn); +- } +- else ++ if (TARGET_BACKCHAIN || next_fpr) + { +- if (!CONST_OK_FOR_K (INTVAL (frame_off))) +- frame_off = force_const_mem (Pmode, frame_off); +- +- insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off)); +- annotate_constant_pool_refs (&PATTERN (insn)); ++ if (temp_reg_clobbered_p) ++ { ++ /* allocate_stack_space had to make use of temp_reg and ++ we need it to hold a backup of the incoming stack ++ pointer. Calculate back that value from the current ++ stack pointer. */ ++ s390_prologue_plus_offset (temp_reg, stack_pointer_rtx, ++ GEN_INT (cfun_frame_layout.frame_size), ++ false); ++ } ++ else ++ { ++ /* allocate_stack_space didn't actually required ++ temp_reg. Insert the stack pointer backup insn ++ before the stack pointer decrement code - knowing now ++ that the value will survive. */ ++ emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx), ++ stack_pointer_backup_loc); ++ } + } + +- RTX_FRAME_RELATED_P (insn) = 1; +- real_frame_off = GEN_INT (-cfun_frame_layout.frame_size); +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, +- gen_rtx_SET (stack_pointer_rtx, +- gen_rtx_PLUS (Pmode, stack_pointer_rtx, +- real_frame_off))); +- + /* Set backchain. */ + + if (TARGET_BACKCHAIN) +@@ -11200,6 +11406,8 @@ s390_emit_prologue (void) + emit_clobber (addr); + } + } ++ else if (flag_stack_clash_protection) ++ dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false); + + /* Save fprs 8 - 15 (64 bit ABI). 
*/ + +diff --git a/gcc/testsuite/gcc.dg/stack-check-5.c b/gcc/testsuite/gcc.dg/stack-check-5.c +index 2171d9b6c23..3178f5d8ce5 100644 +--- a/gcc/testsuite/gcc.dg/stack-check-5.c ++++ b/gcc/testsuite/gcc.dg/stack-check-5.c +@@ -3,6 +3,10 @@ + /* { dg-require-effective-target supports_stack_clash_protection } */ + + ++/* Otherwise the S/390 back-end might save the stack pointer in f2 () ++ into an FPR. */ ++/* { dg-additional-options "-msoft-float" { target { s390x-*-* } } } */ ++ + extern void foo (char *); + extern void bar (void); + +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index 8dbec663b18..e9f03886ca6 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -8386,12 +8386,12 @@ proc check_effective_target_supports_stack_clash_protection { } { + + # Temporary until the target bits are fully ACK'd. + # if { [istarget aarch*-*-*] +-# || [istarget s390*-*-*] + # || [istarget powerpc*-*-*] || [istarget rs6000*-*-*] } { + # return 1 + # } + +- if { [istarget x86_64-*-*] || [istarget i?86-*-*] } { ++ if { [istarget x86_64-*-*] || [istarget i?86-*-*] ++ || [istarget s390*-*-*] } { + return 1 + } + return 0 diff --git a/gcc7-rh1512529-9.patch b/gcc7-rh1512529-9.patch new file mode 100644 index 0000000..9c1f260 --- /dev/null +++ b/gcc7-rh1512529-9.patch @@ -0,0 +1,49 @@ +commit 233a9def5cd0b70f1288ff57ce67d9d5d5d7f845 +Author: law +Date: Thu Sep 21 22:03:59 2017 +0000 + + * config/i386/i386.c (ix86_adjust_stack_and_probe_stack_clash): + Fix dump output if the only stack space is for pushed registers. + + * lib/target-supports.exp + (check_effective_target_frame_pointer_for_non_leaf): Add + case for x86 Solaris. 
+ + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@253082 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +index 5aff9b9d113..086848b2c73 100644 +--- a/gcc/config/i386/i386.c ++++ b/gcc/config/i386/i386.c +@@ -13287,7 +13287,13 @@ ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size) + no probes are needed. */ + if (!size) + { +- dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false); ++ /* However, the allocation of space via pushes for register ++ saves could be viewed as allocating space, but without the ++ need to probe. */ ++ if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed) ++ dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); ++ else ++ dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false); + return; + } + +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index e9f03886ca6..cb58a2be35f 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -8403,6 +8403,12 @@ proc check_effective_target_frame_pointer_for_non_leaf { } { + if { [istarget aarch*-*-*] } { + return 1 + } ++ ++ # Solaris/x86 defaults to -fno-omit-frame-pointer. ++ if { [istarget i?86-*-solaris*] || [istarget x86_64-*-solaris*] } { ++ return 1 ++ } ++ + return 0 + } +