2833 lines
95 KiB
Diff
2833 lines
95 KiB
Diff
2009-02-05 Joey Ye <joey.ye@intel.com>
|
||
Xuepeng Guo <xuepeng.guo@intel.com>
|
||
H.J. Lu <hongjiu.lu@intel.com>
|
||
|
||
Atom pipeline model, tuning and insn selection.
|
||
* rtlanal.c (reg_mentioned_by_mem_p_1): New function.
|
||
(reg_mentioned_by_mem_p): New function.
|
||
(reg_dep_by_addr_p): New function.
|
||
|
||
* rtl.h (reg_mentioned_by_mem_p): Declare new function.
|
||
(reg_dep_by_addr_p): Likewise.
|
||
|
||
* config.gcc (atom): Add atom config options and target.
|
||
|
||
* config/i386/i386.h (TARGET_ATOM): New target macro.
|
||
(X86_TUNE_OPT_AGU): New tuning flag.
|
||
(TARGET_OPT_AGU): New target option.
|
||
(TARGET_CPU_DEFAULT_atom): New CPU default.
|
||
(PROCESSOR_ATOM): New processor.
|
||
|
||
* config/i386/i386-c.c (ix86_target_macros_internal): New case
|
||
PROCESSOR_ATOM.
|
||
(ix86_target_macros_internal): Likewise.
|
||
|
||
* config/i386/i386-protos.h (ix86_lea_for_add_ok): Declare new
|
||
function.
|
||
(ix86_dep_by_shift_count): Likewise.
|
||
(ix86_agi_dependent): Likewise.
|
||
|
||
* config/i386/i386.c (atom_cost): New cost.
|
||
(m_ATOM): New macro flag.
|
||
(initial_ix86_tune_fe): Set m_ATOM.
|
||
(x86_accumulate_outgoing_args): Likewise.
|
||
(x86_arch_always_fancy_math_387): Likewise.
|
||
(processor_target): Add Atom cost.
|
||
(cpu_names): Add Atom cpu name.
|
||
(override_options): Set Atom ISA.
|
||
(LEA_SEARCH_THRESHOLD): New macro.
|
||
(distance_non_agu_define): New function.
|
||
(distance_agu_use): Likewise.
|
||
(ix86_lea_for_add_ok): Likewise.
|
||
(ix86_dep_by_shift_count): Likewise.
|
||
(ix86_agi_dependent): Make it global.
|
||
(ix86_issue_rate): New case PROCESSOR_ATOM.
|
||
(ix86_adjust_cost): Likewise.
|
||
|
||
* config/i386/i386.md (cpu): Add new value "atom".
|
||
(atom.md): Include atom.md.
|
||
(use_carry, movu): New attr.
|
||
(adddi3_carry_rex64): Set attr "use_carry".
|
||
(addqi3_carry): Likewise.
|
||
(addhi3_carry): Likewise.
|
||
(addsi3_carry): Likewise.
|
||
(*addsi3_carry_zext): Likewise.
|
||
(subdi3_carry_rex64): Likewise.
|
||
(subqi3_carry): Likewise.
|
||
(subhi3_carry): Likewise.
|
||
(subsi3_carry): Likewise.
|
||
(x86_movdicc_0_m1_rex64): Likewise.
|
||
(*x86_movdicc_0_m1_se): Likewise.
|
||
(x86_movsicc_0_m1): Likewise.
|
||
(*x86_movsicc_0_m1_se): Likewise.
|
||
(*adddi_1_rex64): Emit add insn as much as possible.
|
||
(*addsi_1): Likewise.
|
||
(return_internal): Set atom_unit.
|
||
(return_internal_long): Likewise.
|
||
(return_pop_internal): Likewise.
|
||
(*rcpsf2_sse): Set atom_sse_attr attr.
|
||
(*sqrt<mode>2_sse): Likewise.
|
||
(*prefetch_sse): Likewise.
|
||
|
||
* config/i386/sse.md (cpu): Set attr "atom_sse_attr".
|
||
(*prefetch_sse_rex): Likewise.
|
||
(sse_rcpv4sf2): Likewise.
|
||
(sse_vmrcpv4sf2): Likewise.
|
||
(sse_sqrtv4sf2): Likewise.
|
||
(<sse>_vmsqrt<mode>2): Likewise.
|
||
(sse_ldmxcsr): Likewise.
|
||
(sse_stmxcsr): Likewise.
|
||
(*sse_sfence): Likewise.
|
||
(sse2_clflush): Likewise.
|
||
(*sse2_mfence): Likewise.
|
||
(*sse2_lfence): Likewise.
|
||
(avx_movup<avxmodesuffixf2c><avxmodesuffix>): Set attr "movu".
|
||
(<sse>_movup<ssemodesuffixf2c>): Likewise.
|
||
(avx_movdqu<avxmodesuffix>): Likewise.
|
||
(avx_lddqu<avxmodesuffix>): Likewise.
|
||
(sse2_movntv2di): Change attr "type" to "ssemov".
|
||
(sse2_movntsi): Likewise.
|
||
(rsqrtv8sf2): Change attr "type" to "sseadd".
|
||
(sse3_addsubv2df3): Set attr "atom_unit".
|
||
(sse3_h<plusminus_insn>v4sf3): Likewise.
|
||
(*sse2_pmaddwd): Likewise.
|
||
(*vec_extractv2di_1_rex64): Likewise.
|
||
(*vec_extractv2di_1_avx): Likewise.
|
||
(sse2_psadbw): Likewise.
|
||
(ssse3_phaddwv8hi3): Likewise.
|
||
(ssse3_phaddwv4hi3): Likewise.
|
||
(ssse3_phadddv4si3): Likewise.
|
||
(ssse3_phadddv2si3): Likewise.
|
||
(ssse3_phaddswv8hi3): Likewise.
|
||
(ssse3_phaddswv4hi3): Likewise.
|
||
(ssse3_phsubwv8hi3): Likewise.
|
||
(ssse3_phsubwv4hi3): Likewise.
|
||
(ssse3_phsubdv4si3): Likewise.
|
||
(ssse3_phsubdv2si3): Likewise.
|
||
(ssse3_phsubswv8hi3): Likewise.
|
||
(ssse3_phsubswv4hi3): Likewise.
|
||
(ssse3_pmaddubsw128): Likewise.
|
||
(sse3_pmaddubsw): Likewise.
|
||
(ssse3_palignrti): Likewise.
|
||
(ssse3_palignrdi): Likewise.
|
||
|
||
* config/i386/atom.md: New.
|
||
|
||
2009-02-05 H.J. Lu <hongjiu.lu@intel.com>
|
||
|
||
* config/i386/i386.c (ix86_agi_dependent): Remove the third
|
||
argument. Swap the first 2 arguments.
|
||
(ix86_adjust_cost): Updated.
|
||
|
||
2009-01-30 Vladimir Makarov <vmakarov@redhat.com>
|
||
|
||
* genautomata.c: Add a new year to the copyright. Add a new
|
||
reference.
|
||
(struct insn_reserv_decl): Add comments for member bypass_list.
|
||
(find_bypass): Remove.
|
||
(insert_bypass): New.
|
||
(process_decls): Use insert_bypass.
|
||
(output_internal_insn_latency_func): Output all bypasses with the
|
||
same input insn in one switch case.
|
||
|
||
* rtl.def (define_bypass): Describe bypass choice.
|
||
* doc/md.texi (define_bypass): Ditto.
|
||
|
||
--- gcc/doc/md.texi (.../trunk) (revision 144460)
|
||
+++ gcc/doc/md.texi (.../branches/ix86/atom) (revision 144601)
|
||
@@ -7506,6 +7506,11 @@ be ignored for this case. The additiona
|
||
recognize complicated bypasses, e.g.@: when the consumer is only an address
|
||
of insn @samp{store} (not a stored value).
|
||
|
||
+If there is more than one bypass with the same output and input insns, the
|
||
+chosen bypass is the first bypass with a guard in description whose
|
||
+guard function returns nonzero. If there is no such bypass, then
|
||
+bypass without the guard function is chosen.
|
||
+
|
||
@findex exclusion_set
|
||
@findex presence_set
|
||
@findex final_presence_set
|
||
--- gcc/rtlanal.c (.../trunk) (revision 144460)
|
||
+++ gcc/rtlanal.c (.../branches/ix86/atom) (revision 144601)
|
||
@@ -728,6 +728,129 @@ reg_mentioned_p (const_rtx reg, const_rt
|
||
}
|
||
return 0;
|
||
}
|
||
+
|
||
+static int
|
||
+reg_mentioned_by_mem_p_1 (const_rtx reg, const_rtx in,
|
||
+ bool *mem_p)
|
||
+{
|
||
+ const char *fmt;
|
||
+ int i;
|
||
+ enum rtx_code code;
|
||
+
|
||
+ if (in == 0)
|
||
+ return 0;
|
||
+
|
||
+ if (reg == in)
|
||
+ return 1;
|
||
+
|
||
+ if (GET_CODE (in) == LABEL_REF)
|
||
+ return reg == XEXP (in, 0);
|
||
+
|
||
+ code = GET_CODE (in);
|
||
+
|
||
+ switch (code)
|
||
+ {
|
||
+ /* Compare registers by number. */
|
||
+ case REG:
|
||
+ return REG_P (reg) && REGNO (in) == REGNO (reg);
|
||
+
|
||
+ /* These codes have no constituent expressions
|
||
+ and are unique. */
|
||
+ case SCRATCH:
|
||
+ case CC0:
|
||
+ case PC:
|
||
+ return 0;
|
||
+
|
||
+ case CONST_INT:
|
||
+ case CONST_VECTOR:
|
||
+ case CONST_DOUBLE:
|
||
+ case CONST_FIXED:
|
||
+ /* These are kept unique for a given value. */
|
||
+ return 0;
|
||
+
|
||
+ default:
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ if (GET_CODE (reg) == code && rtx_equal_p (reg, in))
|
||
+ return 1;
|
||
+
|
||
+ fmt = GET_RTX_FORMAT (code);
|
||
+
|
||
+ for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
|
||
+ {
|
||
+ if (fmt[i] == 'E')
|
||
+ {
|
||
+ int j;
|
||
+ for (j = XVECLEN (in, i) - 1; j >= 0; j--)
|
||
+ if (reg_mentioned_by_mem_p_1 (reg, XVECEXP (in, i, j), mem_p))
|
||
+ {
|
||
+ if (code == MEM)
|
||
+ *mem_p = true;
|
||
+
|
||
+ return 1;
|
||
+ }
|
||
+ }
|
||
+ else if (fmt[i] == 'e'
|
||
+ && reg_mentioned_by_mem_p_1 (reg, XEXP (in, i), mem_p))
|
||
+ {
|
||
+ if (code == MEM)
|
||
+ *mem_p = true;
|
||
+
|
||
+ return 1;
|
||
+ }
|
||
+ }
|
||
+ return 0;
|
||
+}
|
||
+
|
||
+/* Similar to the function reg_mentioned_p, return true only when
|
||
+ register REG appears in a MEM container of RTX IN. */
|
||
+
|
||
+bool
|
||
+reg_mentioned_by_mem_p (const_rtx reg, const_rtx in)
|
||
+{
|
||
+ bool mem = false;
|
||
+
|
||
+ reg_mentioned_by_mem_p_1 (reg, in, &mem);
|
||
+ return mem;
|
||
+}
|
||
+
|
||
+/* Return true if dest register in set_insn is used in use_insn as
|
||
+ address calculation.
|
||
+ For example, returns true if
|
||
+ set_insn: reg_a = reg_b
|
||
+ use_insn: reg_c = (reg_a) # reg_a used in addr calculation
|
||
+ False if
|
||
+ set_insn: reg_a = reg_b
|
||
+ use_insn: (reg_c) = reg_a # reg_a is used, but not as addr. */
|
||
+
|
||
+bool
|
||
+reg_dep_by_addr_p (const_rtx set_insn, const_rtx use_insn)
|
||
+{
|
||
+ rtx pattern = PATTERN (set_insn);
|
||
+ rtx set_dest = NULL;
|
||
+
|
||
+ switch (GET_CODE (pattern))
|
||
+ {
|
||
+ case SET:
|
||
+ set_dest = SET_DEST (pattern);
|
||
+ break;
|
||
+ case PARALLEL:
|
||
+ {
|
||
+ rtx pattern2 = XVECEXP (PATTERN (set_insn), 0,0);
|
||
+ if (GET_CODE (pattern2) == SET)
|
||
+ set_dest = SET_DEST (pattern2);
|
||
+ break;
|
||
+ }
|
||
+ default:
|
||
+ set_dest = NULL;
|
||
+ }
|
||
+
|
||
+ /* True if destination of set is reg and used as address. */
|
||
+ return set_dest && REG_P (set_dest)
|
||
+ && reg_mentioned_by_mem_p (set_dest, use_insn);
|
||
+}
|
||
+
|
||
|
||
/* Return 1 if in between BEG and END, exclusive of BEG and END, there is
|
||
no CODE_LABEL insn. */
|
||
--- gcc/genautomata.c (.../trunk) (revision 144460)
|
||
+++ gcc/genautomata.c (.../branches/ix86/atom) (revision 144601)
|
||
@@ -22,21 +22,25 @@ along with GCC; see the file COPYING3.
|
||
|
||
/* References:
|
||
|
||
- 1. Detecting pipeline structural hazards quickly. T. Proebsting,
|
||
+ 1. The finite state automaton based pipeline hazard recognizer and
|
||
+ instruction scheduler in GCC. V. Makarov. Proceedings of GCC
|
||
+ summit, 2003.
|
||
+
|
||
+ 2. Detecting pipeline structural hazards quickly. T. Proebsting,
|
||
C. Fraser. Proceedings of ACM SIGPLAN-SIGACT Symposium on
|
||
Principles of Programming Languages, pages 280--286, 1994.
|
||
|
||
This article is a good start point to understand usage of finite
|
||
state automata for pipeline hazard recognizers. But I'd
|
||
- recommend the 2nd article for more deep understanding.
|
||
+ recommend the 1st and 3rd articles for deeper understanding.
|
||
|
||
- 2. Efficient Instruction Scheduling Using Finite State Automata:
|
||
+ 3. Efficient Instruction Scheduling Using Finite State Automata:
|
||
V. Bala and N. Rubin, Proceedings of MICRO-28. This is the best
|
||
article about usage of finite state automata for pipeline hazard
|
||
recognizers.
|
||
|
||
- The current implementation is different from the 2nd article in the
|
||
- following:
|
||
+ The current implementation is described in the 1st article and it
|
||
+ is different from the 3rd article in the following:
|
||
|
||
1. New operator `|' (alternative) is permitted in functional unit
|
||
reservation which can be treated deterministically and
|
||
@@ -463,7 +467,10 @@ struct insn_reserv_decl
|
||
insn. */
|
||
int insn_num;
|
||
/* The following field value is list of bypasses in which given insn
|
||
- is output insn. */
|
||
+ is output insn. Bypasses with the same input insn stay one after
|
||
+ another in the list in the same order as their occurrences in the
|
||
+ description but the bypass without a guard stays always the last
|
||
+ in a row of bypasses with the same input insn. */
|
||
struct bypass_decl *bypass_list;
|
||
|
||
/* The following fields are defined by automaton generator. */
|
||
@@ -2367,18 +2374,67 @@ add_presence_absence (unit_set_el_t dest
|
||
}
|
||
|
||
|
||
-/* The function searches for bypass with given IN_INSN_RESERV in given
|
||
- BYPASS_LIST. */
|
||
-static struct bypass_decl *
|
||
-find_bypass (struct bypass_decl *bypass_list,
|
||
- struct insn_reserv_decl *in_insn_reserv)
|
||
-{
|
||
- struct bypass_decl *bypass;
|
||
-
|
||
- for (bypass = bypass_list; bypass != NULL; bypass = bypass->next)
|
||
- if (bypass->in_insn_reserv == in_insn_reserv)
|
||
- break;
|
||
- return bypass;
|
||
+/* The function inserts BYPASS in the list of bypasses of the
|
||
+ corresponding output insn. The order of bypasses in the list is
|
||
+ described in a comment for member `bypass_list' (see above). If
|
||
+ there is already the same bypass in the list the function reports
|
||
+ this and does nothing. */
|
||
+static void
|
||
+insert_bypass (struct bypass_decl *bypass)
|
||
+{
|
||
+ struct bypass_decl *curr, *last;
|
||
+ struct insn_reserv_decl *out_insn_reserv = bypass->out_insn_reserv;
|
||
+ struct insn_reserv_decl *in_insn_reserv = bypass->in_insn_reserv;
|
||
+
|
||
+ for (curr = out_insn_reserv->bypass_list, last = NULL;
|
||
+ curr != NULL;
|
||
+ last = curr, curr = curr->next)
|
||
+ if (curr->in_insn_reserv == in_insn_reserv)
|
||
+ {
|
||
+ if ((bypass->bypass_guard_name != NULL
|
||
+ && curr->bypass_guard_name != NULL
|
||
+ && ! strcmp (bypass->bypass_guard_name, curr->bypass_guard_name))
|
||
+ || bypass->bypass_guard_name == curr->bypass_guard_name)
|
||
+ {
|
||
+ if (bypass->bypass_guard_name == NULL)
|
||
+ {
|
||
+ if (!w_flag)
|
||
+ error ("the same bypass `%s - %s' is already defined",
|
||
+ bypass->out_insn_name, bypass->in_insn_name);
|
||
+ else
|
||
+ warning (0, "the same bypass `%s - %s' is already defined",
|
||
+ bypass->out_insn_name, bypass->in_insn_name);
|
||
+ }
|
||
+ else if (!w_flag)
|
||
+ error ("the same bypass `%s - %s' (guard %s) is already defined",
|
||
+ bypass->out_insn_name, bypass->in_insn_name,
|
||
+ bypass->bypass_guard_name);
|
||
+ else
|
||
+ warning
|
||
+ (0, "the same bypass `%s - %s' (guard %s) is already defined",
|
||
+ bypass->out_insn_name, bypass->in_insn_name,
|
||
+ bypass->bypass_guard_name);
|
||
+ return;
|
||
+ }
|
||
+ if (curr->bypass_guard_name == NULL)
|
||
+ break;
|
||
+ if (curr->next == NULL || curr->next->in_insn_reserv != in_insn_reserv)
|
||
+ {
|
||
+ last = curr;
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ }
|
||
+ if (last == NULL)
|
||
+ {
|
||
+ bypass->next = out_insn_reserv->bypass_list;
|
||
+ out_insn_reserv->bypass_list = bypass;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ bypass->next = last->next;
|
||
+ last->next = bypass;
|
||
+ }
|
||
}
|
||
|
||
/* The function processes pipeline description declarations, checks
|
||
@@ -2391,7 +2447,6 @@ process_decls (void)
|
||
decl_t decl_in_table;
|
||
decl_t out_insn_reserv;
|
||
decl_t in_insn_reserv;
|
||
- struct bypass_decl *bypass;
|
||
int automaton_presence;
|
||
int i;
|
||
|
||
@@ -2514,36 +2569,7 @@ process_decls (void)
|
||
= DECL_INSN_RESERV (out_insn_reserv);
|
||
DECL_BYPASS (decl)->in_insn_reserv
|
||
= DECL_INSN_RESERV (in_insn_reserv);
|
||
- bypass
|
||
- = find_bypass (DECL_INSN_RESERV (out_insn_reserv)->bypass_list,
|
||
- DECL_BYPASS (decl)->in_insn_reserv);
|
||
- if (bypass != NULL)
|
||
- {
|
||
- if (DECL_BYPASS (decl)->latency == bypass->latency)
|
||
- {
|
||
- if (!w_flag)
|
||
- error
|
||
- ("the same bypass `%s - %s' is already defined",
|
||
- DECL_BYPASS (decl)->out_insn_name,
|
||
- DECL_BYPASS (decl)->in_insn_name);
|
||
- else
|
||
- warning
|
||
- (0, "the same bypass `%s - %s' is already defined",
|
||
- DECL_BYPASS (decl)->out_insn_name,
|
||
- DECL_BYPASS (decl)->in_insn_name);
|
||
- }
|
||
- else
|
||
- error ("bypass `%s - %s' is already defined",
|
||
- DECL_BYPASS (decl)->out_insn_name,
|
||
- DECL_BYPASS (decl)->in_insn_name);
|
||
- }
|
||
- else
|
||
- {
|
||
- DECL_BYPASS (decl)->next
|
||
- = DECL_INSN_RESERV (out_insn_reserv)->bypass_list;
|
||
- DECL_INSN_RESERV (out_insn_reserv)->bypass_list
|
||
- = DECL_BYPASS (decl);
|
||
- }
|
||
+ insert_bypass (DECL_BYPASS (decl));
|
||
}
|
||
}
|
||
}
|
||
@@ -8159,19 +8185,32 @@ output_internal_insn_latency_func (void)
|
||
(advance_cycle_insn_decl)->insn_num));
|
||
fprintf (output_file, " case %d:\n",
|
||
bypass->in_insn_reserv->insn_num);
|
||
- if (bypass->bypass_guard_name == NULL)
|
||
- fprintf (output_file, " return %d;\n",
|
||
- bypass->latency);
|
||
- else
|
||
+ for (;;)
|
||
{
|
||
- fprintf (output_file,
|
||
- " if (%s (%s, %s))\n",
|
||
- bypass->bypass_guard_name, INSN_PARAMETER_NAME,
|
||
- INSN2_PARAMETER_NAME);
|
||
- fprintf (output_file,
|
||
- " return %d;\n break;\n",
|
||
- bypass->latency);
|
||
+ if (bypass->bypass_guard_name == NULL)
|
||
+ {
|
||
+ gcc_assert (bypass->next == NULL
|
||
+ || (bypass->in_insn_reserv
|
||
+ != bypass->next->in_insn_reserv));
|
||
+ fprintf (output_file, " return %d;\n",
|
||
+ bypass->latency);
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ fprintf (output_file,
|
||
+ " if (%s (%s, %s))\n",
|
||
+ bypass->bypass_guard_name, INSN_PARAMETER_NAME,
|
||
+ INSN2_PARAMETER_NAME);
|
||
+ fprintf (output_file, " return %d;\n",
|
||
+ bypass->latency);
|
||
+ }
|
||
+ if (bypass->next == NULL
|
||
+ || bypass->in_insn_reserv != bypass->next->in_insn_reserv)
|
||
+ break;
|
||
+ bypass = bypass->next;
|
||
}
|
||
+ if (bypass->bypass_guard_name != NULL)
|
||
+ fprintf (output_file, " break;\n");
|
||
}
|
||
fputs (" }\n break;\n", output_file);
|
||
}
|
||
--- gcc/rtl.def (.../trunk) (revision 144460)
|
||
+++ gcc/rtl.def (.../branches/ix86/atom) (revision 144601)
|
||
@@ -1088,7 +1088,11 @@ DEF_RTL_EXPR(FINAL_ABSENCE_SET, "final_a
|
||
guard for the bypass. The function will get the two insns as
|
||
parameters. If the function returns zero the bypass will be
|
||
ignored for this case. Additional guard is necessary to recognize
|
||
- complicated bypasses, e.g. when consumer is load address. */
|
||
+ complicated bypasses, e.g. when consumer is load address. If there
|
||
+ is more than one bypass with the same output and input insns, the
|
||
+ chosen bypass is the first bypass with a guard in description whose
|
||
+ guard function returns nonzero. If there is no such bypass, then
|
||
+ bypass without the guard function is chosen. */
|
||
DEF_RTL_EXPR(DEFINE_BYPASS, "define_bypass", "issS", RTX_EXTRA)
|
||
|
||
/* (define_automaton string) describes names of automata generated and
|
||
--- gcc/rtl.h (.../trunk) (revision 144460)
|
||
+++ gcc/rtl.h (.../branches/ix86/atom) (revision 144601)
|
||
@@ -1731,6 +1731,8 @@ extern rtx get_related_value (const_rtx)
|
||
extern bool offset_within_block_p (const_rtx, HOST_WIDE_INT);
|
||
extern void split_const (rtx, rtx *, rtx *);
|
||
extern int reg_mentioned_p (const_rtx, const_rtx);
|
||
+extern bool reg_mentioned_by_mem_p (const_rtx, const_rtx);
|
||
+extern bool reg_dep_by_addr_p (const_rtx, const_rtx);
|
||
extern int count_occurrences (const_rtx, const_rtx, int);
|
||
extern int reg_referenced_p (const_rtx, const_rtx);
|
||
extern int reg_used_between_p (const_rtx, const_rtx, const_rtx);
|
||
--- gcc/config.gcc (.../trunk) (revision 144460)
|
||
+++ gcc/config.gcc (.../branches/ix86/atom) (revision 144601)
|
||
@@ -1087,7 +1087,7 @@ i[34567]86-*-linux* | i[34567]86-*-kfree
|
||
tmake_file="${tmake_file} i386/t-linux64"
|
||
need_64bit_hwint=yes
|
||
case X"${with_cpu}" in
|
||
- Xgeneric|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
|
||
+ Xgeneric|Xatom|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
|
||
;;
|
||
X)
|
||
if test x$with_cpu_64 = x; then
|
||
@@ -1096,7 +1096,7 @@ i[34567]86-*-linux* | i[34567]86-*-kfree
|
||
;;
|
||
*)
|
||
echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2
|
||
- echo "generic core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
|
||
+ echo "generic atom core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
|
||
exit 1
|
||
;;
|
||
esac
|
||
@@ -1201,7 +1201,7 @@ i[34567]86-*-solaris2*)
|
||
# libgcc/configure.ac instead.
|
||
need_64bit_hwint=yes
|
||
case X"${with_cpu}" in
|
||
- Xgeneric|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
|
||
+ Xgeneric|Xatom|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
|
||
;;
|
||
X)
|
||
if test x$with_cpu_64 = x; then
|
||
@@ -1210,7 +1210,7 @@ i[34567]86-*-solaris2*)
|
||
;;
|
||
*)
|
||
echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2
|
||
- echo "generic core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
|
||
+ echo "generic atom core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
|
||
exit 1
|
||
;;
|
||
esac
|
||
@@ -2803,7 +2803,7 @@ case "${target}" in
|
||
esac
|
||
# OK
|
||
;;
|
||
- "" | amdfam10 | barcelona | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | generic)
|
||
+ "" | amdfam10 | barcelona | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | atom | generic)
|
||
# OK
|
||
;;
|
||
*)
|
||
--- gcc/config/i386/i386.h (.../trunk) (revision 144460)
|
||
+++ gcc/config/i386/i386.h (.../branches/ix86/atom) (revision 144601)
|
||
@@ -231,6 +231,7 @@ extern const struct processor_costs ix86
|
||
#define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64)
|
||
#define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
|
||
#define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10)
|
||
+#define TARGET_ATOM (ix86_tune == PROCESSOR_ATOM)
|
||
|
||
/* Feature tests against the various tunings. */
|
||
enum ix86_tune_indices {
|
||
@@ -295,6 +296,7 @@ enum ix86_tune_indices {
|
||
X86_TUNE_USE_VECTOR_FP_CONVERTS,
|
||
X86_TUNE_USE_VECTOR_CONVERTS,
|
||
X86_TUNE_FUSE_CMP_AND_BRANCH,
|
||
+ X86_TUNE_OPT_AGU,
|
||
|
||
X86_TUNE_LAST
|
||
};
|
||
@@ -382,6 +384,7 @@ extern unsigned char ix86_tune_features[
|
||
ix86_tune_features[X86_TUNE_USE_VECTOR_CONVERTS]
|
||
#define TARGET_FUSE_CMP_AND_BRANCH \
|
||
ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH]
|
||
+#define TARGET_OPT_AGU ix86_tune_features[X86_TUNE_OPT_AGU]
|
||
|
||
/* Feature tests against the various architecture variations. */
|
||
enum ix86_arch_indices {
|
||
@@ -564,6 +567,7 @@ enum target_cpu_default
|
||
TARGET_CPU_DEFAULT_prescott,
|
||
TARGET_CPU_DEFAULT_nocona,
|
||
TARGET_CPU_DEFAULT_core2,
|
||
+ TARGET_CPU_DEFAULT_atom,
|
||
|
||
TARGET_CPU_DEFAULT_geode,
|
||
TARGET_CPU_DEFAULT_k6,
|
||
@@ -2256,6 +2260,7 @@ enum processor_type
|
||
PROCESSOR_GENERIC32,
|
||
PROCESSOR_GENERIC64,
|
||
PROCESSOR_AMDFAM10,
|
||
+ PROCESSOR_ATOM,
|
||
PROCESSOR_max
|
||
};
|
||
|
||
--- gcc/config/i386/i386.md (.../trunk) (revision 144460)
|
||
+++ gcc/config/i386/i386.md (.../branches/ix86/atom) (revision 144601)
|
||
@@ -298,7 +298,7 @@ (define_constants
|
||
|
||
|
||
;; Processor type.
|
||
-(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,
|
||
+(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,atom,
|
||
generic64,amdfam10"
|
||
(const (symbol_ref "ix86_schedule")))
|
||
|
||
@@ -594,6 +594,12 @@ (define_attr "fp_int_src" "false,true"
|
||
(define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any"
|
||
(const_string "any"))
|
||
|
||
+;; Define attribute to classify add/sub insns that consume carry flag (CF)
|
||
+(define_attr "use_carry" "0,1" (const_string "0"))
|
||
+
|
||
+;; Define attribute to indicate unaligned ssemov insns
|
||
+(define_attr "movu" "0,1" (const_string "0"))
|
||
+
|
||
;; Describe a user's asm statement.
|
||
(define_asm_attributes
|
||
[(set_attr "length" "128")
|
||
@@ -709,6 +715,7 @@ (define_mode_iterator P [(SI "Pmode == S
|
||
(include "k6.md")
|
||
(include "athlon.md")
|
||
(include "geode.md")
|
||
+(include "atom.md")
|
||
|
||
|
||
;; Operand and operator predicates and constraints
|
||
@@ -5776,6 +5783,7 @@ (define_insn "adddi3_carry_rex64"
|
||
"TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
|
||
"adc{q}\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "mode" "DI")])
|
||
|
||
@@ -5850,6 +5858,7 @@ (define_insn "addqi3_carry"
|
||
"ix86_binary_operator_ok (PLUS, QImode, operands)"
|
||
"adc{b}\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "mode" "QI")])
|
||
|
||
@@ -5862,6 +5871,7 @@ (define_insn "addhi3_carry"
|
||
"ix86_binary_operator_ok (PLUS, HImode, operands)"
|
||
"adc{w}\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "mode" "HI")])
|
||
|
||
@@ -5874,6 +5884,7 @@ (define_insn "addsi3_carry"
|
||
"ix86_binary_operator_ok (PLUS, SImode, operands)"
|
||
"adc{l}\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
@@ -5887,6 +5898,7 @@ (define_insn "*addsi3_carry_zext"
|
||
"TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
|
||
"adc{l}\t{%2, %k0|%k0, %2}"
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
@@ -6116,9 +6128,9 @@ (define_insn_and_split "*lea_general_3_z
|
||
(set_attr "mode" "SI")])
|
||
|
||
(define_insn "*adddi_1_rex64"
|
||
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r")
|
||
- (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,r")
|
||
- (match_operand:DI 2 "x86_64_general_operand" "rme,re,le")))
|
||
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r")
|
||
+ (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,r,r")
|
||
+ (match_operand:DI 2 "x86_64_general_operand" "rme,re,0,le")))
|
||
(clobber (reg:CC FLAGS_REG))]
|
||
"TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
|
||
{
|
||
@@ -6139,6 +6151,10 @@ (define_insn "*adddi_1_rex64"
|
||
}
|
||
|
||
default:
|
||
+ /* Use add as much as possible to replace lea for AGU optimization. */
|
||
+ if (which_alternative == 2 && TARGET_OPT_AGU)
|
||
+ return "add{q}\t{%1, %0|%0, %1}";
|
||
+
|
||
gcc_assert (rtx_equal_p (operands[0], operands[1]));
|
||
|
||
/* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
|
||
@@ -6157,8 +6173,11 @@ (define_insn "*adddi_1_rex64"
|
||
}
|
||
}
|
||
[(set (attr "type")
|
||
- (cond [(eq_attr "alternative" "2")
|
||
+ (cond [(and (eq_attr "alternative" "2")
|
||
+ (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
|
||
(const_string "lea")
|
||
+ (eq_attr "alternative" "3")
|
||
+ (const_string "lea")
|
||
; Current assemblers are broken and do not allow @GOTOFF in
|
||
; ought but a memory context.
|
||
(match_operand:DI 2 "pic_symbolic_operand" "")
|
||
@@ -6175,8 +6194,8 @@ (define_split
|
||
(plus:DI (match_operand:DI 1 "register_operand" "")
|
||
(match_operand:DI 2 "x86_64_nonmemory_operand" "")))
|
||
(clobber (reg:CC FLAGS_REG))]
|
||
- "TARGET_64BIT && reload_completed
|
||
- && true_regnum (operands[0]) != true_regnum (operands[1])"
|
||
+ "TARGET_64BIT && reload_completed
|
||
+ && ix86_lea_for_add_ok (PLUS, insn, operands)"
|
||
[(set (match_dup 0)
|
||
(plus:DI (match_dup 1)
|
||
(match_dup 2)))]
|
||
@@ -6380,9 +6399,9 @@ (define_insn "*adddi_5_rex64"
|
||
|
||
|
||
(define_insn "*addsi_1"
|
||
- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r")
|
||
- (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r")
|
||
- (match_operand:SI 2 "general_operand" "g,ri,li")))
|
||
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r,r")
|
||
+ (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r,r")
|
||
+ (match_operand:SI 2 "general_operand" "g,ri,0,li")))
|
||
(clobber (reg:CC FLAGS_REG))]
|
||
"ix86_binary_operator_ok (PLUS, SImode, operands)"
|
||
{
|
||
@@ -6403,6 +6422,10 @@ (define_insn "*addsi_1"
|
||
}
|
||
|
||
default:
|
||
+ /* Use add as much as possible to replace lea for AGU optimization. */
|
||
+ if (which_alternative == 2 && TARGET_OPT_AGU)
|
||
+ return "add{l}\t{%1, %0|%0, %1}";
|
||
+
|
||
gcc_assert (rtx_equal_p (operands[0], operands[1]));
|
||
|
||
/* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
|
||
@@ -6419,7 +6442,10 @@ (define_insn "*addsi_1"
|
||
}
|
||
}
|
||
[(set (attr "type")
|
||
- (cond [(eq_attr "alternative" "2")
|
||
+ (cond [(and (eq_attr "alternative" "2")
|
||
+ (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
|
||
+ (const_string "lea")
|
||
+ (eq_attr "alternative" "3")
|
||
(const_string "lea")
|
||
; Current assemblers are broken and do not allow @GOTOFF in
|
||
; ought but a memory context.
|
||
@@ -6437,8 +6463,7 @@ (define_split
|
||
(plus (match_operand 1 "register_operand" "")
|
||
(match_operand 2 "nonmemory_operand" "")))
|
||
(clobber (reg:CC FLAGS_REG))]
|
||
- "reload_completed
|
||
- && true_regnum (operands[0]) != true_regnum (operands[1])"
|
||
+ "reload_completed && ix86_lea_for_add_ok (PLUS, insn, operands)"
|
||
[(const_int 0)]
|
||
{
|
||
rtx pat;
|
||
@@ -7539,6 +7564,7 @@ (define_insn "subdi3_carry_rex64"
|
||
"TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)"
|
||
"sbb{q}\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "mode" "DI")])
|
||
|
||
@@ -7587,6 +7613,7 @@ (define_insn "subqi3_carry"
|
||
"ix86_binary_operator_ok (MINUS, QImode, operands)"
|
||
"sbb{b}\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "mode" "QI")])
|
||
|
||
@@ -7599,6 +7626,7 @@ (define_insn "subhi3_carry"
|
||
"ix86_binary_operator_ok (MINUS, HImode, operands)"
|
||
"sbb{w}\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "mode" "HI")])
|
||
|
||
@@ -7611,6 +7639,7 @@ (define_insn "subsi3_carry"
|
||
"ix86_binary_operator_ok (MINUS, SImode, operands)"
|
||
"sbb{l}\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "mode" "SI")])
|
||
|
||
@@ -15224,6 +15253,7 @@ (define_insn "return_internal"
|
||
"reload_completed"
|
||
"ret"
|
||
[(set_attr "length" "1")
|
||
+ (set_attr "atom_unit" "jeu")
|
||
(set_attr "length_immediate" "0")
|
||
(set_attr "modrm" "0")])
|
||
|
||
@@ -15236,6 +15266,7 @@ (define_insn "return_internal_long"
|
||
"reload_completed"
|
||
"rep\;ret"
|
||
[(set_attr "length" "1")
|
||
+ (set_attr "atom_unit" "jeu")
|
||
(set_attr "length_immediate" "0")
|
||
(set_attr "prefix_rep" "1")
|
||
(set_attr "modrm" "0")])
|
||
@@ -15246,6 +15277,7 @@ (define_insn "return_pop_internal"
|
||
"reload_completed"
|
||
"ret\t%0"
|
||
[(set_attr "length" "3")
|
||
+ (set_attr "atom_unit" "jeu")
|
||
(set_attr "length_immediate" "2")
|
||
(set_attr "modrm" "0")])
|
||
|
||
@@ -16367,6 +16399,7 @@ (define_insn "*rcpsf2_sse"
|
||
"TARGET_SSE_MATH"
|
||
"%vrcpss\t{%1, %d0|%d0, %1}"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "rcp")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "mode" "SF")])
|
||
|
||
@@ -16718,6 +16751,7 @@ (define_insn "*rsqrtsf2_sse"
|
||
"TARGET_SSE_MATH"
|
||
"%vrsqrtss\t{%1, %d0|%d0, %1}"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "rcp")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "mode" "SF")])
|
||
|
||
@@ -16738,6 +16772,7 @@ (define_insn "*sqrt<mode>2_sse"
|
||
"SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
|
||
"%vsqrts<ssemodefsuffix>\t{%1, %d0|%d0, %1}"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "sqrt")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "mode" "<MODE>")
|
||
(set_attr "athlon_decode" "*")
|
||
@@ -19791,6 +19826,7 @@ (define_insn "x86_movdicc_0_m1_rex64"
|
||
; Since we don't have the proper number of operands for an alu insn,
|
||
; fill in all the blanks.
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "memory" "none")
|
||
(set_attr "imm_disp" "false")
|
||
@@ -19806,6 +19842,7 @@ (define_insn "*x86_movdicc_0_m1_se"
|
||
""
|
||
"sbb{q}\t%0, %0"
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "memory" "none")
|
||
(set_attr "imm_disp" "false")
|
||
@@ -19849,6 +19886,7 @@ (define_insn "x86_movsicc_0_m1"
|
||
; Since we don't have the proper number of operands for an alu insn,
|
||
; fill in all the blanks.
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "memory" "none")
|
||
(set_attr "imm_disp" "false")
|
||
@@ -19864,6 +19902,7 @@ (define_insn "*x86_movsicc_0_m1_se"
|
||
""
|
||
"sbb{l}\t%0, %0"
|
||
[(set_attr "type" "alu")
|
||
+ (set_attr "use_carry" "1")
|
||
(set_attr "pent_pair" "pu")
|
||
(set_attr "memory" "none")
|
||
(set_attr "imm_disp" "false")
|
||
@@ -20196,7 +20235,8 @@ (define_insn "pro_epilogue_adjust_stack_
|
||
}
|
||
}
|
||
[(set (attr "type")
|
||
- (cond [(eq_attr "alternative" "0")
|
||
+ (cond [(and (eq_attr "alternative" "0")
|
||
+ (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
|
||
(const_string "alu")
|
||
(match_operand:SI 2 "const0_operand" "")
|
||
(const_string "imov")
|
||
@@ -20239,7 +20279,8 @@ (define_insn "pro_epilogue_adjust_stack_
|
||
}
|
||
}
|
||
[(set (attr "type")
|
||
- (cond [(eq_attr "alternative" "0")
|
||
+ (cond [(and (eq_attr "alternative" "0")
|
||
+ (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
|
||
(const_string "alu")
|
||
(match_operand:DI 2 "const0_operand" "")
|
||
(const_string "imov")
|
||
@@ -21731,6 +21772,7 @@ (define_insn "*prefetch_sse"
|
||
return patterns[locality];
|
||
}
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "prefetch")
|
||
(set_attr "memory" "none")])
|
||
|
||
(define_insn "*prefetch_sse_rex"
|
||
@@ -21749,6 +21791,7 @@ (define_insn "*prefetch_sse_rex"
|
||
return patterns[locality];
|
||
}
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "prefetch")
|
||
(set_attr "memory" "none")])
|
||
|
||
(define_insn "*prefetch_3dnow"
|
||
--- gcc/config/i386/atom.md (.../trunk) (revision 0)
|
||
+++ gcc/config/i386/atom.md (.../branches/ix86/atom) (revision 144601)
|
||
@@ -0,0 +1,796 @@
|
||
+;; Atom Scheduling
|
||
+;; Copyright (C) 2009 Free Software Foundation, Inc.
|
||
+;;
|
||
+;; This file is part of GCC.
|
||
+;;
|
||
+;; GCC is free software; you can redistribute it and/or modify
|
||
+;; it under the terms of the GNU General Public License as published by
|
||
+;; the Free Software Foundation; either version 2, or (at your option)
|
||
+;; any later version.
|
||
+;;
|
||
+;; GCC is distributed in the hope that it will be useful,
|
||
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
+;; GNU General Public License for more details.
|
||
+;;
|
||
+;; You should have received a copy of the GNU General Public License
|
||
+;; along with GCC; see the file COPYING. If not, write to
|
||
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
|
||
+;; Boston, MA 02110-1301, USA.
|
||
+;;
|
||
+;; Atom is an in-order core with two integer pipelines.
|
||
+
|
||
+
|
||
+(define_attr "atom_unit" "sishuf,simul,jeu,complex,other"
|
||
+ (const_string "other"))
|
||
+
|
||
+(define_attr "atom_sse_attr" "rcp,movdup,lfence,fence,prefetch,sqrt,mxcsr,other"
|
||
+ (const_string "other"))
|
||
+
|
||
+(define_automaton "atom")
|
||
+
|
||
+;; Atom has two ports: port 0 and port 1 connecting to all execution units
|
||
+(define_cpu_unit "atom-port-0,atom-port-1" "atom")
|
||
+
|
||
+;; EU: Execution Unit
|
||
+;; Atom EUs are connected by port 0 or port 1.
|
||
+
|
||
+(define_cpu_unit "atom-eu-0, atom-eu-1,
|
||
+ atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4"
|
||
+ "atom")
|
||
+
|
||
+;; Some EUs have duplicated copies and can be accessed via either
|
||
+;; port 0 or port 1
|
||
+;; (define_reservation "atom-port-either" "(atom-port-0 | atom-port-1)")
|
||
+
|
||
+;;; Some instructions are dual-pipe execution and need both ports
|
||
+;;; Complex multi-op macro-instructions need both ports and all EUs
|
||
+(define_reservation "atom-port-dual" "(atom-port-0 + atom-port-1)")
|
||
+(define_reservation "atom-all-eu" "(atom-eu-0 + atom-eu-1 +
|
||
+ atom-imul-1 + atom-imul-2 + atom-imul-3 +
|
||
+ atom-imul-4)")
|
||
+
|
||
+;;; Most of simple instructions have 1 cycle latency. Some of them
|
||
+;;; issue in port 0, some in port 1 and some in either port.
|
||
+(define_reservation "atom-simple-0" "(atom-port-0 + atom-eu-0)")
|
||
+(define_reservation "atom-simple-1" "(atom-port-1 + atom-eu-1)")
|
||
+(define_reservation "atom-simple-either" "(atom-simple-0 | atom-simple-1)")
|
||
+
|
||
+;;; Some insns issue in port 0 with 3 cycle latency and 1 cycle throughput
|
||
+(define_reservation "atom-eu-0-3-1" "(atom-port-0 + atom-eu-0, nothing*2)")
|
||
+
|
||
+;;; fmul insn can have 4 or 5 cycles latency
|
||
+(define_reservation "atom-fmul-5c" "(atom-port-0 + atom-eu-0), nothing*4")
|
||
+(define_reservation "atom-fmul-4c" "(atom-port-0 + atom-eu-0), nothing*3")
|
||
+
|
||
+;;; fadd can have 5 cycles latency depending on instruction forms
|
||
+(define_reservation "atom-fadd-5c" "(atom-port-1 + atom-eu-1), nothing*5")
|
||
+
|
||
+;;; imul insn has 5 cycles latency
|
||
+(define_reservation "atom-imul-32"
|
||
+ "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4,
|
||
+ atom-port-0")
|
||
+;;; imul instruction excludes other non-FP instructions.
|
||
+(exclusion_set "atom-eu-0, atom-eu-1"
|
||
+ "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4")
|
||
+
|
||
+;;; dual-execution instructions can have 1,2,4,5 cycles latency depending on
|
||
+;;; instruction forms
|
||
+(define_reservation "atom-dual-1c" "(atom-port-dual + atom-eu-0 + atom-eu-1)")
|
||
+(define_reservation "atom-dual-2c"
|
||
+ "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing)")
|
||
+(define_reservation "atom-dual-5c"
|
||
+ "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing*4)")
|
||
+
|
||
+;;; Complex macro-instruction has variants of latency, and uses both ports.
|
||
+(define_reservation "atom-complex" "(atom-port-dual + atom-all-eu)")
|
||
+
|
||
+(define_insn_reservation "atom_other" 9
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "other")
|
||
+ (eq_attr "atom_unit" "!jeu")))
|
||
+ "atom-complex, atom-all-eu*8")
|
||
+
|
||
+;; return has type "other" with atom_unit "jeu"
|
||
+(define_insn_reservation "atom_other_2" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "other")
|
||
+ (eq_attr "atom_unit" "jeu")))
|
||
+ "atom-dual-1c")
|
||
+
|
||
+(define_insn_reservation "atom_multi" 9
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (eq_attr "type" "multi"))
|
||
+ "atom-complex, atom-all-eu*8")
|
||
+
|
||
+;; Normal alu insns without carry
|
||
+(define_insn_reservation "atom_alu" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "alu")
|
||
+ (and (eq_attr "memory" "none")
|
||
+ (eq_attr "use_carry" "0"))))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; Normal alu insns without carry, with memory operand
|
||
+(define_insn_reservation "atom_alu_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "alu")
|
||
+ (and (eq_attr "memory" "!none")
|
||
+ (eq_attr "use_carry" "0"))))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; Alu insn consuming CF, such as adc/sbb
|
||
+(define_insn_reservation "atom_alu_carry" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "alu")
|
||
+ (and (eq_attr "memory" "none")
|
||
+ (eq_attr "use_carry" "1"))))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; Alu insn consuming CF, such as adc/sbb, with memory operand
|
||
+(define_insn_reservation "atom_alu_carry_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "alu")
|
||
+ (and (eq_attr "memory" "!none")
|
||
+ (eq_attr "use_carry" "1"))))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_alu1" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "alu1")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_alu1_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "alu1")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_negnot" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "negnot")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_negnot_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "negnot")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_imov" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "imov")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_imov_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "imov")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; 16<-16, 32<-32
|
||
+(define_insn_reservation "atom_imovx" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "imovx")
|
||
+ (and (eq_attr "memory" "none")
|
||
+ (ior (and (match_operand:HI 0 "register_operand")
|
||
+ (match_operand:HI 1 "general_operand"))
|
||
+ (and (match_operand:SI 0 "register_operand")
|
||
+ (match_operand:SI 1 "general_operand"))))))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; 16<-16, 32<-32, mem
|
||
+(define_insn_reservation "atom_imovx_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "imovx")
|
||
+ (and (eq_attr "memory" "!none")
|
||
+ (ior (and (match_operand:HI 0 "register_operand")
|
||
+ (match_operand:HI 1 "general_operand"))
|
||
+ (and (match_operand:SI 0 "register_operand")
|
||
+ (match_operand:SI 1 "general_operand"))))))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8
|
||
+(define_insn_reservation "atom_imovx_2" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "imovx")
|
||
+ (and (eq_attr "memory" "none")
|
||
+ (ior (match_operand:QI 0 "register_operand")
|
||
+ (ior (and (match_operand:SI 0 "register_operand")
|
||
+ (not (match_operand:SI 1 "general_operand")))
|
||
+ (match_operand:DI 0 "register_operand"))))))
|
||
+ "atom-simple-0")
|
||
+
|
||
+;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8, mem
|
||
+(define_insn_reservation "atom_imovx_2_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "imovx")
|
||
+ (and (eq_attr "memory" "!none")
|
||
+ (ior (match_operand:QI 0 "register_operand")
|
||
+ (ior (and (match_operand:SI 0 "register_operand")
|
||
+ (not (match_operand:SI 1 "general_operand")))
|
||
+ (match_operand:DI 0 "register_operand"))))))
|
||
+ "atom-simple-0")
|
||
+
|
||
+;; 16<-8
|
||
+(define_insn_reservation "atom_imovx_3" 3
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "imovx")
|
||
+ (and (match_operand:HI 0 "register_operand")
|
||
+ (match_operand:QI 1 "general_operand"))))
|
||
+ "atom-complex, atom-all-eu*2")
|
||
+
|
||
+(define_insn_reservation "atom_lea" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "lea")
|
||
+ (eq_attr "mode" "!HI")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; lea 16bit address is complex insn
|
||
+(define_insn_reservation "atom_lea_2" 2
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "lea")
|
||
+ (eq_attr "mode" "HI")))
|
||
+ "atom-complex, atom-all-eu")
|
||
+
|
||
+(define_insn_reservation "atom_incdec" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "incdec")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_incdec_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "incdec")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; simple shift instructions use SHIFT eu, no memory operand
|
||
+(define_insn_reservation "atom_ishift" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ishift")
|
||
+ (and (eq_attr "memory" "none") (eq_attr "prefix_0f" "0"))))
|
||
+ "atom-simple-0")
|
||
+
|
||
+;; simple shift instruction use SHIFT eu, memory
|
||
+(define_insn_reservation "atom_ishift_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ishift")
|
||
+ (and (eq_attr "memory" "!none") (eq_attr "prefix_0f" "0"))))
|
||
+ "atom-simple-0")
|
||
+
|
||
+;; DF shift (prefixed with 0f) is complex insn with latency of 7 cycles
|
||
+(define_insn_reservation "atom_ishift_3" 7
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ishift")
|
||
+ (eq_attr "prefix_0f" "1")))
|
||
+ "atom-complex, atom-all-eu*6")
|
||
+
|
||
+(define_insn_reservation "atom_ishift1" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ishift1")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-0")
|
||
+
|
||
+(define_insn_reservation "atom_ishift1_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ishift1")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-0")
|
||
+
|
||
+(define_insn_reservation "atom_rotate" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "rotate")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-0")
|
||
+
|
||
+(define_insn_reservation "atom_rotate_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "rotate")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-0")
|
||
+
|
||
+(define_insn_reservation "atom_rotate1" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "rotate1")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-0")
|
||
+
|
||
+(define_insn_reservation "atom_rotate1_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "rotate1")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-0")
|
||
+
|
||
+(define_insn_reservation "atom_imul" 5
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "imul")
|
||
+ (and (eq_attr "memory" "none") (eq_attr "mode" "SI"))))
|
||
+ "atom-imul-32")
|
||
+
|
||
+(define_insn_reservation "atom_imul_mem" 5
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "imul")
|
||
+ (and (eq_attr "memory" "!none") (eq_attr "mode" "SI"))))
|
||
+ "atom-imul-32")
|
||
+
|
||
+;; latency set to 10 as common 64x64 imul
|
||
+(define_insn_reservation "atom_imul_3" 10
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "imul")
|
||
+ (eq_attr "mode" "!SI")))
|
||
+ "atom-complex, atom-all-eu*9")
|
||
+
|
||
+(define_insn_reservation "atom_idiv" 65
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (eq_attr "type" "idiv"))
|
||
+ "atom-complex, atom-all-eu*32, nothing*32")
|
||
+
|
||
+(define_insn_reservation "atom_icmp" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "icmp")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_icmp_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "icmp")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_test" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "test")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_test_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "test")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_ibr" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ibr")
|
||
+ (eq_attr "memory" "!load")))
|
||
+ "atom-simple-1")
|
||
+
|
||
+;; complex if jump target is from address
|
||
+(define_insn_reservation "atom_ibr_2" 2
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ibr")
|
||
+ (eq_attr "memory" "load")))
|
||
+ "atom-complex, atom-all-eu")
|
||
+
|
||
+(define_insn_reservation "atom_setcc" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "setcc")
|
||
+ (eq_attr "memory" "!store")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; 2 cycles complex if target is in memory
|
||
+(define_insn_reservation "atom_setcc_2" 2
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "setcc")
|
||
+ (eq_attr "memory" "store")))
|
||
+ "atom-complex, atom-all-eu")
|
||
+
|
||
+(define_insn_reservation "atom_icmov" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "icmov")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_icmov_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "icmov")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; UCODE if segreg, ignored
|
||
+(define_insn_reservation "atom_push" 2
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (eq_attr "type" "push"))
|
||
+ "atom-dual-2c")
|
||
+
|
||
+;; pop r64 is 1 cycle. UCODE if segreg, ignored
|
||
+(define_insn_reservation "atom_pop" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "pop")
|
||
+ (eq_attr "mode" "DI")))
|
||
+ "atom-dual-1c")
|
||
+
|
||
+;; pop non-r64 is 2 cycles. UCODE if segreg, ignored
|
||
+(define_insn_reservation "atom_pop_2" 2
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "pop")
|
||
+ (eq_attr "mode" "!DI")))
|
||
+ "atom-dual-2c")
|
||
+
|
||
+;; UCODE if segreg, ignored
|
||
+(define_insn_reservation "atom_call" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (eq_attr "type" "call"))
|
||
+ "atom-dual-1c")
|
||
+
|
||
+(define_insn_reservation "atom_callv" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (eq_attr "type" "callv"))
|
||
+ "atom-dual-1c")
|
||
+
|
||
+(define_insn_reservation "atom_leave" 3
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (eq_attr "type" "leave"))
|
||
+ "atom-complex, atom-all-eu*2")
|
||
+
|
||
+(define_insn_reservation "atom_str" 3
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (eq_attr "type" "str"))
|
||
+ "atom-complex, atom-all-eu*2")
|
||
+
|
||
+(define_insn_reservation "atom_sselog" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sselog")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_sselog_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sselog")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_sselog1" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sselog1")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-0")
|
||
+
|
||
+(define_insn_reservation "atom_sselog1_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sselog1")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-0")
|
||
+
|
||
+;; not pmad, not psad
|
||
+(define_insn_reservation "atom_sseiadd" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseiadd")
|
||
+ (and (not (match_operand:V2DI 0 "register_operand"))
|
||
+ (and (eq_attr "atom_unit" "!simul")
|
||
+ (eq_attr "atom_unit" "!complex")))))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; pmad, psad and 64
|
||
+(define_insn_reservation "atom_sseiadd_2" 4
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseiadd")
|
||
+ (and (not (match_operand:V2DI 0 "register_operand"))
|
||
+ (and (eq_attr "atom_unit" "simul" )
|
||
+ (eq_attr "mode" "DI")))))
|
||
+ "atom-fmul-4c")
|
||
+
|
||
+;; pmad, psad and 128
|
||
+(define_insn_reservation "atom_sseiadd_3" 5
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseiadd")
|
||
+ (and (not (match_operand:V2DI 0 "register_operand"))
|
||
+ (and (eq_attr "atom_unit" "simul" )
|
||
+ (eq_attr "mode" "TI")))))
|
||
+ "atom-fmul-5c")
|
||
+
|
||
+;; if paddq(64 bit op), phadd/phsub
|
||
+(define_insn_reservation "atom_sseiadd_4" 6
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseiadd")
|
||
+ (ior (match_operand:V2DI 0 "register_operand")
|
||
+ (eq_attr "atom_unit" "complex"))))
|
||
+ "atom-complex, atom-all-eu*5")
|
||
+
|
||
+;; if immediate op.
|
||
+(define_insn_reservation "atom_sseishft" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseishft")
|
||
+ (and (eq_attr "atom_unit" "!sishuf")
|
||
+ (match_operand 2 "immediate_operand"))))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; if palignr or psrldq
|
||
+(define_insn_reservation "atom_sseishft_2" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseishft")
|
||
+ (and (eq_attr "atom_unit" "sishuf")
|
||
+ (match_operand 2 "immediate_operand"))))
|
||
+ "atom-simple-0")
|
||
+
|
||
+;; if reg/mem op
|
||
+(define_insn_reservation "atom_sseishft_3" 2
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseishft")
|
||
+ (not (match_operand 2 "immediate_operand"))))
|
||
+ "atom-complex, atom-all-eu")
|
||
+
|
||
+(define_insn_reservation "atom_sseimul" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (eq_attr "type" "sseimul"))
|
||
+ "atom-simple-0")
|
||
+
|
||
+;; rcpss or rsqrtss
|
||
+(define_insn_reservation "atom_sse" 4
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sse")
|
||
+ (and (eq_attr "atom_sse_attr" "rcp") (eq_attr "mode" "SF"))))
|
||
+ "atom-fmul-4c")
|
||
+
|
||
+;; movshdup, movsldup. Suggest to type sseishft
|
||
+(define_insn_reservation "atom_sse_2" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sse")
|
||
+ (eq_attr "atom_sse_attr" "movdup")))
|
||
+ "atom-simple-0")
|
||
+
|
||
+;; lfence
|
||
+(define_insn_reservation "atom_sse_3" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sse")
|
||
+ (eq_attr "atom_sse_attr" "lfence")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; sfence,clflush,mfence, prefetch
|
||
+(define_insn_reservation "atom_sse_4" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sse")
|
||
+ (ior (eq_attr "atom_sse_attr" "fence")
|
||
+ (eq_attr "atom_sse_attr" "prefetch"))))
|
||
+ "atom-simple-0")
|
||
+
|
||
+;; rcpps, rsqrtps, sqrt, ldmxcsr
|
||
+(define_insn_reservation "atom_sse_5" 7
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sse")
|
||
+ (ior (ior (eq_attr "atom_sse_attr" "sqrt")
|
||
+ (eq_attr "atom_sse_attr" "mxcsr"))
|
||
+ (and (eq_attr "atom_sse_attr" "rcp")
|
||
+ (eq_attr "mode" "V4SF")))))
|
||
+ "atom-complex, atom-all-eu*6")
|
||
+
|
||
+;; xmm->xmm
|
||
+(define_insn_reservation "atom_ssemov" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ssemov")
|
||
+ (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "xy"))))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; reg->xmm
|
||
+(define_insn_reservation "atom_ssemov_2" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ssemov")
|
||
+ (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "r"))))
|
||
+ "atom-simple-0")
|
||
+
|
||
+;; xmm->reg
|
||
+(define_insn_reservation "atom_ssemov_3" 3
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ssemov")
|
||
+ (and (match_operand 0 "register_operand" "r") (match_operand 1 "register_operand" "xy"))))
|
||
+ "atom-eu-0-3-1")
|
||
+
|
||
+;; mov mem
|
||
+(define_insn_reservation "atom_ssemov_4" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ssemov")
|
||
+ (and (eq_attr "movu" "0") (eq_attr "memory" "!none"))))
|
||
+ "atom-simple-0")
|
||
+
|
||
+;; movu mem
|
||
+(define_insn_reservation "atom_ssemov_5" 2
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ssemov")
|
||
+ (ior (eq_attr "movu" "1") (eq_attr "memory" "!none"))))
|
||
+ "atom-complex, atom-all-eu")
|
||
+
|
||
+;; no memory simple
|
||
+(define_insn_reservation "atom_sseadd" 5
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseadd")
|
||
+ (and (eq_attr "memory" "none")
|
||
+ (and (eq_attr "mode" "!V2DF")
|
||
+ (eq_attr "atom_unit" "!complex")))))
|
||
+ "atom-fadd-5c")
|
||
+
|
||
+;; memory simple
|
||
+(define_insn_reservation "atom_sseadd_mem" 5
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseadd")
|
||
+ (and (eq_attr "memory" "!none")
|
||
+ (and (eq_attr "mode" "!V2DF")
|
||
+ (eq_attr "atom_unit" "!complex")))))
|
||
+ "atom-dual-5c")
|
||
+
|
||
+;; maxps, minps, *pd, hadd, hsub
|
||
+(define_insn_reservation "atom_sseadd_3" 8
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseadd")
|
||
+ (ior (eq_attr "mode" "V2DF") (eq_attr "atom_unit" "complex"))))
|
||
+ "atom-complex, atom-all-eu*7")
|
||
+
|
||
+;; Except dppd/dpps
|
||
+(define_insn_reservation "atom_ssemul" 5
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ssemul")
|
||
+ (eq_attr "mode" "!SF")))
|
||
+ "atom-fmul-5c")
|
||
+
|
||
+;; Except dppd/dpps, 4 cycle if mulss
|
||
+(define_insn_reservation "atom_ssemul_2" 4
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ssemul")
|
||
+ (eq_attr "mode" "SF")))
|
||
+ "atom-fmul-4c")
|
||
+
|
||
+(define_insn_reservation "atom_ssecmp" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (eq_attr "type" "ssecmp"))
|
||
+ "atom-simple-either")
|
||
+
|
||
+(define_insn_reservation "atom_ssecomi" 10
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (eq_attr "type" "ssecomi"))
|
||
+ "atom-complex, atom-all-eu*9")
|
||
+
|
||
+;; no memory and cvtpi2ps, cvtps2pi, cvttps2pi
|
||
+(define_insn_reservation "atom_ssecvt" 5
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ssecvt")
|
||
+ (ior (and (match_operand:V2SI 0 "register_operand")
|
||
+ (match_operand:V4SF 1 "register_operand"))
|
||
+ (and (match_operand:V4SF 0 "register_operand")
|
||
+ (match_operand:V2SI 1 "register_operand")))))
|
||
+ "atom-fadd-5c")
|
||
+
|
||
+;; memory and cvtpi2ps, cvtps2pi, cvttps2pi
|
||
+(define_insn_reservation "atom_ssecvt_2" 5
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ssecvt")
|
||
+ (ior (and (match_operand:V2SI 0 "register_operand")
|
||
+ (match_operand:V4SF 1 "memory_operand"))
|
||
+ (and (match_operand:V4SF 0 "register_operand")
|
||
+ (match_operand:V2SI 1 "memory_operand")))))
|
||
+ "atom-dual-5c")
|
||
+
|
||
+;; otherwise. 7 cycles average for cvtss2sd
|
||
+(define_insn_reservation "atom_ssecvt_3" 7
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "ssecvt")
|
||
+ (not (ior (and (match_operand:V2SI 0 "register_operand")
|
||
+ (match_operand:V4SF 1 "nonimmediate_operand"))
|
||
+ (and (match_operand:V4SF 0 "register_operand")
|
||
+ (match_operand:V2SI 1 "nonimmediate_operand"))))))
|
||
+ "atom-complex, atom-all-eu*6")
|
||
+
|
||
+;; memory and cvtsi2sd
|
||
+(define_insn_reservation "atom_sseicvt" 5
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseicvt")
|
||
+ (and (match_operand:V2DF 0 "register_operand")
|
||
+ (match_operand:SI 1 "memory_operand"))))
|
||
+ "atom-dual-5c")
|
||
+
|
||
+;; otherwise. 8 cycles average for cvtsd2si
|
||
+(define_insn_reservation "atom_sseicvt_2" 8
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "sseicvt")
|
||
+ (not (and (match_operand:V2DF 0 "register_operand")
|
||
+ (match_operand:SI 1 "memory_operand")))))
|
||
+ "atom-complex, atom-all-eu*7")
|
||
+
|
||
+(define_insn_reservation "atom_ssediv" 62
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (eq_attr "type" "ssediv"))
|
||
+ "atom-complex, atom-all-eu*12, nothing*49")
|
||
+
|
||
+;; simple for fmov
|
||
+(define_insn_reservation "atom_fmov" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "fmov")
|
||
+ (eq_attr "memory" "none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; simple for fmov
|
||
+(define_insn_reservation "atom_fmov_mem" 1
|
||
+ (and (eq_attr "cpu" "atom")
|
||
+ (and (eq_attr "type" "fmov")
|
||
+ (eq_attr "memory" "!none")))
|
||
+ "atom-simple-either")
|
||
+
|
||
+;; Define bypass here
|
||
+
|
||
+;; There will be no stall from lea to non-mem EX insns
|
||
+(define_bypass 0 "atom_lea"
|
||
+ "atom_alu_carry,
|
||
+ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
|
||
+ atom_incdec, atom_setcc, atom_icmov, atom_pop")
|
||
+
|
||
+(define_bypass 0 "atom_lea"
|
||
+ "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
|
||
+ atom_imovx_mem, atom_imovx_2_mem,
|
||
+ atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
|
||
+ "!ix86_agi_dependent")
|
||
+
|
||
+;; There will be 3 cycles stall from EX insns to AGEN insns LEA
|
||
+(define_bypass 4 "atom_alu_carry,
|
||
+ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
|
||
+ atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
|
||
+ atom_rotate1, atom_setcc, atom_icmov, atom_pop,
|
||
+ atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
|
||
+ atom_imovx_mem, atom_imovx_2_mem,
|
||
+ atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
|
||
+ "atom_lea")
|
||
+
|
||
+;; There will be 3 cycles stall from EX insns to insns that need addr calculation
|
||
+(define_bypass 4 "atom_alu_carry,
|
||
+ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
|
||
+ atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
|
||
+ atom_rotate1, atom_setcc, atom_icmov, atom_pop,
|
||
+ atom_imovx_mem, atom_imovx_2_mem,
|
||
+ atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
|
||
+ atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
|
||
+ "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
|
||
+ atom_negnot_mem, atom_imov_mem, atom_incdec_mem,
|
||
+ atom_imovx_mem, atom_imovx_2_mem,
|
||
+ atom_imul_mem, atom_icmp_mem,
|
||
+ atom_test_mem, atom_icmov_mem, atom_sselog_mem,
|
||
+ atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem,
|
||
+ atom_ishift_mem, atom_ishift1_mem,
|
||
+ atom_rotate_mem, atom_rotate1_mem"
|
||
+ "ix86_agi_dependent")
|
||
+
|
||
+;; Stall from imul to lea is 8 cycles.
|
||
+(define_bypass 9 "atom_imul, atom_imul_mem" "atom_lea")
|
||
+
|
||
+;; Stall from imul to memory address is 8 cycles.
|
||
+(define_bypass 9 "atom_imul, atom_imul_mem"
|
||
+ "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
|
||
+ atom_negnot_mem, atom_imov_mem, atom_incdec_mem,
|
||
+ atom_ishift_mem, atom_ishift1_mem, atom_rotate_mem,
|
||
+ atom_rotate1_mem, atom_imul_mem, atom_icmp_mem,
|
||
+ atom_test_mem, atom_icmov_mem, atom_sselog_mem,
|
||
+ atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem"
|
||
+ "ix86_agi_dependent")
|
||
+
|
||
+;; There will be 0 cycle stall from cmp/test to jcc
|
||
+
|
||
+;; There will be 1 cycle stall from flag producer to cmov and adc/sbb
|
||
+(define_bypass 2 "atom_icmp, atom_test, atom_alu, atom_alu_carry,
|
||
+ atom_alu1, atom_negnot, atom_incdec, atom_ishift,
|
||
+ atom_ishift1, atom_rotate, atom_rotate1"
|
||
+ "atom_icmov, atom_alu_carry")
|
||
+
|
||
+;; lea to shift count stall is 2 cycles
|
||
+(define_bypass 3 "atom_lea"
|
||
+ "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1,
|
||
+ atom_ishift_mem, atom_ishift1_mem,
|
||
+ atom_rotate_mem, atom_rotate1_mem"
|
||
+ "ix86_dep_by_shift_count")
|
||
+
|
||
+;; lea to shift source stall is 1 cycle
|
||
+(define_bypass 2 "atom_lea"
|
||
+ "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1"
|
||
+ "!ix86_dep_by_shift_count")
|
||
+
|
||
+;; non-lea to shift count stall is 1 cycle
|
||
+(define_bypass 2 "atom_alu_carry,
|
||
+ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
|
||
+ atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
|
||
+ atom_rotate1, atom_setcc, atom_icmov, atom_pop,
|
||
+ atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
|
||
+ atom_imovx_mem, atom_imovx_2_mem,
|
||
+ atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
|
||
+ "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1,
|
||
+ atom_ishift_mem, atom_ishift1_mem,
|
||
+ atom_rotate_mem, atom_rotate1_mem"
|
||
+ "ix86_dep_by_shift_count")
|
||
--- gcc/config/i386/sse.md (.../trunk) (revision 144460)
|
||
+++ gcc/config/i386/sse.md (.../branches/ix86/atom) (revision 144601)
|
||
@@ -338,6 +338,7 @@ (define_insn "avx_movup<avxmodesuffixf2c
|
||
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
|
||
"vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
|
||
[(set_attr "type" "ssemov")
|
||
+ (set_attr "movu" "1")
|
||
(set_attr "prefix" "vex")
|
||
(set_attr "mode" "<MODE>")])
|
||
|
||
@@ -363,6 +364,7 @@ (define_insn "<sse>_movup<ssemodesuffixf
|
||
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
|
||
"movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
|
||
[(set_attr "type" "ssemov")
|
||
+ (set_attr "movu" "1")
|
||
(set_attr "mode" "<MODE>")])
|
||
|
||
(define_insn "avx_movdqu<avxmodesuffix>"
|
||
@@ -373,6 +375,7 @@ (define_insn "avx_movdqu<avxmodesuffix>"
|
||
"TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
|
||
"vmovdqu\t{%1, %0|%0, %1}"
|
||
[(set_attr "type" "ssemov")
|
||
+ (set_attr "movu" "1")
|
||
(set_attr "prefix" "vex")
|
||
(set_attr "mode" "<avxvecmode>")])
|
||
|
||
@@ -383,6 +386,7 @@ (define_insn "sse2_movdqu"
|
||
"TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
|
||
"movdqu\t{%1, %0|%0, %1}"
|
||
[(set_attr "type" "ssemov")
|
||
+ (set_attr "movu" "1")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "mode" "TI")])
|
||
|
||
@@ -424,7 +428,7 @@ (define_insn "sse2_movntv2di"
|
||
UNSPEC_MOVNT))]
|
||
"TARGET_SSE2"
|
||
"movntdq\t{%1, %0|%0, %1}"
|
||
- [(set_attr "type" "ssecvt")
|
||
+ [(set_attr "type" "ssemov")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "mode" "TI")])
|
||
|
||
@@ -434,7 +438,7 @@ (define_insn "sse2_movntsi"
|
||
UNSPEC_MOVNT))]
|
||
"TARGET_SSE2"
|
||
"movnti\t{%1, %0|%0, %1}"
|
||
- [(set_attr "type" "ssecvt")
|
||
+ [(set_attr "type" "ssemov")
|
||
(set_attr "mode" "V2DF")])
|
||
|
||
(define_insn "avx_lddqu<avxmodesuffix>"
|
||
@@ -445,6 +449,7 @@ (define_insn "avx_lddqu<avxmodesuffix>"
|
||
"TARGET_AVX"
|
||
"vlddqu\t{%1, %0|%0, %1}"
|
||
[(set_attr "type" "ssecvt")
|
||
+ (set_attr "movu" "1")
|
||
(set_attr "prefix" "vex")
|
||
(set_attr "mode" "<avxvecmode>")])
|
||
|
||
@@ -454,7 +459,8 @@ (define_insn "sse3_lddqu"
|
||
UNSPEC_LDDQU))]
|
||
"TARGET_SSE3"
|
||
"lddqu\t{%1, %0|%0, %1}"
|
||
- [(set_attr "type" "ssecvt")
|
||
+ [(set_attr "type" "ssemov")
|
||
+ (set_attr "movu" "1")
|
||
(set_attr "prefix_rep" "1")
|
||
(set_attr "mode" "TI")])
|
||
|
||
@@ -761,6 +767,7 @@ (define_insn "sse_rcpv4sf2"
|
||
"TARGET_SSE"
|
||
"%vrcpps\t{%1, %0|%0, %1}"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "rcp")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "mode" "V4SF")])
|
||
|
||
@@ -787,6 +794,7 @@ (define_insn "sse_vmrcpv4sf2"
|
||
"TARGET_SSE"
|
||
"rcpss\t{%1, %0|%0, %1}"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "rcp")
|
||
(set_attr "mode" "SF")])
|
||
|
||
(define_expand "sqrtv8sf2"
|
||
@@ -832,6 +840,7 @@ (define_insn "sse_sqrtv4sf2"
|
||
"TARGET_SSE"
|
||
"%vsqrtps\t{%1, %0|%0, %1}"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "sqrt")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "mode" "V4SF")])
|
||
|
||
@@ -876,6 +885,7 @@ (define_insn "<sse>_vmsqrt<mode>2"
|
||
"SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
|
||
"sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "sqrt")
|
||
(set_attr "mode" "<ssescalarmode>")])
|
||
|
||
(define_expand "rsqrtv8sf2"
|
||
@@ -1039,7 +1049,7 @@ (define_insn "<sse>_vm<code><mode>3"
|
||
(const_int 1)))]
|
||
"SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
|
||
"<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
|
||
- [(set_attr "type" "sse")
|
||
+ [(set_attr "type" "sseadd")
|
||
(set_attr "mode" "<ssescalarmode>")])
|
||
|
||
;; These versions of the min/max patterns implement exactly the operations
|
||
@@ -1175,6 +1185,7 @@ (define_insn "sse3_addsubv2df3"
|
||
"TARGET_SSE3"
|
||
"addsubpd\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "mode" "V2DF")])
|
||
|
||
(define_insn "avx_h<plusminus_insn>v4df3"
|
||
@@ -1298,6 +1309,7 @@ (define_insn "sse3_h<plusminus_insn>v4sf
|
||
"TARGET_SSE3"
|
||
"h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_rep" "1")
|
||
(set_attr "mode" "V4SF")])
|
||
|
||
@@ -5066,6 +5078,7 @@ (define_insn "*sse2_pmaddwd"
|
||
"TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
|
||
"pmaddwd\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "simul")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "mode" "TI")])
|
||
|
||
@@ -7025,6 +7038,7 @@ (define_insn "*vec_extractv2di_1_rex64"
|
||
movq\t{%H1, %0|%0, %H1}
|
||
mov{q}\t{%H1, %0|%0, %H1}"
|
||
[(set_attr "type" "ssemov,sseishft,ssemov,imov")
|
||
+ (set_attr "atom_unit" "*,sishuf,*,*")
|
||
(set_attr "memory" "*,none,*,*")
|
||
(set_attr "mode" "V2SF,TI,TI,DI")])
|
||
|
||
@@ -7057,6 +7071,7 @@ (define_insn "*vec_extractv2di_1_sse2"
|
||
psrldq\t{$8, %0|%0, 8}
|
||
movq\t{%H1, %0|%0, %H1}"
|
||
[(set_attr "type" "ssemov,sseishft,ssemov")
|
||
+ (set_attr "atom_unit" "*,sishuf,*")
|
||
(set_attr "memory" "*,none,*")
|
||
(set_attr "mode" "V2SF,TI,TI")])
|
||
|
||
@@ -7614,6 +7629,7 @@ (define_insn "sse2_psadbw"
|
||
"TARGET_SSE2"
|
||
"psadbw\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "simul")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "mode" "TI")])
|
||
|
||
@@ -7635,7 +7651,7 @@ (define_insn "<sse>_movmskp<ssemodesuffi
|
||
UNSPEC_MOVMSK))]
|
||
"SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
|
||
"%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
|
||
- [(set_attr "type" "ssecvt")
|
||
+ [(set_attr "type" "ssemov")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "mode" "<MODE>")])
|
||
|
||
@@ -7645,7 +7661,7 @@ (define_insn "sse2_pmovmskb"
|
||
UNSPEC_MOVMSK))]
|
||
"TARGET_SSE2"
|
||
"%vpmovmskb\t{%1, %0|%0, %1}"
|
||
- [(set_attr "type" "ssecvt")
|
||
+ [(set_attr "type" "ssemov")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "mode" "SI")])
|
||
@@ -7668,7 +7684,7 @@ (define_insn "*sse2_maskmovdqu"
|
||
"TARGET_SSE2 && !TARGET_64BIT"
|
||
;; @@@ check ordering of operands in intel/nonintel syntax
|
||
"%vmaskmovdqu\t{%2, %1|%1, %2}"
|
||
- [(set_attr "type" "ssecvt")
|
||
+ [(set_attr "type" "ssemov")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "mode" "TI")])
|
||
@@ -7682,7 +7698,7 @@ (define_insn "*sse2_maskmovdqu_rex64"
|
||
"TARGET_SSE2 && TARGET_64BIT"
|
||
;; @@@ check ordering of operands in intel/nonintel syntax
|
||
"%vmaskmovdqu\t{%2, %1|%1, %2}"
|
||
- [(set_attr "type" "ssecvt")
|
||
+ [(set_attr "type" "ssemov")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "mode" "TI")])
|
||
@@ -7693,6 +7709,7 @@ (define_insn "sse_ldmxcsr"
|
||
"TARGET_SSE"
|
||
"%vldmxcsr\t%0"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "mxcsr")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "memory" "load")])
|
||
|
||
@@ -7702,6 +7719,7 @@ (define_insn "sse_stmxcsr"
|
||
"TARGET_SSE"
|
||
"%vstmxcsr\t%0"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "mxcsr")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "memory" "store")])
|
||
|
||
@@ -7720,6 +7738,7 @@ (define_insn "*sse_sfence"
|
||
"TARGET_SSE || TARGET_3DNOW_A"
|
||
"sfence"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "fence")
|
||
(set_attr "memory" "unknown")])
|
||
|
||
(define_insn "sse2_clflush"
|
||
@@ -7728,6 +7747,7 @@ (define_insn "sse2_clflush"
|
||
"TARGET_SSE2"
|
||
"clflush\t%a0"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "fence")
|
||
(set_attr "memory" "unknown")])
|
||
|
||
(define_expand "sse2_mfence"
|
||
@@ -7745,6 +7765,7 @@ (define_insn "*sse2_mfence"
|
||
"TARGET_64BIT || TARGET_SSE2"
|
||
"mfence"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "fence")
|
||
(set_attr "memory" "unknown")])
|
||
|
||
(define_expand "sse2_lfence"
|
||
@@ -7762,6 +7783,7 @@ (define_insn "*sse2_lfence"
|
||
"TARGET_SSE2"
|
||
"lfence"
|
||
[(set_attr "type" "sse")
|
||
+ (set_attr "atom_sse_attr" "lfence")
|
||
(set_attr "memory" "unknown")])
|
||
|
||
(define_insn "sse3_mwait"
|
||
@@ -7885,6 +7907,7 @@ (define_insn "ssse3_phaddwv8hi3"
|
||
"TARGET_SSSE3"
|
||
"phaddw\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "TI")])
|
||
@@ -7913,6 +7936,7 @@ (define_insn "ssse3_phaddwv4hi3"
|
||
"TARGET_SSSE3"
|
||
"phaddw\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "DI")])
|
||
|
||
@@ -7967,6 +7991,7 @@ (define_insn "ssse3_phadddv4si3"
|
||
"TARGET_SSSE3"
|
||
"phaddd\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "TI")])
|
||
@@ -7987,6 +8012,7 @@ (define_insn "ssse3_phadddv2si3"
|
||
"TARGET_SSSE3"
|
||
"phaddd\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "DI")])
|
||
|
||
@@ -8073,6 +8099,7 @@ (define_insn "ssse3_phaddswv8hi3"
|
||
"TARGET_SSSE3"
|
||
"phaddsw\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "TI")])
|
||
@@ -8101,6 +8128,7 @@ (define_insn "ssse3_phaddswv4hi3"
|
||
"TARGET_SSSE3"
|
||
"phaddsw\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "DI")])
|
||
|
||
@@ -8187,6 +8215,7 @@ (define_insn "ssse3_phsubwv8hi3"
|
||
"TARGET_SSSE3"
|
||
"phsubw\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "TI")])
|
||
@@ -8215,6 +8244,7 @@ (define_insn "ssse3_phsubwv4hi3"
|
||
"TARGET_SSSE3"
|
||
"phsubw\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "DI")])
|
||
|
||
@@ -8269,6 +8299,7 @@ (define_insn "ssse3_phsubdv4si3"
|
||
"TARGET_SSSE3"
|
||
"phsubd\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "TI")])
|
||
@@ -8289,6 +8320,7 @@ (define_insn "ssse3_phsubdv2si3"
|
||
"TARGET_SSSE3"
|
||
"phsubd\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "DI")])
|
||
|
||
@@ -8375,6 +8407,7 @@ (define_insn "ssse3_phsubswv8hi3"
|
||
"TARGET_SSSE3"
|
||
"phsubsw\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "TI")])
|
||
@@ -8403,6 +8436,7 @@ (define_insn "ssse3_phsubswv4hi3"
|
||
"TARGET_SSSE3"
|
||
"phsubsw\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "complex")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "DI")])
|
||
|
||
@@ -8509,6 +8543,7 @@ (define_insn "ssse3_pmaddubsw128"
|
||
"TARGET_SSSE3"
|
||
"pmaddubsw\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "simul")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "TI")])
|
||
@@ -8547,6 +8582,7 @@ (define_insn "ssse3_pmaddubsw"
|
||
"TARGET_SSSE3"
|
||
"pmaddubsw\t{%2, %0|%0, %2}"
|
||
[(set_attr "type" "sseiadd")
|
||
+ (set_attr "atom_unit" "simul")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "DI")])
|
||
|
||
@@ -8754,6 +8790,7 @@ (define_insn "ssse3_palignrti"
|
||
return "palignr\t{%3, %2, %0|%0, %2, %3}";
|
||
}
|
||
[(set_attr "type" "sseishft")
|
||
+ (set_attr "atom_unit" "sishuf")
|
||
(set_attr "prefix_data16" "1")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "TI")])
|
||
@@ -8770,6 +8807,7 @@ (define_insn "ssse3_palignrdi"
|
||
return "palignr\t{%3, %2, %0|%0, %2, %3}";
|
||
}
|
||
[(set_attr "type" "sseishft")
|
||
+ (set_attr "atom_unit" "sishuf")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "mode" "DI")])
|
||
|
||
@@ -8956,7 +8994,7 @@ (define_insn "sse4_1_movntdqa"
|
||
UNSPEC_MOVNTDQA))]
|
||
"TARGET_SSE4_1"
|
||
"%vmovntdqa\t{%1, %0|%0, %1}"
|
||
- [(set_attr "type" "ssecvt")
|
||
+ [(set_attr "type" "ssemov")
|
||
(set_attr "prefix_extra" "1")
|
||
(set_attr "prefix" "maybe_vex")
|
||
(set_attr "mode" "TI")])
|
||
--- gcc/config/i386/i386-c.c (.../trunk) (revision 144460)
|
||
+++ gcc/config/i386/i386-c.c (.../branches/ix86/atom) (revision 144601)
|
||
@@ -119,6 +119,10 @@ ix86_target_macros_internal (int isa_fla
|
||
def_or_undef (parse_in, "__core2");
|
||
def_or_undef (parse_in, "__core2__");
|
||
break;
|
||
+ case PROCESSOR_ATOM:
|
||
+ def_or_undef (parse_in, "__atom");
|
||
+ def_or_undef (parse_in, "__atom__");
|
||
+ break;
|
||
/* use PROCESSOR_max to not set/unset the arch macro. */
|
||
case PROCESSOR_max:
|
||
break;
|
||
@@ -187,6 +191,9 @@ ix86_target_macros_internal (int isa_fla
|
||
case PROCESSOR_CORE2:
|
||
def_or_undef (parse_in, "__tune_core2__");
|
||
break;
|
||
+ case PROCESSOR_ATOM:
|
||
+ def_or_undef (parse_in, "__tune_atom__");
|
||
+ break;
|
||
case PROCESSOR_GENERIC32:
|
||
case PROCESSOR_GENERIC64:
|
||
break;
|
||
--- gcc/config/i386/i386-protos.h (.../trunk) (revision 144460)
|
||
+++ gcc/config/i386/i386-protos.h (.../branches/ix86/atom) (revision 144601)
|
||
@@ -85,6 +85,9 @@ extern void ix86_fixup_binary_operands_n
|
||
extern void ix86_expand_binary_operator (enum rtx_code,
|
||
enum machine_mode, rtx[]);
|
||
extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
|
||
+extern bool ix86_lea_for_add_ok (enum rtx_code, rtx, rtx[]);
|
||
+extern bool ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn);
|
||
+extern bool ix86_agi_dependent (rtx set_insn, rtx use_insn);
|
||
extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
|
||
rtx[]);
|
||
extern rtx ix86_build_const_vector (enum machine_mode, bool, rtx);
|
||
--- gcc/config/i386/i386.c (.../trunk) (revision 144460)
|
||
+++ gcc/config/i386/i386.c (.../branches/ix86/atom) (revision 144601)
|
||
@@ -1036,6 +1036,79 @@ struct processor_costs core2_cost = {
|
||
1, /* cond_not_taken_branch_cost. */
|
||
};
|
||
|
||
+static const
|
||
+struct processor_costs atom_cost = {
|
||
+ COSTS_N_INSNS (1), /* cost of an add instruction */
|
||
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
|
||
+ COSTS_N_INSNS (1), /* variable shift costs */
|
||
+ COSTS_N_INSNS (1), /* constant shift costs */
|
||
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
|
||
+ COSTS_N_INSNS (4), /* HI */
|
||
+ COSTS_N_INSNS (3), /* SI */
|
||
+ COSTS_N_INSNS (4), /* DI */
|
||
+ COSTS_N_INSNS (2)}, /* other */
|
||
+ 0, /* cost of multiply per each bit set */
|
||
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
|
||
+ COSTS_N_INSNS (26), /* HI */
|
||
+ COSTS_N_INSNS (42), /* SI */
|
||
+ COSTS_N_INSNS (74), /* DI */
|
||
+ COSTS_N_INSNS (74)}, /* other */
|
||
+ COSTS_N_INSNS (1), /* cost of movsx */
|
||
+ COSTS_N_INSNS (1), /* cost of movzx */
|
||
+ 8, /* "large" insn */
|
||
+ 17, /* MOVE_RATIO */
|
||
+ 2, /* cost for loading QImode using movzbl */
|
||
+ {4, 4, 4}, /* cost of loading integer registers
|
||
+ in QImode, HImode and SImode.
|
||
+ Relative to reg-reg move (2). */
|
||
+ {4, 4, 4}, /* cost of storing integer registers */
|
||
+ 4, /* cost of reg,reg fld/fst */
|
||
+ {12, 12, 12}, /* cost of loading fp registers
|
||
+ in SFmode, DFmode and XFmode */
|
||
+ {6, 6, 8}, /* cost of storing fp registers
|
||
+ in SFmode, DFmode and XFmode */
|
||
+ 2, /* cost of moving MMX register */
|
||
+ {8, 8}, /* cost of loading MMX registers
|
||
+ in SImode and DImode */
|
||
+ {8, 8}, /* cost of storing MMX registers
|
||
+ in SImode and DImode */
|
||
+ 2, /* cost of moving SSE register */
|
||
+ {8, 8, 8}, /* cost of loading SSE registers
|
||
+ in SImode, DImode and TImode */
|
||
+ {8, 8, 8}, /* cost of storing SSE registers
|
||
+ in SImode, DImode and TImode */
|
||
+ 5, /* MMX or SSE register to integer */
|
||
+ 32, /* size of l1 cache. */
|
||
+ 256, /* size of l2 cache. */
|
||
+ 64, /* size of prefetch block */
|
||
+ 6, /* number of parallel prefetches */
|
||
+ 3, /* Branch cost */
|
||
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
|
||
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
|
||
+ COSTS_N_INSNS (20), /* cost of FDIV instruction. */
|
||
+ COSTS_N_INSNS (8), /* cost of FABS instruction. */
|
||
+ COSTS_N_INSNS (8), /* cost of FCHS instruction. */
|
||
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
|
||
+ {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
|
||
+ {libcall, {{32, loop}, {64, rep_prefix_4_byte},
|
||
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}},
|
||
+ {{libcall, {{8, loop}, {15, unrolled_loop},
|
||
+ {2048, rep_prefix_4_byte}, {-1, libcall}}},
|
||
+ {libcall, {{24, loop}, {32, unrolled_loop},
|
||
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}},
|
||
+ 1, /* scalar_stmt_cost. */
|
||
+ 1, /* scalar load_cost. */
|
||
+ 1, /* scalar_store_cost. */
|
||
+ 1, /* vec_stmt_cost. */
|
||
+ 1, /* vec_to_scalar_cost. */
|
||
+ 1, /* scalar_to_vec_cost. */
|
||
+ 1, /* vec_align_load_cost. */
|
||
+ 2, /* vec_unalign_load_cost. */
|
||
+ 1, /* vec_store_cost. */
|
||
+ 3, /* cond_taken_branch_cost. */
|
||
+ 1, /* cond_not_taken_branch_cost. */
|
||
+};
|
||
+
|
||
/* Generic64 should produce code tuned for Nocona and K8. */
|
||
static const
|
||
struct processor_costs generic64_cost = {
|
||
@@ -1194,6 +1267,7 @@ const struct processor_costs *ix86_cost
|
||
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
|
||
#define m_NOCONA (1<<PROCESSOR_NOCONA)
|
||
#define m_CORE2 (1<<PROCESSOR_CORE2)
|
||
+#define m_ATOM (1<<PROCESSOR_ATOM)
|
||
|
||
#define m_GEODE (1<<PROCESSOR_GEODE)
|
||
#define m_K6 (1<<PROCESSOR_K6)
|
||
@@ -1231,10 +1305,11 @@ static unsigned int initial_ix86_tune_fe
|
||
m_486 | m_PENT,
|
||
|
||
/* X86_TUNE_UNROLL_STRLEN */
|
||
- m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
|
||
+ m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
|
||
+ | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_DEEP_BRANCH_PREDICTION */
|
||
- m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
|
||
+ m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
|
||
|
||
/* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
|
||
on simulation result. But after P4 was made, no performance benefit
|
||
@@ -1246,12 +1321,12 @@ static unsigned int initial_ix86_tune_fe
|
||
~m_386,
|
||
|
||
/* X86_TUNE_USE_SAHF */
|
||
- m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
|
||
+ m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
|
||
| m_NOCONA | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
|
||
partial dependencies. */
|
||
- m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
|
||
+ m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
|
||
| m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
|
||
|
||
/* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
|
||
@@ -1271,13 +1346,13 @@ static unsigned int initial_ix86_tune_fe
|
||
m_386 | m_486 | m_K6_GEODE,
|
||
|
||
/* X86_TUNE_USE_SIMODE_FIOP */
|
||
- ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
|
||
+ ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
|
||
|
||
/* X86_TUNE_USE_MOV0 */
|
||
m_K6,
|
||
|
||
/* X86_TUNE_USE_CLTD */
|
||
- ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
|
||
+ ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
|
||
|
||
/* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
|
||
m_PENT4,
|
||
@@ -1292,8 +1367,8 @@ static unsigned int initial_ix86_tune_fe
|
||
~(m_PENT | m_PPRO),
|
||
|
||
/* X86_TUNE_PROMOTE_QIMODE */
|
||
- m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
|
||
- | m_GENERIC /* | m_PENT4 ? */,
|
||
+ m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
|
||
+ | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
|
||
|
||
/* X86_TUNE_FAST_PREFIX */
|
||
~(m_PENT | m_486 | m_386),
|
||
@@ -1317,26 +1392,28 @@ static unsigned int initial_ix86_tune_fe
|
||
m_PPRO,
|
||
|
||
/* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
|
||
- m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
||
+ m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
|
||
+ | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_ADD_ESP_8 */
|
||
- m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
|
||
+ m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
|
||
| m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_SUB_ESP_4 */
|
||
- m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
||
+ m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
|
||
+ | m_GENERIC,
|
||
|
||
/* X86_TUNE_SUB_ESP_8 */
|
||
- m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
|
||
+ m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
|
||
| m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
|
||
for DFmode copies */
|
||
- ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
|
||
+ ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
|
||
| m_GENERIC | m_GEODE),
|
||
|
||
/* X86_TUNE_PARTIAL_REG_DEPENDENCY */
|
||
- m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
||
+ m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
|
||
conflict here in between PPro/Pentium4 based chips that thread 128bit
|
||
@@ -1347,7 +1424,8 @@ static unsigned int initial_ix86_tune_fe
|
||
shows that disabling this option on P4 brings over 20% SPECfp regression,
|
||
while enabling it on K8 brings roughly 2.4% regression that can be partly
|
||
masked by careful scheduling of moves. */
|
||
- m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
|
||
+ m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
|
||
+ | m_AMDFAM10,
|
||
|
||
/* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
|
||
m_AMDFAM10,
|
||
@@ -1365,13 +1443,13 @@ static unsigned int initial_ix86_tune_fe
|
||
m_PPRO | m_PENT4 | m_NOCONA,
|
||
|
||
/* X86_TUNE_MEMORY_MISMATCH_STALL */
|
||
- m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
||
+ m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_PROLOGUE_USING_MOVE */
|
||
- m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
|
||
+ m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_EPILOGUE_USING_MOVE */
|
||
- m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
|
||
+ m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_SHIFT1 */
|
||
~m_486,
|
||
@@ -1380,29 +1458,32 @@ static unsigned int initial_ix86_tune_fe
|
||
m_AMD_MULTIPLE,
|
||
|
||
/* X86_TUNE_INTER_UNIT_MOVES */
|
||
- ~(m_AMD_MULTIPLE | m_GENERIC),
|
||
+ ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
|
||
|
||
/* X86_TUNE_INTER_UNIT_CONVERSIONS */
|
||
~(m_AMDFAM10),
|
||
|
||
/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
|
||
than 4 branch instructions in the 16 byte window. */
|
||
- m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
||
+ m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
|
||
+ | m_GENERIC,
|
||
|
||
/* X86_TUNE_SCHEDULE */
|
||
- m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
|
||
+ m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
|
||
+ | m_GENERIC,
|
||
|
||
/* X86_TUNE_USE_BT */
|
||
- m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
|
||
+ m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_USE_INCDEC */
|
||
- ~(m_PENT4 | m_NOCONA | m_GENERIC),
|
||
+ ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
|
||
|
||
/* X86_TUNE_PAD_RETURNS */
|
||
m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_EXT_80387_CONSTANTS */
|
||
- m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
|
||
+ m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
|
||
+ | m_CORE2 | m_GENERIC,
|
||
|
||
/* X86_TUNE_SHORTEN_X87_SSE */
|
||
~m_K8,
|
||
@@ -1447,6 +1528,10 @@ static unsigned int initial_ix86_tune_fe
|
||
with a subsequent conditional jump instruction into a single
|
||
compare-and-branch uop. */
|
||
m_CORE2,
|
||
+
|
||
+ /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
|
||
+ will impact LEA instruction selection. */
|
||
+ m_ATOM,
|
||
};
|
||
|
||
/* Feature tests against the various architecture variations. */
|
||
@@ -1472,10 +1557,11 @@ static unsigned int initial_ix86_arch_fe
|
||
};
|
||
|
||
static const unsigned int x86_accumulate_outgoing_args
|
||
- = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
|
||
+ = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
|
||
+ | m_GENERIC;
|
||
|
||
static const unsigned int x86_arch_always_fancy_math_387
|
||
- = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
|
||
+ = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
|
||
| m_NOCONA | m_CORE2 | m_GENERIC;
|
||
|
||
static enum stringop_alg stringop_alg = no_stringop;
|
||
@@ -1953,7 +2039,8 @@ static const struct ptt processor_target
|
||
{&core2_cost, 16, 10, 16, 10, 16},
|
||
{&generic32_cost, 16, 7, 16, 7, 16},
|
||
{&generic64_cost, 16, 10, 16, 10, 16},
|
||
- {&amdfam10_cost, 32, 24, 32, 7, 32}
|
||
+ {&amdfam10_cost, 32, 24, 32, 7, 32},
|
||
+ {&atom_cost, 16, 7, 16, 7, 16}
|
||
};
|
||
|
||
static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
|
||
@@ -1971,6 +2058,7 @@ static const char *const cpu_names[TARGE
|
||
"prescott",
|
||
"nocona",
|
||
"core2",
|
||
+ "atom",
|
||
"geode",
|
||
"k6",
|
||
"k6-2",
|
||
@@ -2529,6 +2617,9 @@ override_options (bool main_args_p)
|
||
{"core2", PROCESSOR_CORE2, CPU_CORE2,
|
||
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
|
||
| PTA_SSSE3 | PTA_CX16},
|
||
+ {"atom", PROCESSOR_ATOM, CPU_ATOM,
|
||
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
|
||
+ | PTA_SSSE3 | PTA_CX16},
|
||
{"geode", PROCESSOR_GEODE, CPU_GEODE,
|
||
PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
|
||
{"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
|
||
@@ -12903,6 +12994,263 @@ ix86_expand_unary_operator (enum rtx_cod
|
||
emit_move_insn (operands[0], dst);
|
||
}
|
||
|
||
+#define LEA_SEARCH_THRESHOLD 12
|
||
+
|
||
+/* Reach non-agu definition of op1 and op2 in insn's basic block.
|
||
+ Search backward until 1. passed LEA_SEARCH_THRESHOLD instructions,
|
||
+ or 2. reach BB boundary, or 3. reach agu definition.
|
||
+ Returns the distance between the non-agu definition point and insn.
|
||
+ If no definition point, returns -1
|
||
+ TODO: Currently we have no way to distinguish if definition insn is a LEA.
|
||
+ We just assume all definitions are non-lea. */
|
||
+static int
|
||
+distance_non_agu_define (rtx op1, rtx op2, rtx insn)
|
||
+{
|
||
+ rtx reg_op1 = REG_P (op1) ? op1 : NULL;
|
||
+ rtx reg_op2 = REG_P (op2) ? op2 : NULL;
|
||
+ basic_block bb = BLOCK_FOR_INSN (insn);
|
||
+ int distance = 0;
|
||
+
|
||
+ if (insn != BB_HEAD (bb))
|
||
+ {
|
||
+
|
||
+ rtx prev = PREV_INSN (insn);
|
||
+ while (prev && distance < LEA_SEARCH_THRESHOLD)
|
||
+ {
|
||
+ if (INSN_P (prev))
|
||
+ {
|
||
+ distance++;
|
||
+ if ((reg_op1 && reg_set_p (reg_op1, prev))
|
||
+ || (reg_op2 && reg_set_p (reg_op2, prev)))
|
||
+ return distance ;
|
||
+ }
|
||
+ if (prev == BB_HEAD (bb))
|
||
+ break;
|
||
+ prev = PREV_INSN (prev);
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (distance < LEA_SEARCH_THRESHOLD)
|
||
+ {
|
||
+ edge e;
|
||
+ edge_iterator ei;
|
||
+ bool simple_loop = false;
|
||
+
|
||
+ FOR_EACH_EDGE (e, ei, bb->preds)
|
||
+ if (e->src == bb)
|
||
+ {
|
||
+ simple_loop = true;
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ if (simple_loop)
|
||
+ {
|
||
+ rtx prev = BB_END (bb);
|
||
+ while (prev
|
||
+ && prev != insn
|
||
+ && distance < LEA_SEARCH_THRESHOLD)
|
||
+ {
|
||
+ if (INSN_P (prev))
|
||
+ {
|
||
+ distance++;
|
||
+ if ((reg_op1 && reg_set_p (reg_op1, prev))
|
||
+ || (reg_op2 && reg_set_p (reg_op2, prev)))
|
||
+ return distance;
|
||
+ }
|
||
+ prev = PREV_INSN (prev);
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
+ return -1;
|
||
+}
|
||
+
|
||
+/* Return the distance between this insn and the next insn that uses
|
||
+ result of this insn as memory address.
|
||
+ Return -1 if no such use is found within LEA_SEARCH_THRESHOLD. */
|
||
+static int
|
||
+distance_agu_use (rtx op0, rtx insn)
|
||
+{
|
||
+ basic_block bb = BLOCK_FOR_INSN (insn);
|
||
+ int distance = 0;
|
||
+
|
||
+ if (insn != BB_END(bb))
|
||
+ {
|
||
+ rtx next = NEXT_INSN (insn);
|
||
+
|
||
+ while (next && distance < LEA_SEARCH_THRESHOLD)
|
||
+ {
|
||
+ if (INSN_P (next))
|
||
+ {
|
||
+ distance++;
|
||
+ if (reg_mentioned_by_mem_p (op0, next))
|
||
+ return distance;
|
||
+ if (reg_set_p (op0, next))
|
||
+ return -1;
|
||
+ }
|
||
+ if (next == BB_END (bb))
|
||
+ break;
|
||
+ next = NEXT_INSN (next);
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (distance < LEA_SEARCH_THRESHOLD)
|
||
+ {
|
||
+ edge e;
|
||
+ edge_iterator ei;
|
||
+ bool simple_loop = false;
|
||
+
|
||
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
||
+ if (e->dest == bb)
|
||
+ {
|
||
+ simple_loop = true;
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ if (simple_loop)
|
||
+ {
|
||
+ rtx next = BB_HEAD (bb);
|
||
+ while (next && distance < LEA_SEARCH_THRESHOLD)
|
||
+ {
|
||
+ if (next == insn)
|
||
+ break;
|
||
+ if (INSN_P (next))
|
||
+ {
|
||
+ distance++;
|
||
+ if (reg_mentioned_by_mem_p (op0, next))
|
||
+ return distance;
|
||
+ if (reg_set_p (op0, next))
|
||
+ return -1;
|
||
+ }
|
||
+ next = NEXT_INSN (next);
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
+ return -1;
|
||
+}
|
||
+
|
||
+/* Define this macro to tune LEA priority vs ADD; it takes effect when
|
||
+ there is a dilemma of choosing LEA or ADD:
|
||
+ Negative value: ADD is more preferred than LEA
|
||
+ Zero: Neutral
|
||
+ Positive value: LEA is more preferred than ADD. */
|
||
+#define IX86_LEA_PRIORITY 2
|
||
+
|
||
+/* Return true if it is ok to optimize an ADD operation to LEA
|
||
+ operation to avoid flag register consumption. For the processors
|
||
+ like ATOM, if the destination register of LEA holds an actual
|
||
+ address which will be used soon, LEA is better and otherwise ADD
|
||
+ is better. */
|
||
+
|
||
+bool
|
||
+ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
|
||
+ rtx insn,
|
||
+ rtx operands[])
|
||
+{
|
||
+ gcc_assert (REG_P (operands[0]));
|
||
+ gcc_assert (operands[1] && operands[2]);
|
||
+
|
||
+ if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
|
||
+ {
|
||
+ if (true_regnum (operands[0]) != true_regnum (operands[1]))
|
||
+ return true;
|
||
+ else
|
||
+ return false;
|
||
+ }
|
||
+
|
||
+ /* If a = b + c, (a!=b && a!=c), must use lea form. */
|
||
+ if (true_regnum (operands[0]) != true_regnum (operands[1])
|
||
+ && true_regnum (operands[0]) != true_regnum (operands[2]))
|
||
+ return true;
|
||
+ else
|
||
+ {
|
||
+ int dist_define, dist_use;
|
||
+ dist_define = distance_non_agu_define (operands[1],
|
||
+ operands[2], insn);
|
||
+ if (dist_define <= 0)
|
||
+ return true;
|
||
+
|
||
+ /* If this insn has both backward non-agu dependence and forward
|
||
+ agu dependence, the one with the shorter distance takes effect. */
|
||
+ dist_use = distance_agu_use (operands[0], insn);
|
||
+ if (dist_use <= 0
|
||
+ || (dist_define + IX86_LEA_PRIORITY) < dist_use)
|
||
+ return false;
|
||
+
|
||
+ return true;
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Return true if destination reg of SET_INSN is shift count of
|
||
+ USE_INSN. */
|
||
+
|
||
+bool
|
||
+ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
|
||
+{
|
||
+ rtx set_pattern = PATTERN (set_insn);
|
||
+ rtx set_dest;
|
||
+ rtx shift_rtx;
|
||
+ rtx use_pattern;
|
||
+
|
||
+ /* Retrieve destination of set_insn */
|
||
+ switch (GET_CODE (set_pattern))
|
||
+ {
|
||
+ case SET:
|
||
+ set_dest = SET_DEST (set_pattern);
|
||
+ break;
|
||
+ case PARALLEL:
|
||
+ set_pattern = XVECEXP (set_pattern, 0, 0);
|
||
+ if (GET_CODE (set_pattern ) == SET)
|
||
+ {
|
||
+ set_dest = SET_DEST (set_pattern);
|
||
+ break;
|
||
+ }
|
||
+ default:
|
||
+ set_dest = NULL;
|
||
+ break;
|
||
+ }
|
||
+ if (!set_dest || !REG_P (set_dest))
|
||
+ return false;
|
||
+
|
||
+ /* Retrieve shift count of use_insn */
|
||
+ use_pattern = PATTERN (use_insn);
|
||
+ switch (GET_CODE (use_pattern))
|
||
+ {
|
||
+ case SET:
|
||
+ shift_rtx = XEXP (use_pattern, 1);
|
||
+ break;
|
||
+ case PARALLEL:
|
||
+ set_pattern = XVECEXP (use_pattern, 0, 0);
|
||
+ if (GET_CODE (set_pattern) == SET)
|
||
+ {
|
||
+ shift_rtx = XEXP (set_pattern, 1);
|
||
+ break;
|
||
+ }
|
||
+ default:
|
||
+ shift_rtx = NULL;
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ if (shift_rtx
|
||
+ && (GET_CODE (shift_rtx) == ASHIFT
|
||
+ || GET_CODE (shift_rtx) == LSHIFTRT
|
||
+ || GET_CODE (shift_rtx) == ASHIFTRT
|
||
+ || GET_CODE (shift_rtx) == ROTATE
|
||
+ || GET_CODE (shift_rtx) == ROTATERT))
|
||
+ {
|
||
+ rtx shift_count = XEXP (shift_rtx, 1);
|
||
+ gcc_assert (shift_count);
|
||
+
|
||
+ /* Return true if shift count is dest of set_insn */
|
||
+ if (REG_P (shift_count)
|
||
+ && true_regnum (set_dest) == true_regnum (shift_count))
|
||
+ return true;
|
||
+ }
|
||
+
|
||
+ return false;
|
||
+}
|
||
+
|
||
/* Return TRUE or FALSE depending on whether the unary operator meets the
|
||
appropriate constraints. */
|
||
|
||
@@ -19022,6 +19370,7 @@ ix86_issue_rate (void)
|
||
switch (ix86_tune)
|
||
{
|
||
case PROCESSOR_PENTIUM:
|
||
+ case PROCESSOR_ATOM:
|
||
case PROCESSOR_K6:
|
||
return 2;
|
||
|
||
@@ -19088,41 +19437,21 @@ ix86_flags_dependent (rtx insn, rtx dep_
|
||
return 1;
|
||
}
|
||
|
||
-/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
|
||
- address with operands set by DEP_INSN. */
|
||
+/* Return true iff USE_INSN has a memory address with operands set by
|
||
+ SET_INSN. */
|
||
|
||
-static int
|
||
-ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
|
||
+bool
|
||
+ix86_agi_dependent (rtx set_insn, rtx use_insn)
|
||
{
|
||
- rtx addr;
|
||
-
|
||
- if (insn_type == TYPE_LEA
|
||
- && TARGET_PENTIUM)
|
||
- {
|
||
- addr = PATTERN (insn);
|
||
-
|
||
- if (GET_CODE (addr) == PARALLEL)
|
||
- addr = XVECEXP (addr, 0, 0);
|
||
-
|
||
- gcc_assert (GET_CODE (addr) == SET);
|
||
-
|
||
- addr = SET_SRC (addr);
|
||
- }
|
||
- else
|
||
- {
|
||
- int i;
|
||
- extract_insn_cached (insn);
|
||
- for (i = recog_data.n_operands - 1; i >= 0; --i)
|
||
- if (MEM_P (recog_data.operand[i]))
|
||
- {
|
||
- addr = XEXP (recog_data.operand[i], 0);
|
||
- goto found;
|
||
- }
|
||
- return 0;
|
||
- found:;
|
||
- }
|
||
-
|
||
- return modified_in_p (addr, dep_insn);
|
||
+ int i;
|
||
+ extract_insn_cached (use_insn);
|
||
+ for (i = recog_data.n_operands - 1; i >= 0; --i)
|
||
+ if (MEM_P (recog_data.operand[i]))
|
||
+ {
|
||
+ rtx addr = XEXP (recog_data.operand[i], 0);
|
||
+ return modified_in_p (addr, set_insn) != 0;
|
||
+ }
|
||
+ return false;
|
||
}
|
||
|
||
static int
|
||
@@ -19150,8 +19479,21 @@ ix86_adjust_cost (rtx insn, rtx link, rt
|
||
{
|
||
case PROCESSOR_PENTIUM:
|
||
/* Address Generation Interlock adds a cycle of latency. */
|
||
- if (ix86_agi_dependent (insn, dep_insn, insn_type))
|
||
- cost += 1;
|
||
+ if (insn_type == TYPE_LEA)
|
||
+ {
|
||
+ rtx addr = PATTERN (insn);
|
||
+
|
||
+ if (GET_CODE (addr) == PARALLEL)
|
||
+ addr = XVECEXP (addr, 0, 0);
|
||
+
|
||
+ gcc_assert (GET_CODE (addr) == SET);
|
||
+
|
||
+ addr = SET_SRC (addr);
|
||
+ if (modified_in_p (addr, dep_insn))
|
||
+ cost += 1;
|
||
+ }
|
||
+ else if (ix86_agi_dependent (dep_insn, insn))
|
||
+ cost += 1;
|
||
|
||
/* ??? Compares pair with jump/setcc. */
|
||
if (ix86_flags_dependent (insn, dep_insn, insn_type))
|
||
@@ -19160,7 +19500,7 @@ ix86_adjust_cost (rtx insn, rtx link, rt
|
||
/* Floating point stores require value to be ready one cycle earlier. */
|
||
if (insn_type == TYPE_FMOV
|
||
&& get_attr_memory (insn) == MEMORY_STORE
|
||
- && !ix86_agi_dependent (insn, dep_insn, insn_type))
|
||
+ && !ix86_agi_dependent (dep_insn, insn))
|
||
cost += 1;
|
||
break;
|
||
|
||
@@ -19183,7 +19523,7 @@ ix86_adjust_cost (rtx insn, rtx link, rt
|
||
in parallel with previous instruction in case
|
||
previous instruction is not needed to compute the address. */
|
||
if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
|
||
- && !ix86_agi_dependent (insn, dep_insn, insn_type))
|
||
+ && !ix86_agi_dependent (dep_insn, insn))
|
||
{
|
||
/* Claim moves to take one cycle, as core can issue one load
|
||
at time and the next load can start cycle later. */
|
||
@@ -19212,7 +19552,7 @@ ix86_adjust_cost (rtx insn, rtx link, rt
|
||
in parallel with previous instruction in case
|
||
previous instruction is not needed to compute the address. */
|
||
if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
|
||
- && !ix86_agi_dependent (insn, dep_insn, insn_type))
|
||
+ && !ix86_agi_dependent (dep_insn, insn))
|
||
{
|
||
/* Claim moves to take one cycle, as core can issue one load
|
||
at time and the next load can start cycle later. */
|
||
@@ -19229,6 +19569,7 @@ ix86_adjust_cost (rtx insn, rtx link, rt
|
||
case PROCESSOR_ATHLON:
|
||
case PROCESSOR_K8:
|
||
case PROCESSOR_AMDFAM10:
|
||
+ case PROCESSOR_ATOM:
|
||
case PROCESSOR_GENERIC32:
|
||
case PROCESSOR_GENERIC64:
|
||
memory = get_attr_memory (insn);
|
||
@@ -19237,7 +19578,7 @@ ix86_adjust_cost (rtx insn, rtx link, rt
|
||
in parallel with previous instruction in case
|
||
previous instruction is not needed to compute the address. */
|
||
if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
|
||
- && !ix86_agi_dependent (insn, dep_insn, insn_type))
|
||
+ && !ix86_agi_dependent (dep_insn, insn))
|
||
{
|
||
enum attr_unit unit = get_attr_unit (insn);
|
||
int loadcost = 3;
|