2015-02-26  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/65215
	* tree-ssa-math-opts.c (find_bswap_or_nop_load): Return false
	for PDP endian targets.
	(perform_symbolic_merge, find_bswap_or_nop_1, find_bswap_or_nop):
	Fix up formatting issues.
	(bswap_replace): Likewise.  For BYTES_BIG_ENDIAN, if the final
	access size is smaller than the original, adjust MEM_REF offset
	by the difference of sizes.  Use is_gimple_mem_ref_addr instead
	of is_gimple_min_invariant test to avoid adding address
	temporaries.

	* gcc.c-torture/execute/pr65215-1.c: New test.
	* gcc.c-torture/execute/pr65215-2.c: New test.
	* gcc.c-torture/execute/pr65215-3.c: New test.
	* gcc.c-torture/execute/pr65215-4.c: New test.
	* gcc.c-torture/execute/pr65215-5.c: New test.
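The crux of the fix is the new big endian adjustment in bswap_replace:
when the pass narrows the original access (e.g. only the low 32 bits of
a 64-bit load feed the recognized byte swap), the narrowed load must not
start at the same address on big endian targets, because the interesting
bytes sit at the end of the wider object rather than at its start.  A
standalone sketch of the byte arithmetic (illustration only, not part of
the patch; like the tests it assumes 8-bit chars, 4-byte unsigned int
and 8-byte unsigned long long):

#include <stdio.h>
#include <string.h>

int
main (void)
{
  unsigned long long v = 0xfeedbea8deadbeefULL;
  unsigned char b[8];
  unsigned int low;

  memcpy (b, &v, 8);
  /* Little endian stores the low half 0xdeadbeef in b[0..3], so a
     narrowed 32-bit load of (unsigned int) v reads at offset 0.
     Big endian stores 0xfeedbea8 in b[0..3]; the low half lives in
     b[4..7], so the narrowed load needs offset (64 - 32) / 8 == 4.  */
  memcpy (&low, b + (b[0] == 0xef ? 0 : 4), 4);
  printf ("low half = 0x%x\n", low);	/* 0xdeadbeef either way.  */
  return 0;
}

That offset 4 is exactly what the patch computes below as
load_offset = (bitsize - n->range) / BITS_PER_UNIT, with bitsize == 64
and n->range == 32 in this example.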
--- gcc/tree-ssa-math-opts.c.jj	2015-01-28 21:24:56.000000000 +0100
+++ gcc/tree-ssa-math-opts.c	2015-02-26 11:16:01.062024749 +0100
@@ -1780,6 +1780,10 @@ find_bswap_or_nop_load (gimple stmt, tre
   int unsignedp, volatilep;
   tree offset, base_addr;
 
+  /* Not prepared to handle PDP endian.  */
+  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
+    return false;
+
   if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
     return false;
 
@@ -1860,8 +1864,8 @@ perform_symbolic_merge (gimple source_st
      || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
    return NULL;
 
-  if (!n1->offset != !n2->offset ||
-      (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
+  if (!n1->offset != !n2->offset
+      || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
    return NULL;
 
   if (n1->bytepos < n2->bytepos)
@@ -1912,8 +1916,8 @@ perform_symbolic_merge (gimple source_st
       size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
       for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
         {
-          unsigned marker =
-            (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
+          unsigned marker
+            = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
           if (marker && marker != MARKER_BYTE_UNKNOWN)
             toinc_n_ptr->n += inc;
         }
@@ -2032,7 +2036,7 @@ find_bswap_or_nop_1 (gimple stmt, struct
        case RSHIFT_EXPR:
        case LROTATE_EXPR:
        case RROTATE_EXPR:
-         if (!do_shift_rotate (code, n, (int)TREE_INT_CST_LOW (rhs2)))
+         if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
            return NULL;
          break;
        CASE_CONVERT:
@@ -2104,12 +2108,12 @@ find_bswap_or_nop_1 (gimple stmt, struct
       if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
        return NULL;
 
-      if (!n1.vuse != !n2.vuse ||
-         (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
+      if (!n1.vuse != !n2.vuse
+         || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
        return NULL;
 
-      source_stmt =
-       perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
+      source_stmt
+       = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
 
       if (!source_stmt)
        return NULL;
@@ -2153,12 +2157,12 @@ find_bswap_or_nop (gimple stmt, struct s
      in libgcc, and for initial shift/and operation of the src operand.  */
   limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
   limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
-  source_stmt =  find_bswap_or_nop_1 (stmt, n, limit);
+  source_stmt = find_bswap_or_nop_1 (stmt, n, limit);
 
   if (!source_stmt)
     return NULL;
 
-  /* Find real size of result (highest non zero byte).  */
+  /* Find real size of result (highest non-zero byte).  */
   if (n->base_addr)
     {
       int rsize;
@@ -2261,8 +2265,30 @@ bswap_replace (gimple cur_stmt, gimple s
       tree load_offset_ptr, aligned_load_type;
       gimple addr_stmt, load_stmt;
       unsigned align;
+      HOST_WIDE_INT load_offset = 0;
 
       align = get_object_alignment (src);
+      /* If the new access is smaller than the original one, we need
+        to perform big endian adjustment.  */
+      if (BYTES_BIG_ENDIAN)
+       {
+         HOST_WIDE_INT bitsize, bitpos;
+         machine_mode mode;
+         int unsignedp, volatilep;
+         tree offset;
+
+         get_inner_reference (src, &bitsize, &bitpos, &offset, &mode,
+                              &unsignedp, &volatilep, false);
+         if (n->range < (unsigned HOST_WIDE_INT) bitsize)
+           {
+             load_offset = (bitsize - n->range) / BITS_PER_UNIT;
+             unsigned HOST_WIDE_INT l
+               = (load_offset * BITS_PER_UNIT) & (align - 1);
+             if (l)
+               align = l & -l;
+           }
+       }
+
       if (bswap
          && align < GET_MODE_ALIGNMENT (TYPE_MODE (load_type))
          && SLOW_UNALIGNED_ACCESS (TYPE_MODE (load_type), align))
@@ -2274,10 +2300,10 @@ bswap_replace (gimple cur_stmt, gimple s
       gsi_move_before (&gsi, &gsi_ins);
       gsi = gsi_for_stmt (cur_stmt);
 
-      /*  Compute address to load from and cast according to the size
-          of the load.  */
+      /* Compute address to load from and cast according to the size
+        of the load.  */
       addr_expr = build_fold_addr_expr (unshare_expr (src));
-      if (is_gimple_min_invariant (addr_expr))
+      if (is_gimple_mem_ref_addr (addr_expr))
        addr_tmp = addr_expr;
       else
        {
@@ -2291,7 +2317,7 @@ bswap_replace (gimple cur_stmt, gimple s
       aligned_load_type = load_type;
       if (align < TYPE_ALIGN (load_type))
        aligned_load_type = build_aligned_type (load_type, align);
-      load_offset_ptr = build_int_cst (n->alias_set, 0);
+      load_offset_ptr = build_int_cst (n->alias_set, load_offset);
       val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
                              load_offset_ptr);
 
@@ -2328,7 +2354,7 @@ bswap_replace (gimple cur_stmt, gimple s
            {
              fprintf (dump_file,
                       "%d bit load in target endianness found at: ",
-                      (int)n->range);
+                      (int) n->range);
              print_gimple_stmt (dump_file, cur_stmt, 0, 0);
            }
          return true;
@@ -2395,7 +2421,7 @@ bswap_replace (gimple cur_stmt, gimple s
   if (dump_file)
     {
       fprintf (dump_file, "%d bit bswap implementation found at: ",
-              (int)n->range);
+              (int) n->range);
       print_gimple_stmt (dump_file, cur_stmt, 0, 0);
     }
 
--- gcc/testsuite/gcc.c-torture/execute/pr65215-1.c.jj	2015-02-26 10:46:29.102441519 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr65215-1.c	2015-02-26 10:44:39.000000000 +0100
@@ -0,0 +1,24 @@
+/* PR tree-optimization/65215 */
+
+static inline unsigned int
+foo (unsigned int x)
+{
+  return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24);
+}
+
+__attribute__((noinline, noclone)) unsigned int
+bar (unsigned long long *x)
+{
+  return foo (*x);
+}
+
+int
+main ()
+{
+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof (unsigned long long) != 8)
+    return 0;
+  unsigned long long l = foo (0xdeadbeefU) | 0xfeedbea800000000ULL;
+  if (bar (&l) != 0xdeadbeefU)
+    __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr65215-2.c.jj	2015-02-26 10:46:31.524401403 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr65215-2.c	2015-02-26 10:45:15.000000000 +0100
@@ -0,0 +1,24 @@
+/* PR tree-optimization/65215 */
+
+static inline unsigned int
+foo (unsigned int x)
+{
+  return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24);
+}
+
+__attribute__((noinline, noclone)) unsigned long long
+bar (unsigned long long *x)
+{
+  return ((unsigned long long) foo (*x) << 32) | foo (*x >> 32);
+}
+
+int
+main ()
+{
+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof (unsigned long long) != 8)
+    return 0;
+  unsigned long long l = foo (0xfeedbea8U) | ((unsigned long long) foo (0xdeadbeefU) << 32);
+  if (bar (&l) != 0xfeedbea8deadbeefULL)
+    __builtin_abort ();
+  return 0;
+}
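The foo helper shared by pr65215-1.c and pr65215-2.c is the canonical
open-coded 32-bit byte swap that the bswap pass is meant to recognize.
Under the size assumptions the tests already check for, it behaves
exactly like GCC's __builtin_bswap32; a quick standalone harness (not
part of the patch) to confirm that:

static inline unsigned int
foo (unsigned int x)
{
  return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24);
}

int
main (void)
{
  unsigned int t[] = { 0U, 0xdeadbeefU, 0xfeedbea8U, 0xffffffffU };
  unsigned int i;

  for (i = 0; i < sizeof (t) / sizeof (t[0]); i++)
    if (foo (t[i]) != __builtin_bswap32 (t[i]))
      __builtin_abort ();
  return 0;
}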
--- gcc/testsuite/gcc.c-torture/execute/pr65215-3.c.jj	2015-02-26 10:46:33.463369288 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr65215-3.c	2015-02-26 10:45:37.000000000 +0100
@@ -0,0 +1,31 @@
+/* PR tree-optimization/65215 */
+
+struct S { unsigned long long l1 : 24, l2 : 8, l3 : 32; };
+
+static inline unsigned int
+foo (unsigned int x)
+{
+  return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24);
+}
+
+__attribute__((noinline, noclone)) unsigned long long
+bar (struct S *x)
+{
+  unsigned long long x1 = foo (((unsigned int) x->l1 << 8) | x->l2);
+  unsigned long long x2 = foo (x->l3);
+  return (x2 << 32) | x1;
+}
+
+int
+main ()
+{
+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof (unsigned long long) != 8)
+    return 0;
+  struct S s = { 0xdeadbeU, 0xefU, 0xfeedbea8U };
+  unsigned long long l = bar (&s);
+  if (foo (l >> 32) != s.l3
+      || (foo (l) >> 8) != s.l1
+      || (foo (l) & 0xff) != s.l2)
+    __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr65215-4.c.jj	2015-02-26 10:46:35.438336576 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr65215-4.c	2015-02-26 10:45:46.000000000 +0100
@@ -0,0 +1,27 @@
+/* PR tree-optimization/65215 */
+
+struct S { unsigned long long l1 : 48; };
+
+static inline unsigned int
+foo (unsigned int x)
+{
+  return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24);
+}
+
+__attribute__((noinline, noclone)) unsigned int
+bar (struct S *x)
+{
+  return foo (x->l1);
+}
+
+int
+main ()
+{
+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4 || sizeof (unsigned long long) != 8)
+    return 0;
+  struct S s;
+  s.l1 = foo (0xdeadbeefU) | (0xfeedULL << 32);
+  if (bar (&s) != 0xdeadbeefU)
+    __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr65215-5.c.jj	2015-02-26 11:14:44.664298719 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr65215-5.c	2015-02-26 11:12:27.000000000 +0100
@@ -0,0 +1,27 @@
+/* PR tree-optimization/65215 */
+
+__attribute__((noinline, noclone)) unsigned int
+foo (unsigned char *p)
+{
+  return ((unsigned int) p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
+}
+
+__attribute__((noinline, noclone)) unsigned int
+bar (unsigned char *p)
+{
+  return ((unsigned int) p[3] << 24) | (p[2] << 16) | (p[1] << 8) | p[0];
+}
+
+struct S { unsigned int a; unsigned char b[5]; };
+
+int
+main ()
+{
+  struct S s = { 1, { 2, 3, 4, 5, 6 } };
+  if (__CHAR_BIT__ != 8 || sizeof (unsigned int) != 4)
+    return 0;
+  if (foo (&s.b[1]) != 0x03040506U
+      || bar (&s.b[1]) != 0x06050403U)
+    __builtin_abort ();
+  return 0;
+}
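pr65215-5.c differs from the others in that the loads start at &s.b[1],
a constant but interior and potentially misaligned address, with foo
assembling the four bytes in big endian order and bar in little endian
order.  A standalone illustration (not part of the patch) of the two
expected values:

#include <stdio.h>

int
main (void)
{
  /* The same bytes that sit at &s.b[1] in pr65215-5.c.  */
  unsigned char b[] = { 3, 4, 5, 6 };
  unsigned int be = ((unsigned int) b[0] << 24) | (b[1] << 16)
                    | (b[2] << 8) | b[3];
  unsigned int le = ((unsigned int) b[3] << 24) | (b[2] << 16)
                    | (b[1] << 8) | b[0];

  printf ("0x%08x 0x%08x\n", be, le);	/* 0x03040506 0x06050403 */
  return 0;
}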