diff --git a/binutils-gas-multibyte-warnings.patch b/binutils-gas-multibyte-warnings.patch new file mode 100644 index 0000000..d19cd7b --- /dev/null +++ b/binutils-gas-multibyte-warnings.patch @@ -0,0 +1,384 @@ +diff -rupN binutils.orig/gas/NEWS binutils-2.37/gas/NEWS +--- binutils.orig/gas/NEWS 2021-11-18 16:50:39.104088534 +0000 ++++ binutils-2.37/gas/NEWS 2021-11-18 16:51:16.340948280 +0000 +@@ -1,5 +1,13 @@ + -*- text -*- + ++* The --multibyte-handling=[allow|warn|warn-sym-only] option tells the ++ assembler what to do when it encounters multibyte characters in the input. The ++ default is to allow them. Setting the option to "warn" will generate a ++ warning message whenever any multibyte character is encountered. Setting the ++ option to "warn-sym-only" will make the assembler generate a warning whenever a ++ symbol is defined containing multibyte characters. (References to undefined ++ symbols will not generate warnings). ++ + Changes in 2.37: + + * arm-symbianelf support removed. +diff -rupN binutils.orig/gas/app.c binutils-2.37/gas/app.c +--- binutils.orig/gas/app.c 2021-11-18 16:50:39.104088534 +0000 ++++ binutils-2.37/gas/app.c 2021-11-18 16:50:42.530075630 +0000 +@@ -345,6 +345,55 @@ process_escape (int ch) + } + } + ++#define MULTIBYTE_WARN_COUNT_LIMIT 10 ++static unsigned int multibyte_warn_count = 0; ++ ++bool ++scan_for_multibyte_characters (const unsigned char * start, ++ const unsigned char * end, ++ bool warn) ++{ ++ if (end <= start) ++ return false; ++ ++ if (warn && multibyte_warn_count >= MULTIBYTE_WARN_COUNT_LIMIT) ++ return false; /* Warning budget already spent; stay silent. */ ++ ++ bool found = false; ++ ++ while (start < end) ++ { ++ unsigned char c; ++ ++ if ((c = * start++) <= 0x7f) ++ continue; ++ ++ if (!warn) ++ return true; ++ ++ found = true; ++ ++ const char * filename; ++ unsigned int lineno; ++ ++ filename = as_where (& lineno); ++ if (filename == NULL) ++ as_warn (_("multibyte character (%#x) encountered in input"), c); ++ else if (lineno == 0) ++ as_warn (_("multibyte character 
(%#x) encountered in %s"), c, filename); ++ else ++ as_warn (_("multibyte character (%#x) encountered in %s at or near line %u"), c, filename, lineno); ++ ++ if (++ multibyte_warn_count == MULTIBYTE_WARN_COUNT_LIMIT) ++ { ++ as_warn (_("further multibyte character warnings suppressed")); ++ break; ++ } ++ } ++ ++ return found; ++} ++ + /* This function is called to process input characters. The GET + parameter is used to retrieve more input characters. GET should + set its parameter to point to a buffer, and return the length of +@@ -463,6 +512,11 @@ do_scrub_chars (size_t (*get) (char *, s + return 0; + from = input_buffer; + fromend = from + fromlen; ++ ++ if (multibyte_handling == multibyte_warn) ++ (void) scan_for_multibyte_characters ((const unsigned char *) from, ++ (const unsigned char* ) fromend, ++ true /* Generate warnings. */); + } + + while (1) +diff -rupN binutils.orig/gas/as.c binutils-2.37/gas/as.c +--- binutils.orig/gas/as.c 2021-11-18 16:50:39.104088534 +0000 ++++ binutils-2.37/gas/as.c 2021-11-18 16:50:42.531075627 +0000 +@@ -474,7 +474,7 @@ parse_args (int * pargc, char *** pargv) + OPTION_DEBUG_PREFIX_MAP, + OPTION_DEFSYM, + OPTION_LISTING_LHS_WIDTH, +- OPTION_LISTING_LHS_WIDTH2, ++ OPTION_LISTING_LHS_WIDTH2, /* = STD_BASE + 10 */ + OPTION_LISTING_RHS_WIDTH, + OPTION_LISTING_CONT_LINES, + OPTION_DEPFILE, +@@ -484,7 +484,7 @@ parse_args (int * pargc, char *** pargv) + OPTION_GDWARF_3, + OPTION_GDWARF_4, + OPTION_GDWARF_5, +- OPTION_GDWARF_SECTIONS, ++ OPTION_GDWARF_SECTIONS, /* = STD_BASE + 20 */ + OPTION_GDWARF_CIE_VERSION, + OPTION_STRIP_LOCAL_ABSOLUTE, + OPTION_TRADITIONAL_FORMAT, +@@ -494,7 +494,7 @@ parse_args (int * pargc, char *** pargv) + OPTION_NOEXECSTACK, + OPTION_SIZE_CHECK, + OPTION_ELF_STT_COMMON, +- OPTION_ELF_BUILD_NOTES, ++ OPTION_ELF_BUILD_NOTES, /* = STD_BASE + 30 */ + OPTION_SECTNAME_SUBST, + OPTION_ALTERNATE, + OPTION_AL, +@@ -503,7 +503,8 @@ parse_args (int * pargc, char *** pargv) + OPTION_WARN_FATAL, + 
OPTION_COMPRESS_DEBUG, + OPTION_NOCOMPRESS_DEBUG, +- OPTION_NO_PAD_SECTIONS /* = STD_BASE + 40 */ ++ OPTION_NO_PAD_SECTIONS, ++ OPTION_MULTIBYTE_HANDLING /* = STD_BASE + 40 */ + /* When you add options here, check that they do + not collide with OPTION_MD_BASE. See as.h. */ + }; +@@ -581,6 +582,7 @@ parse_args (int * pargc, char *** pargv) + ,{"target-help", no_argument, NULL, OPTION_TARGET_HELP} + ,{"traditional-format", no_argument, NULL, OPTION_TRADITIONAL_FORMAT} + ,{"warn", no_argument, NULL, OPTION_WARN} ++ ,{"multibyte-handling", required_argument, NULL, OPTION_MULTIBYTE_HANDLING} + }; + + /* Construct the option lists from the standard list and the target +@@ -683,6 +685,19 @@ parse_args (int * pargc, char *** pargv) + flag_traditional_format = 1; + break; + ++ case OPTION_MULTIBYTE_HANDLING: ++ if (strcmp (optarg, "allow") == 0) ++ multibyte_handling = multibyte_allow; ++ else if (strcmp (optarg, "warn") == 0) ++ multibyte_handling = multibyte_warn; ++ else if (strcmp (optarg, "warn-sym-only") == 0) ++ multibyte_handling = multibyte_warn_syms; ++ else if (strcmp (optarg, "warn_sym_only") == 0) ++ multibyte_handling = multibyte_warn_syms; ++ else ++ as_fatal (_("unexpected argument to --multibyte-handling: '%s'"), optarg); ++ break; ++ + case OPTION_VERSION: + /* This output is intended to follow the GNU standards document. */ + printf (_("GNU assembler %s\n"), BFD_VERSION_STRING); +diff -rupN binutils.orig/gas/as.h binutils-2.37/gas/as.h +--- binutils.orig/gas/as.h 2021-11-18 16:50:38.834089551 +0000 ++++ binutils-2.37/gas/as.h 2021-11-18 16:50:42.531075627 +0000 +@@ -344,6 +344,14 @@ COMMON int linkrelax; + + COMMON int do_not_pad_sections_to_alignment; + ++enum multibyte_input_handling ++{ ++ multibyte_allow = 0, ++ multibyte_warn, ++ multibyte_warn_syms ++}; ++COMMON enum multibyte_input_handling multibyte_handling; ++ + /* TRUE if we should produce a listing. 
*/ + extern int listing; + +@@ -450,6 +458,7 @@ void input_scrub_insert_file (char *); + char * input_scrub_new_file (const char *); + char * input_scrub_next_buffer (char **bufp); + size_t do_scrub_chars (size_t (*get) (char *, size_t), char *, size_t); ++bool scan_for_multibyte_characters (const unsigned char *, const unsigned char *, bool); + int gen_to_words (LITTLENUM_TYPE *, int, long); + int had_err (void); + int ignore_input (void); +diff -rupN binutils.orig/gas/doc/as.texi binutils-2.37/gas/doc/as.texi +--- binutils.orig/gas/doc/as.texi 2021-11-18 16:50:38.838089536 +0000 ++++ binutils-2.37/gas/doc/as.texi 2021-11-18 16:50:42.535075612 +0000 +@@ -245,6 +245,7 @@ gcc(1), ld(1), and the Info entries for + [@b{--sectname-subst}] [@b{--size-check=[error|warning]}] + [@b{--elf-stt-common=[no|yes]}] + [@b{--generate-missing-build-notes=[no|yes]}] ++ [@b{--multibyte-handling=[allow|warn|warn-sym-only]}] + [@b{--target-help}] [@var{target-options}] + [@b{--}|@var{files} @dots{}] + @c +@@ -866,6 +867,18 @@ Set the maximum width of an input source + Set the maximum number of lines printed in a listing for a single line of input + to @var{number} + 1. + ++@item --multibyte-handling=allow ++@itemx --multibyte-handling=warn ++@itemx --multibyte-handling=warn-sym-only ++Controls how the assembler handles multibyte characters in the input. The ++default (which can be restored by using the @option{allow} argument) is to ++allow such characters without complaint. Using the @option{warn} argument will ++make the assembler generate a warning message whenever any multibyte character ++is encountered. Using the @option{warn-sym-only} argument will only cause a ++warning to be generated when a symbol is defined with a name that contains ++multibyte characters. (References to undefined symbols will not generate a ++warning). ++ + @item --no-pad-sections + Stop the assembler for padding the ends of output sections to the alignment + of that section. 
The default is to pad the sections, but this can waste space +@@ -2942,9 +2955,11 @@ are noted in @ref{Machine Dependencies}. + @end ifset + No symbol may begin with a digit. Case is significant. + There is no length limit; all characters are significant. Multibyte characters +-are supported. Symbols are delimited by characters not in that set, or by the +-beginning of a file (since the source program must end with a newline, the end +-of a file is not a possible symbol delimiter). @xref{Symbols}. ++are supported, but note that the setting of the ++@option{--multibyte-handling} option might prevent their use. Symbols ++are delimited by characters not in that set, or by the beginning of a file ++(since the source program must end with a newline, the end of a file is not a ++possible symbol delimiter). @xref{Symbols}. + + Symbol names may also be enclosed in double quote @code{"} characters. In such + cases any characters are allowed, except for the NUL character. If a double +@@ -3834,11 +3849,18 @@ than @code{Foo}. + Symbol names do not start with a digit. An exception to this rule is made for + Local Labels. See below. + +-Multibyte characters are supported. To generate a symbol name containing ++Multibyte characters are supported, but note that the setting of the ++@option{--multibyte-handling} option might prevent their use. ++To generate a symbol name containing + multibyte characters enclose it within double quotes and use escape codes. cf + @xref{Strings}. Generating a multibyte symbol name from a label is not + currently supported. + ++Since multibyte symbol names are unusual, and could possibly be used ++maliciously, @command{@value{AS}} provides a command line option ++(@option{--multibyte-handling=warn-sym-only}) which can be used to generate a ++warning message whenever a symbol name containing multibyte characters is defined. ++ + Each symbol has exactly one name. Each name in an assembly language program + refers to exactly one symbol. 
You may use that symbol name any number of times + in a program. +diff -rupN binutils.orig/gas/input-scrub.c binutils-2.37/gas/input-scrub.c +--- binutils.orig/gas/input-scrub.c 2021-11-18 16:50:38.835089547 +0000 ++++ binutils-2.37/gas/input-scrub.c 2021-11-18 16:50:42.535075612 +0000 +@@ -377,6 +377,11 @@ input_scrub_next_buffer (char **bufp) + ++p; + } + ++ if (multibyte_handling == multibyte_warn) ++ (void) scan_for_multibyte_characters ((const unsigned char *) p, ++ (const unsigned char *) limit, ++ true /* Generate warnings */); ++ + /* We found a newline in the newly read chars. */ + partial_where = p; + partial_size = limit - p; +diff -rupN binutils.orig/gas/symbols.c binutils-2.37/gas/symbols.c +--- binutils.orig/gas/symbols.c 2021-11-18 16:50:39.105088530 +0000 ++++ binutils-2.37/gas/symbols.c 2021-11-18 16:52:17.980716107 +0000 +@@ -78,6 +78,10 @@ struct symbol_flags + before. It is cleared as soon as any direct reference to the + symbol is present. */ + unsigned int weakrefd : 1; ++ ++ /* Set when a warning about the symbol containing multibyte characters ++ is generated. */ ++ unsigned int multibyte_warned : 1; + }; + + /* A pointer in the symbol may point to either a complete symbol +@@ -194,7 +198,7 @@ static void * + symbol_entry_find (htab_t table, const char *name) + { + hashval_t hash = htab_hash_string (name); +- symbol_entry_t needle = { { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, ++ symbol_entry_t needle = { { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + hash, name, 0, 0, 0 } }; + return htab_find_with_hash (table, &needle, hash); + } +@@ -305,6 +309,18 @@ symbol_init (symbolS *symbolP, const cha + symbolP->bsym->name = name; + symbolP->bsym->section = sec; + ++ if (multibyte_handling == multibyte_warn_syms ++ && ! symbolP->flags.local_symbol ++ && sec != undefined_section ++ && ! symbolP->flags.multibyte_warned ++ && scan_for_multibyte_characters ((const unsigned char *) name, ++ (const unsigned char *) name + strlen (name), ++ false /* Do not warn. 
*/)) ++ { ++ as_warn (_("symbol '%s' contains multibyte characters"), name); ++ symbolP->flags.multibyte_warned = 1; ++ } ++ + S_SET_VALUE (symbolP, valu); + + symbol_clear_list_pointers (symbolP); +@@ -2413,7 +2429,21 @@ S_SET_SEGMENT (symbolS *s, segT seg) + abort (); + } + else +- s->bsym->section = seg; ++ { ++ if (multibyte_handling == multibyte_warn_syms ++ && ! s->flags.local_symbol ++ && seg != undefined_section ++ && ! s->flags.multibyte_warned ++ && scan_for_multibyte_characters ((const unsigned char *) s->name, ++ (const unsigned char *) s->name + strlen (s->name), ++ false)) ++ { ++ as_warn (_("symbol '%s' contains multibyte characters"), s->name); ++ s->flags.multibyte_warned = 1; ++ } ++ ++ s->bsym->section = seg; ++ } + } + + void +diff -rupN binutils.orig/gas/testsuite/gas/all/gas.exp binutils-2.37/gas/testsuite/gas/all/gas.exp +--- binutils.orig/gas/testsuite/gas/all/gas.exp 2021-11-18 16:50:39.101088545 +0000 ++++ binutils-2.37/gas/testsuite/gas/all/gas.exp 2021-11-18 16:50:42.538075600 +0000 +@@ -494,3 +494,5 @@ run_dump_test "nop" + run_dump_test "asciz" + run_dump_test "pr27384" + run_dump_test "pr27381" ++run_dump_test "multibyte1" ++run_dump_test "multibyte2" +diff -rupN binutils.orig/gas/testsuite/gas/all/multibyte.s binutils-2.37/gas/testsuite/gas/all/multibyte.s +--- binutils.orig/gas/testsuite/gas/all/multibyte.s 1970-01-01 01:00:00.000000000 +0100 ++++ binutils-2.37/gas/testsuite/gas/all/multibyte.s 2021-11-18 16:50:42.541075589 +0000 +@@ -0,0 +1,8 @@ ++ .text ++ .globl he‮oll‬ ++he‮oll‬: ++ .nop ++ ++ .globl hello ++hello: ++ .nop +diff -rupN binutils.orig/gas/testsuite/gas/all/multibyte1.d binutils-2.37/gas/testsuite/gas/all/multibyte1.d +--- binutils.orig/gas/testsuite/gas/all/multibyte1.d 1970-01-01 01:00:00.000000000 +0100 ++++ binutils-2.37/gas/testsuite/gas/all/multibyte1.d 2021-11-18 16:50:42.541075589 +0000 +@@ -0,0 +1,3 @@ ++#source: multibyte.s ++#as: --multibyte-handling=warn ++#warning_output: multibyte1.l +diff -rupN 
binutils.orig/gas/testsuite/gas/all/multibyte1.l binutils-2.37/gas/testsuite/gas/all/multibyte1.l +--- binutils.orig/gas/testsuite/gas/all/multibyte1.l 1970-01-01 01:00:00.000000000 +0100 ++++ binutils-2.37/gas/testsuite/gas/all/multibyte1.l 2021-11-18 16:50:42.541075589 +0000 +@@ -0,0 +1,12 @@ ++[^:]*: Assembler messages: ++[^:]*: Warning: multibyte character \(0xe2\) encountered in .*multibyte.s ++[^:]*: Warning: multibyte character \(0x80\) encountered in .*multibyte.s ++[^:]*: Warning: multibyte character \(0xae\) encountered in .*multibyte.s ++[^:]*: Warning: multibyte character \(0xe2\) encountered in .*multibyte.s ++[^:]*: Warning: multibyte character \(0x80\) encountered in .*multibyte.s ++[^:]*: Warning: multibyte character \(0xac\) encountered in .*multibyte.s ++[^:]*: Warning: multibyte character \(0xe2\) encountered in .*multibyte.s ++[^:]*: Warning: multibyte character \(0x80\) encountered in .*multibyte.s ++[^:]*: Warning: multibyte character \(0xae\) encountered in .*multibyte.s ++[^:]*: Warning: multibyte character \(0xe2\) encountered in .*multibyte.s ++[^:]*: Warning: further multibyte character warnings suppressed +diff -rupN binutils.orig/gas/testsuite/gas/all/multibyte2.d binutils-2.37/gas/testsuite/gas/all/multibyte2.d +--- binutils.orig/gas/testsuite/gas/all/multibyte2.d 1970-01-01 01:00:00.000000000 +0100 ++++ binutils-2.37/gas/testsuite/gas/all/multibyte2.d 2021-11-18 16:50:42.542075585 +0000 +@@ -0,0 +1,3 @@ ++#source: multibyte.s ++#as: --multibyte-handling=warn-sym-only ++#warning_output: multibyte2.l +diff -rupN binutils.orig/gas/testsuite/gas/all/multibyte2.l binutils-2.37/gas/testsuite/gas/all/multibyte2.l +--- binutils.orig/gas/testsuite/gas/all/multibyte2.l 1970-01-01 01:00:00.000000000 +0100 ++++ binutils-2.37/gas/testsuite/gas/all/multibyte2.l 2021-11-18 16:50:42.541075589 +0000 +@@ -0,0 +1,2 @@ ++[^:]*: Assembler messages: ++[^:]*:3: Warning: symbol '.*' contains multibyte characters diff --git a/binutils.spec b/binutils.spec index 329738f..e053fdc 100644 --- a/binutils.spec 
+++ b/binutils.spec @@ -39,7 +39,7 @@ Summary: A GNU collection of binary utilities Name: binutils%{?name_cross}%{?_with_debug:-debug} Version: 2.37 -Release: 12%{?dist} +Release: 13%{?dist} License: GPLv3+ URL: https://sourceware.org/binutils @@ -295,6 +295,10 @@ Patch21: binutils-gas-Use-the-directory-name-in-.file-0.patch # Lifetime: Fixed in 2.38. Patch22: binutils.unicode.patch +# Purpose: Add ability to warn about unicode characters in the assembler +# Lifetime: Fixed in 2.38. +Patch23: binutils-gas-multibyte-warnings.patch + #---------------------------------------------------------------------------- Provides: bundled(libiberty) @@ -912,6 +916,9 @@ exit 0 #---------------------------------------------------------------------------- %changelog +* Fri Nov 19 2021 Nick Clifton - 2.37-13 +- Add ability to warn about multibyte characters in the assembler. (#2018848) + * Tue Nov 09 2021 Nick Clifton - 2.37-12 - Add ability to show unicode characters to display tools.