binutils/binutils-gas-multibyte-warnings.patch

385 lines
15 KiB
Diff
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

diff -rupN binutils.orig/gas/NEWS binutils-2.37/gas/NEWS
--- binutils.orig/gas/NEWS 2021-11-18 16:50:39.104088534 +0000
+++ binutils-2.37/gas/NEWS 2021-11-18 16:51:16.340948280 +0000
@@ -1,5 +1,13 @@
-*- text -*-
+* The --multibyte-handling=[allow|warn|warn-sym-only] option tells the
+ assembler what to do when it encounters multibyte characters in the input. The
+ default is to allow them. Setting the option to "warn" will generate a
+ warning message whenever any multibyte character is encountered. Setting the
+ option to "warn-sym-only" will make the assembler generate a warning whenever a
+ symbol is defined containing multibyte characters. (References to undefined
+ symbols will not generate warnings).
+
Changes in 2.37:
* arm-symbianelf support removed.
diff -rupN binutils.orig/gas/app.c binutils-2.37/gas/app.c
--- binutils.orig/gas/app.c 2021-11-18 16:50:39.104088534 +0000
+++ binutils-2.37/gas/app.c 2021-11-18 16:50:42.530075630 +0000
@@ -345,6 +345,71 @@ process_escape (int ch)
}
}
+#define MULTIBYTE_WARN_COUNT_LIMIT 10
+static unsigned int multibyte_warn_count = 0;
+
+/* Scan the bytes in [START, END) for multibyte characters, detected as
+   any byte with the top bit set (value above 0x7f).
+
+   If WARN is false, return true as soon as such a byte is found, or
+   false if there is none; no message is issued.
+
+   If WARN is true, issue a warning for each such byte, quoting the
+   file name and line number from as_where when they are available.
+   At most MULTIBYTE_WARN_COUNT_LIMIT such warnings are issued across
+   all calls; the last is followed by a notice that further warnings
+   are suppressed, and subsequent calls with WARN set return false
+   without scanning.  Otherwise return true if this call issued any
+   warning.  */
+bool
+scan_for_multibyte_characters (const unsigned char * start,
+                               const unsigned char * end,
+                               bool warn)
+{
+  if (end <= start)
+    return false;
+
+  /* Compare with >=: the counter stops at exactly the limit once the
+     suppression notice has been printed, so > would allow one more
+     buffer's worth of warnings after that notice.  */
+  if (warn && multibyte_warn_count >= MULTIBYTE_WARN_COUNT_LIMIT)
+    return false;
+
+  bool found = false;
+
+  while (start < end)
+    {
+      unsigned char c;
+
+      if ((c = * start++) <= 0x7f)
+        continue;
+
+      if (!warn)
+        return true;
+
+      found = true;
+
+      const char * filename;
+      unsigned int lineno;
+
+      filename = as_where (& lineno);
+      if (filename == NULL)
+        as_warn (_("multibyte character (%#x) encountered in input"), c);
+      else if (lineno == 0)
+        as_warn (_("multibyte character (%#x) encountered in %s"), c, filename);
+      else
+        as_warn (_("multibyte character (%#x) encountered in %s at or near line %u"), c, filename, lineno);
+
+      if (++ multibyte_warn_count == MULTIBYTE_WARN_COUNT_LIMIT)
+        {
+          as_warn (_("further multibyte character warnings suppressed"));
+          break;
+        }
+    }
+
+  return found;
+}
+
/* This function is called to process input characters. The GET
parameter is used to retrieve more input characters. GET should
set its parameter to point to a buffer, and return the length of
@@ -463,6 +512,11 @@ do_scrub_chars (size_t (*get) (char *, s
return 0;
from = input_buffer;
fromend = from + fromlen;
+
+ if (multibyte_handling == multibyte_warn)
+ (void) scan_for_multibyte_characters ((const unsigned char *) from,
+ (const unsigned char* ) fromend,
+ true /* Generate warnings. */);
}
while (1)
diff -rupN binutils.orig/gas/as.c binutils-2.37/gas/as.c
--- binutils.orig/gas/as.c 2021-11-18 16:50:39.104088534 +0000
+++ binutils-2.37/gas/as.c 2021-11-18 16:50:42.531075627 +0000
@@ -474,7 +474,7 @@ parse_args (int * pargc, char *** pargv)
OPTION_DEBUG_PREFIX_MAP,
OPTION_DEFSYM,
OPTION_LISTING_LHS_WIDTH,
- OPTION_LISTING_LHS_WIDTH2,
+ OPTION_LISTING_LHS_WIDTH2, /* = STD_BASE + 10 */
OPTION_LISTING_RHS_WIDTH,
OPTION_LISTING_CONT_LINES,
OPTION_DEPFILE,
@@ -484,7 +484,7 @@ parse_args (int * pargc, char *** pargv)
OPTION_GDWARF_3,
OPTION_GDWARF_4,
OPTION_GDWARF_5,
- OPTION_GDWARF_SECTIONS,
+ OPTION_GDWARF_SECTIONS, /* = STD_BASE + 20 */
OPTION_GDWARF_CIE_VERSION,
OPTION_STRIP_LOCAL_ABSOLUTE,
OPTION_TRADITIONAL_FORMAT,
@@ -494,7 +494,7 @@ parse_args (int * pargc, char *** pargv)
OPTION_NOEXECSTACK,
OPTION_SIZE_CHECK,
OPTION_ELF_STT_COMMON,
- OPTION_ELF_BUILD_NOTES,
+ OPTION_ELF_BUILD_NOTES, /* = STD_BASE + 30 */
OPTION_SECTNAME_SUBST,
OPTION_ALTERNATE,
OPTION_AL,
@@ -503,7 +503,8 @@ parse_args (int * pargc, char *** pargv)
OPTION_WARN_FATAL,
OPTION_COMPRESS_DEBUG,
OPTION_NOCOMPRESS_DEBUG,
- OPTION_NO_PAD_SECTIONS /* = STD_BASE + 40 */
+ OPTION_NO_PAD_SECTIONS,
+ OPTION_MULTIBYTE_HANDLING /* = STD_BASE + 40 */
/* When you add options here, check that they do
not collide with OPTION_MD_BASE. See as.h. */
};
@@ -581,6 +582,7 @@ parse_args (int * pargc, char *** pargv)
,{"target-help", no_argument, NULL, OPTION_TARGET_HELP}
,{"traditional-format", no_argument, NULL, OPTION_TRADITIONAL_FORMAT}
,{"warn", no_argument, NULL, OPTION_WARN}
+ ,{"multibyte-handling", required_argument, NULL, OPTION_MULTIBYTE_HANDLING}
};
/* Construct the option lists from the standard list and the target
@@ -683,6 +685,19 @@ parse_args (int * pargc, char *** pargv)
flag_traditional_format = 1;
break;
+        case OPTION_MULTIBYTE_HANDLING: /* --multibyte-handling=[allow|warn|warn-sym-only] */
+          if (strcmp (optarg, "allow") == 0)
+            multibyte_handling = multibyte_allow;
+          else if (strcmp (optarg, "warn") == 0)
+            multibyte_handling = multibyte_warn;
+          else if (strcmp (optarg, "warn-sym-only") == 0)
+            multibyte_handling = multibyte_warn_syms;
+          else if (strcmp (optarg, "warn_sym_only") == 0) /* Underscore spelling selects the same mode.  */
+            multibyte_handling = multibyte_warn_syms;
+          else /* Name the option as the user typed it, not an option that does not exist.  */
+            as_fatal (_("unexpected argument to --multibyte-handling: '%s'"), optarg);
+          break;
+
case OPTION_VERSION:
/* This output is intended to follow the GNU standards document. */
printf (_("GNU assembler %s\n"), BFD_VERSION_STRING);
diff -rupN binutils.orig/gas/as.h binutils-2.37/gas/as.h
--- binutils.orig/gas/as.h 2021-11-18 16:50:38.834089551 +0000
+++ binutils-2.37/gas/as.h 2021-11-18 16:50:42.531075627 +0000
@@ -344,6 +344,14 @@ COMMON int linkrelax;
COMMON int do_not_pad_sections_to_alignment;
+enum multibyte_input_handling /* Policy for multibyte characters in the assembler's input.  */
+{
+ multibyte_allow = 0, /* Accept them without complaint; documented as the default.  */
+ multibyte_warn, /* Warn about any multibyte character found while scanning input.  */
+ multibyte_warn_syms /* Warn only when a defined symbol's name contains them.  */
+};
+COMMON enum multibyte_input_handling multibyte_handling; /* Current policy; set from --multibyte-handling in parse_args.  */
+
/* TRUE if we should produce a listing. */
extern int listing;
@@ -450,6 +458,7 @@ void input_scrub_insert_file (char *);
char * input_scrub_new_file (const char *);
char * input_scrub_next_buffer (char **bufp);
size_t do_scrub_chars (size_t (*get) (char *, size_t), char *, size_t);
+bool scan_for_multibyte_characters (const unsigned char *, const unsigned char *, bool);
int gen_to_words (LITTLENUM_TYPE *, int, long);
int had_err (void);
int ignore_input (void);
diff -rupN binutils.orig/gas/doc/as.texi binutils-2.37/gas/doc/as.texi
--- binutils.orig/gas/doc/as.texi 2021-11-18 16:50:38.838089536 +0000
+++ binutils-2.37/gas/doc/as.texi 2021-11-18 16:50:42.535075612 +0000
@@ -245,6 +245,7 @@ gcc(1), ld(1), and the Info entries for
[@b{--sectname-subst}] [@b{--size-check=[error|warning]}]
[@b{--elf-stt-common=[no|yes]}]
[@b{--generate-missing-build-notes=[no|yes]}]
+ [@b{--multibyte-handling=[allow|warn|warn-sym-only]}]
[@b{--target-help}] [@var{target-options}]
[@b{--}|@var{files} @dots{}]
@c
@@ -866,6 +867,18 @@ Set the maximum width of an input source
Set the maximum number of lines printed in a listing for a single line of input
to @var{number} + 1.
+@item --multibyte-handling=allow
+@itemx --multibyte-handling=warn
+@itemx --multibyte-handling=warn-sym-only
+Controls how the assembler handles multibyte characters in the input. The
+default (which can be restored by using the @option{allow} argument) is to
+allow such characters without complaint. Using the @option{warn} argument will
+make the assembler generate a warning message whenever any multibyte character
+is encountered. Using the @option{warn-sym-only} argument will only cause a
+warning to be generated when a symbol is defined with a name that contains
+multibyte characters. (References to undefined symbols will not generate a
+warning).
+
@item --no-pad-sections
Stop the assembler for padding the ends of output sections to the alignment
of that section. The default is to pad the sections, but this can waste space
@@ -2942,9 +2955,11 @@ are noted in @ref{Machine Dependencies}.
@end ifset
No symbol may begin with a digit. Case is significant.
There is no length limit; all characters are significant. Multibyte characters
-are supported. Symbols are delimited by characters not in that set, or by the
-beginning of a file (since the source program must end with a newline, the end
-of a file is not a possible symbol delimiter). @xref{Symbols}.
+are supported, but note that the setting of the
+@option{--multibyte-handling} option might prevent their use. Symbols
+are delimited by characters not in that set, or by the beginning of a file
+(since the source program must end with a newline, the end of a file is not a
+possible symbol delimiter). @xref{Symbols}.
Symbol names may also be enclosed in double quote @code{"} characters. In such
cases any characters are allowed, except for the NUL character. If a double
@@ -3834,11 +3849,18 @@ than @code{Foo}.
Symbol names do not start with a digit. An exception to this rule is made for
Local Labels. See below.
-Multibyte characters are supported. To generate a symbol name containing
+Multibyte characters are supported, but note that the setting of the
+@option{--multibyte-handling} option might prevent their use.
+To generate a symbol name containing
multibyte characters enclose it within double quotes and use escape codes. cf
@xref{Strings}. Generating a multibyte symbol name from a label is not
currently supported.
+Since multibyte symbol names are unusual, and could possibly be used
+maliciously, @command{@value{AS}} provides a command line option
+(@option{--multibyte-handling=warn-sym-only}) which can be used to generate a
+warning message whenever a symbol name containing multibyte characters is defined.
+
Each symbol has exactly one name. Each name in an assembly language program
refers to exactly one symbol. You may use that symbol name any number of times
in a program.
diff -rupN binutils.orig/gas/input-scrub.c binutils-2.37/gas/input-scrub.c
--- binutils.orig/gas/input-scrub.c 2021-11-18 16:50:38.835089547 +0000
+++ binutils-2.37/gas/input-scrub.c 2021-11-18 16:50:42.535075612 +0000
@@ -377,6 +377,11 @@ input_scrub_next_buffer (char **bufp)
++p;
}
+ if (multibyte_handling == multibyte_warn)
+ (void) scan_for_multibyte_characters ((const unsigned char *) p,
+ (const unsigned char *) limit,
+ true /* Generate warnings */);
+
/* We found a newline in the newly read chars. */
partial_where = p;
partial_size = limit - p;
diff -rupN binutils.orig/gas/symbols.c binutils-2.37/gas/symbols.c
--- binutils.orig/gas/symbols.c 2021-11-18 16:50:39.105088530 +0000
+++ binutils-2.37/gas/symbols.c 2021-11-18 16:52:17.980716107 +0000
@@ -78,6 +78,10 @@ struct symbol_flags
before. It is cleared as soon as any direct reference to the
symbol is present. */
unsigned int weakrefd : 1;
+
+ /* Set when a warning about the symbol containing multibyte characters
+ is generated. */
+ unsigned int multibyte_warned : 1;
};
/* A pointer in the symbol may point to either a complete symbol
@@ -194,7 +198,7 @@ static void *
symbol_entry_find (htab_t table, const char *name)
{
hashval_t hash = htab_hash_string (name);
- symbol_entry_t needle = { { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ symbol_entry_t needle = { { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
hash, name, 0, 0, 0 } };
return htab_find_with_hash (table, &needle, hash);
}
@@ -305,6 +309,18 @@ symbol_init (symbolS *symbolP, const cha
symbolP->bsym->name = name;
symbolP->bsym->section = sec;
+ if (multibyte_handling == multibyte_warn_syms
+ && ! symbolP->flags.local_symbol
+ && sec != undefined_section
+ && ! symbolP->flags.multibyte_warned
+ && scan_for_multibyte_characters ((const unsigned char *) name,
+ (const unsigned char *) name + strlen (name),
+ false /* Do not warn. */))
+ {
+ as_warn (_("symbol '%s' contains multibyte characters"), name);
+ symbolP->flags.multibyte_warned = 1;
+ }
+
S_SET_VALUE (symbolP, valu);
symbol_clear_list_pointers (symbolP);
@@ -2413,7 +2429,21 @@ S_SET_SEGMENT (symbolS *s, segT seg)
abort ();
}
else
- s->bsym->section = seg;
+ {
+ if (multibyte_handling == multibyte_warn_syms
+ && ! s->flags.local_symbol
+ && seg != undefined_section
+ && ! s->flags.multibyte_warned
+ && scan_for_multibyte_characters ((const unsigned char *) s->name,
+ (const unsigned char *) s->name + strlen (s->name),
+ false))
+ {
+ as_warn (_("symbol '%s' contains multibyte characters"), s->name);
+ s->flags.multibyte_warned = 1;
+ }
+
+ s->bsym->section = seg;
+ }
}
void
diff -rupN binutils.orig/gas/testsuite/gas/all/gas.exp binutils-2.37/gas/testsuite/gas/all/gas.exp
--- binutils.orig/gas/testsuite/gas/all/gas.exp 2021-11-18 16:50:39.101088545 +0000
+++ binutils-2.37/gas/testsuite/gas/all/gas.exp 2021-11-18 16:50:42.538075600 +0000
@@ -494,3 +494,5 @@ run_dump_test "nop"
run_dump_test "asciz"
run_dump_test "pr27384"
run_dump_test "pr27381"
+run_dump_test "multibyte1"
+run_dump_test "multibyte2"
diff -rupN binutils.orig/gas/testsuite/gas/all/multibyte.s binutils-2.37/gas/testsuite/gas/all/multibyte.s
--- binutils.orig/gas/testsuite/gas/all/multibyte.s 1970-01-01 01:00:00.000000000 +0100
+++ binutils-2.37/gas/testsuite/gas/all/multibyte.s 2021-11-18 16:50:42.541075589 +0000
@@ -0,0 +1,8 @@
+ .text
+ .globl heoll
+heoll:
+ .nop
+
+ .globl hello
+hello:
+ .nop
diff -rupN binutils.orig/gas/testsuite/gas/all/multibyte1.d binutils-2.37/gas/testsuite/gas/all/multibyte1.d
--- binutils.orig/gas/testsuite/gas/all/multibyte1.d 1970-01-01 01:00:00.000000000 +0100
+++ binutils-2.37/gas/testsuite/gas/all/multibyte1.d 2021-11-18 16:50:42.541075589 +0000
@@ -0,0 +1,3 @@
+#source: multibyte.s
+#as: --multibyte-handling=warn
+#warning_output: multibyte1.l
diff -rupN binutils.orig/gas/testsuite/gas/all/multibyte1.l binutils-2.37/gas/testsuite/gas/all/multibyte1.l
--- binutils.orig/gas/testsuite/gas/all/multibyte1.l 1970-01-01 01:00:00.000000000 +0100
+++ binutils-2.37/gas/testsuite/gas/all/multibyte1.l 2021-11-18 16:50:42.541075589 +0000
@@ -0,0 +1,12 @@
+[^:]*: Assembler messages:
+[^:]*: Warning: multibyte character \(0xe2\) encountered in .*multibyte.s
+[^:]*: Warning: multibyte character \(0x80\) encountered in .*multibyte.s
+[^:]*: Warning: multibyte character \(0xae\) encountered in .*multibyte.s
+[^:]*: Warning: multibyte character \(0xe2\) encountered in .*multibyte.s
+[^:]*: Warning: multibyte character \(0x80\) encountered in .*multibyte.s
+[^:]*: Warning: multibyte character \(0xac\) encountered in .*multibyte.s
+[^:]*: Warning: multibyte character \(0xe2\) encountered in .*multibyte.s
+[^:]*: Warning: multibyte character \(0x80\) encountered in .*multibyte.s
+[^:]*: Warning: multibyte character \(0xae\) encountered in .*multibyte.s
+[^:]*: Warning: multibyte character \(0xe2\) encountered in .*multibyte.s
+[^:]*: Warning: further multibyte character warnings suppressed
diff -rupN binutils.orig/gas/testsuite/gas/all/multibyte2.d binutils-2.37/gas/testsuite/gas/all/multibyte2.d
--- binutils.orig/gas/testsuite/gas/all/multibyte2.d 1970-01-01 01:00:00.000000000 +0100
+++ binutils-2.37/gas/testsuite/gas/all/multibyte2.d 2021-11-18 16:50:42.542075585 +0000
@@ -0,0 +1,3 @@
+#source: multibyte.s
+#as: --multibyte-handling=warn-sym-only
+#warning_output: multibyte2.l
diff -rupN binutils.orig/gas/testsuite/gas/all/multibyte2.l binutils-2.37/gas/testsuite/gas/all/multibyte2.l
--- binutils.orig/gas/testsuite/gas/all/multibyte2.l 1970-01-01 01:00:00.000000000 +0100
+++ binutils-2.37/gas/testsuite/gas/all/multibyte2.l 2021-11-18 16:50:42.541075589 +0000
@@ -0,0 +1,2 @@
+[^:]*: Assembler messages:
+[^:]*:3: Warning: symbol '.*' contains multibyte characters