Add ability to warn about multibyte characters in the assembler. (#2018848)
This commit is contained in:
parent
1524ef5833
commit
620973c050
384
binutils-gas-multibyte-warnings.patch
Normal file
384
binutils-gas-multibyte-warnings.patch
Normal file
@ -0,0 +1,384 @@
|
||||
diff -rupN binutils.orig/gas/NEWS binutils-2.37/gas/NEWS
|
||||
--- binutils.orig/gas/NEWS 2021-11-18 16:50:39.104088534 +0000
|
||||
+++ binutils-2.37/gas/NEWS 2021-11-18 16:51:16.340948280 +0000
|
||||
@@ -1,5 +1,13 @@
|
||||
-*- text -*-
|
||||
|
||||
+* The --multibyte-handling=[allow|warn|warn-sym-only] option tells the
|
||||
+ assembler what to do when it encounters multibyte characters in the input. The
|
||||
+ default is to allow them. Setting the option to "warn" will generate a
|
||||
+ warning message whenever any multibyte character is encountered. Setting the
|
||||
+ option to "warn-sym-only" will make the assembler generate a warning whenever a
|
||||
+ symbol is defined containing multibyte characters. (References to undefined
|
||||
+ symbols will not generate warnings).
|
||||
+
|
||||
Changes in 2.37:
|
||||
|
||||
* arm-symbianelf support removed.
|
||||
diff -rupN binutils.orig/gas/app.c binutils-2.37/gas/app.c
|
||||
--- binutils.orig/gas/app.c 2021-11-18 16:50:39.104088534 +0000
|
||||
+++ binutils-2.37/gas/app.c 2021-11-18 16:50:42.530075630 +0000
|
||||
@@ -345,6 +345,55 @@ process_escape (int ch)
|
||||
}
|
||||
}
|
||||
|
||||
+#define MULTIBYTE_WARN_COUNT_LIMIT 10
|
||||
+static unsigned int multibyte_warn_count = 0;
|
||||
+
|
||||
+bool
|
||||
+scan_for_multibyte_characters (const unsigned char * start,
|
||||
+ const unsigned char * end,
|
||||
+ bool warn)
|
||||
+{
|
||||
+ if (end <= start)
|
||||
+ return false;
|
||||
+
|
||||
+ if (warn && multibyte_warn_count >= MULTIBYTE_WARN_COUNT_LIMIT)
|
||||
+ return false;
|
||||
+
|
||||
+ bool found = false;
|
||||
+
|
||||
+ while (start < end)
|
||||
+ {
|
||||
+ unsigned char c;
|
||||
+
|
||||
+ if ((c = * start++) <= 0x7f)
|
||||
+ continue;
|
||||
+
|
||||
+ if (!warn)
|
||||
+ return true;
|
||||
+
|
||||
+ found = true;
|
||||
+
|
||||
+ const char * filename;
|
||||
+ unsigned int lineno;
|
||||
+
|
||||
+ filename = as_where (& lineno);
|
||||
+ if (filename == NULL)
|
||||
+ as_warn (_("multibyte character (%#x) encountered in input"), c);
|
||||
+ else if (lineno == 0)
|
||||
+ as_warn (_("multibyte character (%#x) encountered in %s"), c, filename);
|
||||
+ else
|
||||
+ as_warn (_("multibyte character (%#x) encountered in %s at or near line %u"), c, filename, lineno);
|
||||
+
|
||||
+ if (++ multibyte_warn_count == MULTIBYTE_WARN_COUNT_LIMIT)
|
||||
+ {
|
||||
+ as_warn (_("further multibyte character warnings suppressed"));
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return found;
|
||||
+}
|
||||
+
|
||||
/* This function is called to process input characters. The GET
|
||||
parameter is used to retrieve more input characters. GET should
|
||||
set its parameter to point to a buffer, and return the length of
|
||||
@@ -463,6 +512,11 @@ do_scrub_chars (size_t (*get) (char *, s
|
||||
return 0;
|
||||
from = input_buffer;
|
||||
fromend = from + fromlen;
|
||||
+
|
||||
+ if (multibyte_handling == multibyte_warn)
|
||||
+ (void) scan_for_multibyte_characters ((const unsigned char *) from,
|
||||
+ (const unsigned char* ) fromend,
|
||||
+ true /* Generate warnings. */);
|
||||
}
|
||||
|
||||
while (1)
|
||||
diff -rupN binutils.orig/gas/as.c binutils-2.37/gas/as.c
|
||||
--- binutils.orig/gas/as.c 2021-11-18 16:50:39.104088534 +0000
|
||||
+++ binutils-2.37/gas/as.c 2021-11-18 16:50:42.531075627 +0000
|
||||
@@ -474,7 +474,7 @@ parse_args (int * pargc, char *** pargv)
|
||||
OPTION_DEBUG_PREFIX_MAP,
|
||||
OPTION_DEFSYM,
|
||||
OPTION_LISTING_LHS_WIDTH,
|
||||
- OPTION_LISTING_LHS_WIDTH2,
|
||||
+ OPTION_LISTING_LHS_WIDTH2, /* = STD_BASE + 10 */
|
||||
OPTION_LISTING_RHS_WIDTH,
|
||||
OPTION_LISTING_CONT_LINES,
|
||||
OPTION_DEPFILE,
|
||||
@@ -484,7 +484,7 @@ parse_args (int * pargc, char *** pargv)
|
||||
OPTION_GDWARF_3,
|
||||
OPTION_GDWARF_4,
|
||||
OPTION_GDWARF_5,
|
||||
- OPTION_GDWARF_SECTIONS,
|
||||
+ OPTION_GDWARF_SECTIONS, /* = STD_BASE + 20 */
|
||||
OPTION_GDWARF_CIE_VERSION,
|
||||
OPTION_STRIP_LOCAL_ABSOLUTE,
|
||||
OPTION_TRADITIONAL_FORMAT,
|
||||
@@ -494,7 +494,7 @@ parse_args (int * pargc, char *** pargv)
|
||||
OPTION_NOEXECSTACK,
|
||||
OPTION_SIZE_CHECK,
|
||||
OPTION_ELF_STT_COMMON,
|
||||
- OPTION_ELF_BUILD_NOTES,
|
||||
+ OPTION_ELF_BUILD_NOTES, /* = STD_BASE + 30 */
|
||||
OPTION_SECTNAME_SUBST,
|
||||
OPTION_ALTERNATE,
|
||||
OPTION_AL,
|
||||
@@ -503,7 +503,8 @@ parse_args (int * pargc, char *** pargv)
|
||||
OPTION_WARN_FATAL,
|
||||
OPTION_COMPRESS_DEBUG,
|
||||
OPTION_NOCOMPRESS_DEBUG,
|
||||
- OPTION_NO_PAD_SECTIONS /* = STD_BASE + 40 */
|
||||
+ OPTION_NO_PAD_SECTIONS,
|
||||
+ OPTION_MULTIBYTE_HANDLING /* = STD_BASE + 40 */
|
||||
/* When you add options here, check that they do
|
||||
not collide with OPTION_MD_BASE. See as.h. */
|
||||
};
|
||||
@@ -581,6 +582,7 @@ parse_args (int * pargc, char *** pargv)
|
||||
,{"target-help", no_argument, NULL, OPTION_TARGET_HELP}
|
||||
,{"traditional-format", no_argument, NULL, OPTION_TRADITIONAL_FORMAT}
|
||||
,{"warn", no_argument, NULL, OPTION_WARN}
|
||||
+ ,{"multibyte-handling", required_argument, NULL, OPTION_MULTIBYTE_HANDLING}
|
||||
};
|
||||
|
||||
/* Construct the option lists from the standard list and the target
|
||||
@@ -683,6 +685,19 @@ parse_args (int * pargc, char *** pargv)
|
||||
flag_traditional_format = 1;
|
||||
break;
|
||||
|
||||
+ case OPTION_MULTIBYTE_HANDLING:
|
||||
+ if (strcmp (optarg, "allow") == 0)
|
||||
+ multibyte_handling = multibyte_allow;
|
||||
+ else if (strcmp (optarg, "warn") == 0)
|
||||
+ multibyte_handling = multibyte_warn;
|
||||
+ else if (strcmp (optarg, "warn-sym-only") == 0)
|
||||
+ multibyte_handling = multibyte_warn_syms;
|
||||
+ else if (strcmp (optarg, "warn_sym_only") == 0)
|
||||
+ multibyte_handling = multibyte_warn_syms;
|
||||
+ else
|
||||
+ as_fatal (_("unexpected argument to --multibyte-handling: '%s'"), optarg);
|
||||
+ break;
|
||||
+
|
||||
case OPTION_VERSION:
|
||||
/* This output is intended to follow the GNU standards document. */
|
||||
printf (_("GNU assembler %s\n"), BFD_VERSION_STRING);
|
||||
diff -rupN binutils.orig/gas/as.h binutils-2.37/gas/as.h
|
||||
--- binutils.orig/gas/as.h 2021-11-18 16:50:38.834089551 +0000
|
||||
+++ binutils-2.37/gas/as.h 2021-11-18 16:50:42.531075627 +0000
|
||||
@@ -344,6 +344,14 @@ COMMON int linkrelax;
|
||||
|
||||
COMMON int do_not_pad_sections_to_alignment;
|
||||
|
||||
+enum multibyte_input_handling
|
||||
+{
|
||||
+ multibyte_allow = 0,
|
||||
+ multibyte_warn,
|
||||
+ multibyte_warn_syms
|
||||
+};
|
||||
+COMMON enum multibyte_input_handling multibyte_handling;
|
||||
+
|
||||
/* TRUE if we should produce a listing. */
|
||||
extern int listing;
|
||||
|
||||
@@ -450,6 +458,7 @@ void input_scrub_insert_file (char *);
|
||||
char * input_scrub_new_file (const char *);
|
||||
char * input_scrub_next_buffer (char **bufp);
|
||||
size_t do_scrub_chars (size_t (*get) (char *, size_t), char *, size_t);
|
||||
+bool scan_for_multibyte_characters (const unsigned char *, const unsigned char *, bool);
|
||||
int gen_to_words (LITTLENUM_TYPE *, int, long);
|
||||
int had_err (void);
|
||||
int ignore_input (void);
|
||||
diff -rupN binutils.orig/gas/doc/as.texi binutils-2.37/gas/doc/as.texi
|
||||
--- binutils.orig/gas/doc/as.texi 2021-11-18 16:50:38.838089536 +0000
|
||||
+++ binutils-2.37/gas/doc/as.texi 2021-11-18 16:50:42.535075612 +0000
|
||||
@@ -245,6 +245,7 @@ gcc(1), ld(1), and the Info entries for
|
||||
[@b{--sectname-subst}] [@b{--size-check=[error|warning]}]
|
||||
[@b{--elf-stt-common=[no|yes]}]
|
||||
[@b{--generate-missing-build-notes=[no|yes]}]
|
||||
+ [@b{--multibyte-handling=[allow|warn|warn-sym-only]}]
|
||||
[@b{--target-help}] [@var{target-options}]
|
||||
[@b{--}|@var{files} @dots{}]
|
||||
@c
|
||||
@@ -866,6 +867,18 @@ Set the maximum width of an input source
|
||||
Set the maximum number of lines printed in a listing for a single line of input
|
||||
to @var{number} + 1.
|
||||
|
||||
+@item --multibyte-handling=allow
|
||||
+@itemx --multibyte-handling=warn
|
||||
+@itemx --multibyte-handling=warn-sym-only
|
||||
+Controls how the assembler handles multibyte characters in the input. The
|
||||
+default (which can be restored by using the @option{allow} argument) is to
|
||||
+allow such characters without complaint. Using the @option{warn} argument will
|
||||
+make the assembler generate a warning message whenever any multibyte character
|
||||
+is encountered. Using the @option{warn-sym-only} argument will only cause a
|
||||
+warning to be generated when a symbol is defined with a name that contains
|
||||
+multibyte characters. (References to undefined symbols will not generate a
|
||||
+warning).
|
||||
+
|
||||
@item --no-pad-sections
|
||||
Stop the assembler for padding the ends of output sections to the alignment
|
||||
of that section. The default is to pad the sections, but this can waste space
|
||||
@@ -2942,9 +2955,11 @@ are noted in @ref{Machine Dependencies}.
|
||||
@end ifset
|
||||
No symbol may begin with a digit. Case is significant.
|
||||
There is no length limit; all characters are significant. Multibyte characters
|
||||
-are supported. Symbols are delimited by characters not in that set, or by the
|
||||
-beginning of a file (since the source program must end with a newline, the end
|
||||
-of a file is not a possible symbol delimiter). @xref{Symbols}.
|
||||
+are supported, but note that the setting of the
|
||||
+@option{--multibyte-handling} option might prevent their use. Symbols
|
||||
+are delimited by characters not in that set, or by the beginning of a file
|
||||
+(since the source program must end with a newline, the end of a file is not a
|
||||
+possible symbol delimiter). @xref{Symbols}.
|
||||
|
||||
Symbol names may also be enclosed in double quote @code{"} characters. In such
|
||||
cases any characters are allowed, except for the NUL character. If a double
|
||||
@@ -3834,11 +3849,18 @@ than @code{Foo}.
|
||||
Symbol names do not start with a digit. An exception to this rule is made for
|
||||
Local Labels. See below.
|
||||
|
||||
-Multibyte characters are supported. To generate a symbol name containing
|
||||
+Multibyte characters are supported, but note that the setting of the
|
||||
+@option{--multibyte-handling} option might prevent their use.
|
||||
+To generate a symbol name containing
|
||||
multibyte characters enclose it within double quotes and use escape codes. cf
|
||||
@xref{Strings}. Generating a multibyte symbol name from a label is not
|
||||
currently supported.
|
||||
|
||||
+Since multibyte symbol names are unusual, and could possibly be used
|
||||
+maliciously, @command{@value{AS}} provides a command line option
|
||||
+(@option{--multibyte-handling=warn-sym-only}) which can be used to generate a
|
||||
+warning message whenever a symbol name containing multibyte characters is defined.
|
||||
+
|
||||
Each symbol has exactly one name. Each name in an assembly language program
|
||||
refers to exactly one symbol. You may use that symbol name any number of times
|
||||
in a program.
|
||||
diff -rupN binutils.orig/gas/input-scrub.c binutils-2.37/gas/input-scrub.c
|
||||
--- binutils.orig/gas/input-scrub.c 2021-11-18 16:50:38.835089547 +0000
|
||||
+++ binutils-2.37/gas/input-scrub.c 2021-11-18 16:50:42.535075612 +0000
|
||||
@@ -377,6 +377,11 @@ input_scrub_next_buffer (char **bufp)
|
||||
++p;
|
||||
}
|
||||
|
||||
+ if (multibyte_handling == multibyte_warn)
|
||||
+ (void) scan_for_multibyte_characters ((const unsigned char *) p,
|
||||
+ (const unsigned char *) limit,
|
||||
+ true /* Generate warnings */);
|
||||
+
|
||||
/* We found a newline in the newly read chars. */
|
||||
partial_where = p;
|
||||
partial_size = limit - p;
|
||||
diff -rupN binutils.orig/gas/symbols.c binutils-2.37/gas/symbols.c
|
||||
--- binutils.orig/gas/symbols.c 2021-11-18 16:50:39.105088530 +0000
|
||||
+++ binutils-2.37/gas/symbols.c 2021-11-18 16:52:17.980716107 +0000
|
||||
@@ -78,6 +78,10 @@ struct symbol_flags
|
||||
before. It is cleared as soon as any direct reference to the
|
||||
symbol is present. */
|
||||
unsigned int weakrefd : 1;
|
||||
+
|
||||
+ /* Set when a warning about the symbol containing multibyte characters
|
||||
+ is generated. */
|
||||
+ unsigned int multibyte_warned : 1;
|
||||
};
|
||||
|
||||
/* A pointer in the symbol may point to either a complete symbol
|
||||
@@ -194,7 +198,7 @@ static void *
|
||||
symbol_entry_find (htab_t table, const char *name)
|
||||
{
|
||||
hashval_t hash = htab_hash_string (name);
|
||||
- symbol_entry_t needle = { { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||
+ symbol_entry_t needle = { { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||
hash, name, 0, 0, 0 } };
|
||||
return htab_find_with_hash (table, &needle, hash);
|
||||
}
|
||||
@@ -305,6 +309,18 @@ symbol_init (symbolS *symbolP, const cha
|
||||
symbolP->bsym->name = name;
|
||||
symbolP->bsym->section = sec;
|
||||
|
||||
+ if (multibyte_handling == multibyte_warn_syms
|
||||
+ && ! symbolP->flags.local_symbol
|
||||
+ && sec != undefined_section
|
||||
+ && ! symbolP->flags.multibyte_warned
|
||||
+ && scan_for_multibyte_characters ((const unsigned char *) name,
|
||||
+ (const unsigned char *) name + strlen (name),
|
||||
+ false /* Do not warn. */))
|
||||
+ {
|
||||
+ as_warn (_("symbol '%s' contains multibyte characters"), name);
|
||||
+ symbolP->flags.multibyte_warned = 1;
|
||||
+ }
|
||||
+
|
||||
S_SET_VALUE (symbolP, valu);
|
||||
|
||||
symbol_clear_list_pointers (symbolP);
|
||||
@@ -2413,7 +2429,21 @@ S_SET_SEGMENT (symbolS *s, segT seg)
|
||||
abort ();
|
||||
}
|
||||
else
|
||||
- s->bsym->section = seg;
|
||||
+ {
|
||||
+ if (multibyte_handling == multibyte_warn_syms
|
||||
+ && ! s->flags.local_symbol
|
||||
+ && seg != undefined_section
|
||||
+ && ! s->flags.multibyte_warned
|
||||
+ && scan_for_multibyte_characters ((const unsigned char *) s->name,
|
||||
+ (const unsigned char *) s->name + strlen (s->name),
|
||||
+ false))
|
||||
+ {
|
||||
+ as_warn (_("symbol '%s' contains multibyte characters"), s->name);
|
||||
+ s->flags.multibyte_warned = 1;
|
||||
+ }
|
||||
+
|
||||
+ s->bsym->section = seg;
|
||||
+ }
|
||||
}
|
||||
|
||||
void
|
||||
diff -rupN binutils.orig/gas/testsuite/gas/all/gas.exp binutils-2.37/gas/testsuite/gas/all/gas.exp
|
||||
--- binutils.orig/gas/testsuite/gas/all/gas.exp 2021-11-18 16:50:39.101088545 +0000
|
||||
+++ binutils-2.37/gas/testsuite/gas/all/gas.exp 2021-11-18 16:50:42.538075600 +0000
|
||||
@@ -494,3 +494,5 @@ run_dump_test "nop"
|
||||
run_dump_test "asciz"
|
||||
run_dump_test "pr27384"
|
||||
run_dump_test "pr27381"
|
||||
+run_dump_test "multibyte1"
|
||||
+run_dump_test "multibyte2"
|
||||
diff -rupN binutils.orig/testsuite/gas/all/multibyte.s binutils-2.37/testsuite/gas/all/multibyte.s
|
||||
--- binutils.orig/testsuite/gas/all/multibyte.s 1970-01-01 01:00:00.000000000 +0100
|
||||
+++ binutils-2.37/testsuite/gas/all/multibyte.s 2021-11-18 16:50:42.541075589 +0000
|
||||
@@ -0,0 +1,8 @@
|
||||
+ .text
|
||||
+ .globl heoll
|
||||
+heoll:
|
||||
+ .nop
|
||||
+
|
||||
+ .globl hello
|
||||
+hello:
|
||||
+ .nop
|
||||
diff -rupN binutils.orig/testsuite/gas/all/multibyte1.d binutils-2.37/testsuite/gas/all/multibyte1.d
|
||||
--- binutils.orig/testsuite/gas/all/multibyte1.d 1970-01-01 01:00:00.000000000 +0100
|
||||
+++ binutils-2.37/testsuite/gas/all/multibyte1.d 2021-11-18 16:50:42.541075589 +0000
|
||||
@@ -0,0 +1,3 @@
|
||||
+#source: multibyte.s
|
||||
+#as: --multibyte-handling=warn
|
||||
+#warning_output: multibyte1.l
|
||||
diff -rupN binutils.orig/testsuite/gas/all/multibyte1.l binutils-2.37/testsuite/gas/all/multibyte1.l
|
||||
--- binutils.orig/testsuite/gas/all/multibyte1.l 1970-01-01 01:00:00.000000000 +0100
|
||||
+++ binutils-2.37/testsuite/gas/all/multibyte1.l 2021-11-18 16:50:42.541075589 +0000
|
||||
@@ -0,0 +1,12 @@
|
||||
+[^:]*: Assembler messages:
|
||||
+[^:]*: Warning: multibyte character \(0xe2\) encountered in .*multibyte.s
|
||||
+[^:]*: Warning: multibyte character \(0x80\) encountered in .*multibyte.s
|
||||
+[^:]*: Warning: multibyte character \(0xae\) encountered in .*multibyte.s
|
||||
+[^:]*: Warning: multibyte character \(0xe2\) encountered in .*multibyte.s
|
||||
+[^:]*: Warning: multibyte character \(0x80\) encountered in .*multibyte.s
|
||||
+[^:]*: Warning: multibyte character \(0xac\) encountered in .*multibyte.s
|
||||
+[^:]*: Warning: multibyte character \(0xe2\) encountered in .*multibyte.s
|
||||
+[^:]*: Warning: multibyte character \(0x80\) encountered in .*multibyte.s
|
||||
+[^:]*: Warning: multibyte character \(0xae\) encountered in .*multibyte.s
|
||||
+[^:]*: Warning: multibyte character \(0xe2\) encountered in .*multibyte.s
|
||||
+[^:]*: Warning: further multibyte character warnings suppressed
|
||||
diff -rupN binutils.orig/testsuite/gas/all/multibyte2.d binutils-2.37/testsuite/gas/all/multibyte2.d
|
||||
--- binutils.orig/testsuite/gas/all/multibyte2.d 1970-01-01 01:00:00.000000000 +0100
|
||||
+++ binutils-2.37/testsuite/gas/all/multibyte2.d 2021-11-18 16:50:42.542075585 +0000
|
||||
@@ -0,0 +1,3 @@
|
||||
+#source: multibyte.s
|
||||
+#as: --multibyte-handling=warn-sym-only
|
||||
+#warning_output: multibyte2.l
|
||||
diff -rupN binutils.orig/testsuite/gas/all/multibyte2.l binutils-2.37/testsuite/gas/all/multibyte2.l
|
||||
--- binutils.orig/testsuite/gas/all/multibyte2.l 1970-01-01 01:00:00.000000000 +0100
|
||||
+++ binutils-2.37/testsuite/gas/all/multibyte2.l 2021-11-18 16:50:42.541075589 +0000
|
||||
@@ -0,0 +1,2 @@
|
||||
+[^:]*: Assembler messages:
|
||||
+[^:]*:3: Warning: symbol '.*' contains multibyte characters
|
@ -39,7 +39,7 @@
|
||||
Summary: A GNU collection of binary utilities
|
||||
Name: binutils%{?name_cross}%{?_with_debug:-debug}
|
||||
Version: 2.37
|
||||
Release: 12%{?dist}
|
||||
Release: 13%{?dist}
|
||||
License: GPLv3+
|
||||
URL: https://sourceware.org/binutils
|
||||
|
||||
@ -295,6 +295,10 @@ Patch21: binutils-gas-Use-the-directory-name-in-.file-0.patch
|
||||
# Lifetime: Fixed in 2.38.
|
||||
Patch22: binutils.unicode.patch
|
||||
|
||||
# Purpose: Add ability to warn about unicode characters in the assembler
|
||||
# Lifetime: Fixed in 2.38.
|
||||
Patch23: binutils-gas-multibyte-warnings.patch
|
||||
|
||||
#----------------------------------------------------------------------------
|
||||
|
||||
Provides: bundled(libiberty)
|
||||
@ -912,6 +916,9 @@ exit 0
|
||||
|
||||
#----------------------------------------------------------------------------
|
||||
%changelog
|
||||
* Fri Nov 19 2021 Nick Clifton <nickc@redhat.com> - 2.37-13
|
||||
- Add ability to warn about multibyte characters in the assembler. (#2018848)
|
||||
|
||||
* Tue Nov 09 2021 Nick Clifton <nickc@redhat.com> - 2.37-12
|
||||
- Add ability to show unicode characters to display tools.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user