kernel-ark/scripts/genksyms/lex.l
Jan Beulich dc53324060 genksyms: fix typeof() handling
Recent increased use of typeof() throughout the tree resulted in a
number of symbols (25 in a typical distro config of ours) not getting a
proper CRC calculated for them anymore, due to the parser in genksyms
not coping with several of these uses (interestingly in the majority of
[if not all] cases the problem is due to the use of typeof() in code
preceding a certain export, not in the declaration/definition of the
exported function/object itself; I wasn't able to find a way to address
this more general parser shortcoming).

The use of parameter_declaration is a little more relaxed than would be
ideal (permitting not just a bare type specification, but also one with
identifier), but since the same code is being passed through an actual
compiler, there's no apparent risk of allowing through any broken code.

Otoh using parameter_declaration instead of the ad hoc
"decl_specifier_seq '*'" / "decl_specifier_seq" pair allows all types to
be handled rather than just plain ones and pointers to plain ones.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Michal Marek <mmarek@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-04-03 16:20:52 -07:00

483 lines
9.4 KiB
Plaintext

/* Lexical analysis for genksyms.
Copyright 1996, 1997 Linux International.
New implementation contributed by Richard Henderson <rth@tamu.edu>
Based on original work by Bjorn Ekwall <bj0rn@blox.se>
Taken from Linux modutils 2.4.22.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2 of the License, or (at your
option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
%{
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "genksyms.h"
#include "parse.tab.h"
/* We've got a two-level lexer here. We let flex do basic tokenization
and then we categorize those basic tokens in the second stage. */
#define YY_DECL static int yylex1(void)
%}
IDENT [A-Za-z_\$][A-Za-z0-9_\$]*
O_INT 0[0-7]*
D_INT [1-9][0-9]*
X_INT 0[Xx][0-9A-Fa-f]+
I_SUF [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT ({O_INT}|{D_INT}|{X_INT}){I_SUF}?
FRAC ([0-9]*\.[0-9]+)|([0-9]+\.)
EXP [Ee][+-]?[0-9]+
F_SUF [FfLl]
REAL ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)
STRING L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR L?\'([^\\\']*\\.)*[^\\\']*\'
MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
/* We don't do multiple input files. */
%option noyywrap
%option noinput
%%
/* Keep track of our location in the original source files. */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return FILENAME;
^#.*\n cur_line++;
\n cur_line++;
/* Ignore all other whitespace. */
[ \t\f\v\r]+ ;
{STRING} return STRING;
{CHAR} return CHAR;
{IDENT} return IDENT;
/* The Pedant requires that the other C multi-character tokens be
recognized as tokens. We don't actually use them since we don't
parse expressions, but we do want whitespace to be arranged
around them properly. */
{MC_TOKEN} return OTHER;
{INT} return INT;
{REAL} return REAL;
"..." return DOTS;
/* All other tokens are single characters. */
. return yytext[0];
%%
/* Bring in the keyword recognizer. */
#include "keywords.hash.c"
/* Macros to append to our phrase collection list. */
/*
* We mark any token, that that equals to a known enumerator, as
* SYM_ENUM_CONST. The parser will change this for struct and union tags later,
* the only problem is struct and union members:
* enum e { a, b }; struct s { int a, b; }
* but in this case, the only effect will be, that the ABI checksums become
* more volatile, which is acceptable. Also, such collisions are quite rare,
* so far it was only observed in include/linux/telephony.h.
*/
#define _APP(T,L) do { \
cur_node = next_node; \
next_node = xmalloc(sizeof(*next_node)); \
next_node->next = cur_node; \
cur_node->string = memcpy(xmalloc(L+1), T, L+1); \
cur_node->tag = \
find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\
SYM_ENUM_CONST : SYM_NORMAL ; \
cur_node->in_source_file = in_source_file; \
} while (0)
#define APP _APP(yytext, yyleng)
/* The second stage lexer. Here we incorporate knowledge of the state
of the parser to tailor the tokens that are returned. */
int
yylex(void)
{
static enum {
ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
ST_BRACKET, ST_BRACE, ST_EXPRESSION,
ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
ST_TABLE_5, ST_TABLE_6
} lexstate = ST_NOTSTARTED;
static int suppress_type_lookup, dont_want_brace_phrase;
static struct string_list *next_node;
int token, count = 0;
struct string_list *cur_node;
if (lexstate == ST_NOTSTARTED)
{
next_node = xmalloc(sizeof(*next_node));
next_node->next = NULL;
lexstate = ST_NORMAL;
}
repeat:
token = yylex1();
if (token == 0)
return 0;
else if (token == FILENAME)
{
char *file, *e;
/* Save the filename and line number for later error messages. */
if (cur_filename)
free(cur_filename);
file = strchr(yytext, '\"')+1;
e = strchr(file, '\"');
*e = '\0';
cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
cur_line = atoi(yytext+2);
if (!source_file) {
source_file = xstrdup(cur_filename);
in_source_file = 1;
} else {
in_source_file = (strcmp(cur_filename, source_file) == 0);
}
goto repeat;
}
switch (lexstate)
{
case ST_NORMAL:
switch (token)
{
case IDENT:
APP;
{
const struct resword *r = is_reserved_word(yytext, yyleng);
if (r)
{
switch (token = r->token)
{
case ATTRIBUTE_KEYW:
lexstate = ST_ATTRIBUTE;
count = 0;
goto repeat;
case ASM_KEYW:
lexstate = ST_ASM;
count = 0;
goto repeat;
case TYPEOF_KEYW:
lexstate = ST_TYPEOF;
count = 0;
goto repeat;
case STRUCT_KEYW:
case UNION_KEYW:
case ENUM_KEYW:
dont_want_brace_phrase = 3;
suppress_type_lookup = 2;
goto fini;
case EXPORT_SYMBOL_KEYW:
goto fini;
}
}
if (!suppress_type_lookup)
{
if (find_symbol(yytext, SYM_TYPEDEF, 1))
token = TYPE;
}
}
break;
case '[':
APP;
lexstate = ST_BRACKET;
count = 1;
goto repeat;
case '{':
APP;
if (dont_want_brace_phrase)
break;
lexstate = ST_BRACE;
count = 1;
goto repeat;
case '=': case ':':
APP;
lexstate = ST_EXPRESSION;
break;
case DOTS:
default:
APP;
break;
}
break;
case ST_ATTRIBUTE:
APP;
switch (token)
{
case '(':
++count;
goto repeat;
case ')':
if (--count == 0)
{
lexstate = ST_NORMAL;
token = ATTRIBUTE_PHRASE;
break;
}
goto repeat;
default:
goto repeat;
}
break;
case ST_ASM:
APP;
switch (token)
{
case '(':
++count;
goto repeat;
case ')':
if (--count == 0)
{
lexstate = ST_NORMAL;
token = ASM_PHRASE;
break;
}
goto repeat;
default:
goto repeat;
}
break;
case ST_TYPEOF:
switch (token)
{
case '(':
if ( ++count == 1 )
lexstate = ST_TYPEOF_1;
else
APP;
goto repeat;
case ')':
APP;
if (--count == 0)
{
lexstate = ST_NORMAL;
token = TYPEOF_PHRASE;
break;
}
goto repeat;
default:
APP;
goto repeat;
}
break;
case ST_TYPEOF_1:
if (token == IDENT)
{
if (is_reserved_word(yytext, yyleng)
|| find_symbol(yytext, SYM_TYPEDEF, 1))
{
yyless(0);
unput('(');
lexstate = ST_NORMAL;
token = TYPEOF_KEYW;
break;
}
_APP("(", 1);
}
APP;
lexstate = ST_TYPEOF;
goto repeat;
case ST_BRACKET:
APP;
switch (token)
{
case '[':
++count;
goto repeat;
case ']':
if (--count == 0)
{
lexstate = ST_NORMAL;
token = BRACKET_PHRASE;
break;
}
goto repeat;
default:
goto repeat;
}
break;
case ST_BRACE:
APP;
switch (token)
{
case '{':
++count;
goto repeat;
case '}':
if (--count == 0)
{
lexstate = ST_NORMAL;
token = BRACE_PHRASE;
break;
}
goto repeat;
default:
goto repeat;
}
break;
case ST_EXPRESSION:
switch (token)
{
case '(': case '[': case '{':
++count;
APP;
goto repeat;
case '}':
/* is this the last line of an enum declaration? */
if (count == 0)
{
/* Put back the token we just read so's we can find it again
after registering the expression. */
unput(token);
lexstate = ST_NORMAL;
token = EXPRESSION_PHRASE;
break;
}
/* FALLTHRU */
case ')': case ']':
--count;
APP;
goto repeat;
case ',': case ';':
if (count == 0)
{
/* Put back the token we just read so's we can find it again
after registering the expression. */
unput(token);
lexstate = ST_NORMAL;
token = EXPRESSION_PHRASE;
break;
}
APP;
goto repeat;
default:
APP;
goto repeat;
}
break;
case ST_TABLE_1:
goto repeat;
case ST_TABLE_2:
if (token == IDENT && yyleng == 1 && yytext[0] == 'X')
{
token = EXPORT_SYMBOL_KEYW;
lexstate = ST_TABLE_5;
APP;
break;
}
lexstate = ST_TABLE_6;
/* FALLTHRU */
case ST_TABLE_6:
switch (token)
{
case '{': case '[': case '(':
++count;
break;
case '}': case ']': case ')':
--count;
break;
case ',':
if (count == 0)
lexstate = ST_TABLE_2;
break;
};
goto repeat;
case ST_TABLE_3:
goto repeat;
case ST_TABLE_4:
if (token == ';')
lexstate = ST_NORMAL;
goto repeat;
case ST_TABLE_5:
switch (token)
{
case ',':
token = ';';
lexstate = ST_TABLE_2;
APP;
break;
default:
APP;
break;
}
break;
default:
exit(1);
}
fini:
if (suppress_type_lookup > 0)
--suppress_type_lookup;
if (dont_want_brace_phrase > 0)
--dont_want_brace_phrase;
yylval = &next_node->next;
return token;
}