#------------------------------------------------------------------------------
# pycparser: c_parser.py
#
# CParser class: Parser and AST builder for the C language
#
# Eli Bendersky [https://eli.thegreenplace.net/]
# License: BSD
#------------------------------------------------------------------------------
from .ply import yacc

from . import c_ast
from .c_lexer import CLexer
from .plyparser import PLYParser, ParseError, parameterized, template
from .ast_transforms import fix_switch_cases, fix_atomic_specifiers
class CParser(PLYParser): | |
def __init__(
        self,
        lex_optimize=True,
        lexer=CLexer,
        lextab='pycparser.lextab',
        yacc_optimize=True,
        yacctab='pycparser.yacctab',
        yacc_debug=False,
        taboutputdir=''):
    """ Create a new CParser.

        Some arguments for controlling the debug/optimization
        level of the parser are provided. The defaults are
        tuned for release/performance mode.
        The simple rules for using them are:
        *) When tweaking CParser/CLexer, set these to False
        *) When releasing a stable parser, set to True

        lex_optimize:
            Set to False when you're modifying the lexer.
            Otherwise, changes in the lexer won't be used, if
            some lextab.py file exists.
            When releasing with a stable lexer, set to True
            to save the re-generation of the lexer table on
            each run.

        lexer:
            Set this parameter to define the lexer to use if
            you're not using the default CLexer.

        lextab:
            Points to the lex table that's used for optimized
            mode. Only if you're modifying the lexer and want
            some tests to avoid re-generating the table, make
            this point to a local lex table file (that's been
            earlier generated with lex_optimize=True)

        yacc_optimize:
            Set to False when you're modifying the parser.
            Otherwise, changes in the parser won't be used, if
            some parsetab.py file exists.
            When releasing with a stable parser, set to True
            to save the re-generation of the parser table on
            each run.

        yacctab:
            Points to the yacc table that's used for optimized
            mode. Only if you're modifying the parser, make
            this point to a local yacc table file

        yacc_debug:
            Generate a parser.out file that explains how yacc
            built the parsing table from the grammar.

        taboutputdir:
            Set this parameter to control the location of generated
            lextab and yacctab files.
    """
    # The lexer calls back into the parser for scope tracking (typedef
    # names must be lexed as TYPEID, which requires the symbol table).
    self.clex = lexer(
        error_func=self._lex_error_func,
        on_lbrace_func=self._lex_on_lbrace_func,
        on_rbrace_func=self._lex_on_rbrace_func,
        type_lookup_func=self._lex_type_lookup_func)

    self.clex.build(
        optimize=lex_optimize,
        lextab=lextab,
        outputdir=taboutputdir)
    self.tokens = self.clex.tokens

    # Each rule listed here also gets an auto-generated "<rule>_opt"
    # variant (matching the rule or 'empty'), via PLYParser._create_opt_rule.
    rules_with_opt = [
        'abstract_declarator',
        'assignment_expression',
        'declaration_list',
        'declaration_specifiers_no_type',
        'designation',
        'expression',
        'identifier_list',
        'init_declarator_list',
        'id_init_declarator_list',
        'initializer_list',
        'parameter_type_list',
        'block_item_list',
        'type_qualifier_list',
        'struct_declarator_list'
    ]

    for rule in rules_with_opt:
        self._create_opt_rule(rule)

    # Build (or load, when optimized) the LALR parsing tables from the
    # p_* rule docstrings defined on this class.
    self.cparser = yacc.yacc(
        module=self,
        start='translation_unit_or_empty',
        debug=yacc_debug,
        optimize=yacc_optimize,
        tabmodule=yacctab,
        outputdir=taboutputdir)

    # Stack of scopes for keeping track of symbols. _scope_stack[-1] is
    # the current (topmost) scope. Each scope is a dictionary that
    # specifies whether a name is a type. If _scope_stack[n][name] is
    # True, 'name' is currently a type in the scope. If it's False,
    # 'name' is used in the scope but not as a type (for instance, if we
    # saw: int name;
    # If 'name' is not a key in _scope_stack[n] then 'name' was not defined
    # in this scope at all.
    self._scope_stack = [dict()]

    # Keeps track of the last token given to yacc (the lookahead token)
    self._last_yielded_token = None
def parse(self, text, filename='', debug=False):
    """ Parse C source code and return the AST (a c_ast.FileAST).

        text:
            A string containing the C source code

        filename:
            Name of the file being parsed (for meaningful
            error messages)

        debug:
            Debug flag to YACC
    """
    # Reset per-parse state so the parser instance is reusable.
    lexer = self.clex
    lexer.filename = filename
    lexer.reset_lineno()
    self._scope_stack = [dict()]
    self._last_yielded_token = None
    return self.cparser.parse(input=text, lexer=lexer, debug=debug)
######################-- PRIVATE --###################### | |
def _push_scope(self): | |
self._scope_stack.append(dict()) | |
def _pop_scope(self): | |
assert len(self._scope_stack) > 1 | |
self._scope_stack.pop() | |
def _add_typedef_name(self, name, coord): | |
""" Add a new typedef name (ie a TYPEID) to the current scope | |
""" | |
if not self._scope_stack[-1].get(name, True): | |
self._parse_error( | |
"Typedef %r previously declared as non-typedef " | |
"in this scope" % name, coord) | |
self._scope_stack[-1][name] = True | |
def _add_identifier(self, name, coord): | |
""" Add a new object, function, or enum member name (ie an ID) to the | |
current scope | |
""" | |
if self._scope_stack[-1].get(name, False): | |
self._parse_error( | |
"Non-typedef %r previously declared as typedef " | |
"in this scope" % name, coord) | |
self._scope_stack[-1][name] = False | |
def _is_type_in_scope(self, name): | |
""" Is *name* a typedef-name in the current scope? | |
""" | |
for scope in reversed(self._scope_stack): | |
# If name is an identifier in this scope it shadows typedefs in | |
# higher scopes. | |
in_scope = scope.get(name) | |
if in_scope is not None: return in_scope | |
return False | |
def _lex_error_func(self, msg, line, column):
    # Callback passed to the lexer: convert a lexing error into a
    # ParseError with a source coordinate.
    self._parse_error(msg, self._coord(line, column))
def _lex_on_lbrace_func(self):
    # Callback passed to the lexer: '{' opens a new scope.
    self._push_scope()
def _lex_on_rbrace_func(self):
    # Callback passed to the lexer: '}' closes the innermost scope.
    self._pop_scope()
def _lex_type_lookup_func(self, name): | |
""" Looks up types that were previously defined with | |
typedef. | |
Passed to the lexer for recognizing identifiers that | |
are types. | |
""" | |
is_type = self._is_type_in_scope(name) | |
return is_type | |
def _get_yacc_lookahead_token(self):
    """ We need access to yacc's lookahead token in certain cases.
        This is the last token yacc requested from the lexer, so we
        ask the lexer.
    """
    return self.clex.last_token
# To understand what's going on here, read sections A.8.5 and | |
# A.8.6 of K&R2 very carefully. | |
# | |
# A C type consists of a basic type declaration, with a list | |
# of modifiers. For example: | |
# | |
# int *c[5]; | |
# | |
# The basic declaration here is 'int c', and the pointer and | |
# the array are the modifiers. | |
# | |
# Basic declarations are represented by TypeDecl (from module c_ast) and the | |
# modifiers are FuncDecl, PtrDecl and ArrayDecl. | |
# | |
# The standard states that whenever a new modifier is parsed, it should be | |
# added to the end of the list of modifiers. For example: | |
# | |
# K&R2 A.8.6.2: Array Declarators | |
# | |
# In a declaration T D where D has the form | |
# D1 [constant-expression-opt] | |
# and the type of the identifier in the declaration T D1 is | |
# "type-modifier T", the type of the | |
# identifier of D is "type-modifier array of T" | |
# | |
# This is what this method does. The declarator it receives | |
# can be a list of declarators ending with TypeDecl. It | |
# tacks the modifier to the end of this list, just before | |
# the TypeDecl. | |
# | |
# Additionally, the modifier may be a list itself. This is | |
# useful for pointers, that can come as a chain from the rule | |
# p_pointer. In this case, the whole modifier list is spliced | |
# into the new location. | |
def _type_modify_decl(self, decl, modifier):
    """ Tacks a type modifier on a declarator, and returns
        the modified declarator.

        The declarator is a (possibly empty) chain of modifier nodes
        (FuncDecl/PtrDecl/ArrayDecl) terminated by a TypeDecl; the
        modifier is itself a chain (e.g. several PtrDecls from
        p_pointer) which is spliced in just before the TypeDecl.
        See the long comment above this method (K&R2 A.8.5/A.8.6).

        Note: the declarator and modifier may be modified
    """
    modifier_head = modifier
    modifier_tail = modifier

    # The modifier may be a nested list. Reach its tail.
    while modifier_tail.type:
        modifier_tail = modifier_tail.type

    # If the decl is a basic type, just tack the modifier onto it.
    if isinstance(decl, c_ast.TypeDecl):
        modifier_tail.type = decl
        return modifier
    else:
        # Otherwise, the decl is a list of modifiers. Reach
        # its tail and splice the modifier onto the tail,
        # pointing to the underlying basic type.
        decl_tail = decl

        while not isinstance(decl_tail.type, c_ast.TypeDecl):
            decl_tail = decl_tail.type

        modifier_tail.type = decl_tail.type
        decl_tail.type = modifier_head
        return decl
# Due to the order in which declarators are constructed, | |
# they have to be fixed in order to look like a normal AST. | |
# | |
# When a declaration arrives from syntax construction, it has | |
# these problems: | |
# * The innermost TypeDecl has no type (because the basic | |
# type is only known at the uppermost declaration level) | |
# * The declaration has no variable name, since that is saved | |
# in the innermost TypeDecl | |
# * The typename of the declaration is a list of type | |
# specifiers, and not a node. Here, basic identifier types | |
# should be separated from more complex types like enums | |
# and structs. | |
# | |
# This method fixes these problems. | |
def _fix_decl_name_type(self, decl, typename):
    """ Fixes a declaration. Modifies decl.

        Moves the name from the innermost TypeDecl up to the Decl,
        copies the qualifiers down, and converts the list of type
        specifiers in *typename* into a single type node (see the
        comment above this method).
    """
    # Reach the underlying basic type. Renamed from `type`/`id` to avoid
    # shadowing the builtins; behavior is unchanged.
    inner_type = decl
    while not isinstance(inner_type, c_ast.TypeDecl):
        inner_type = inner_type.type

    decl.name = inner_type.declname
    inner_type.quals = decl.quals[:]

    # The typename is a list of types. If any type in this
    # list isn't an IdentifierType, it must be the only
    # type in the list (it's illegal to declare "int enum ..")
    # If all the types are basic, they're collected in the
    # IdentifierType holder.
    for tn in typename:
        if not isinstance(tn, c_ast.IdentifierType):
            if len(typename) > 1:
                self._parse_error(
                    "Invalid multiple types specified", tn.coord)
            else:
                inner_type.type = tn
                return decl

    if not typename:
        # Functions default to returning int
        #
        if not isinstance(decl.type, c_ast.FuncDecl):
            self._parse_error(
                "Missing type in declaration", decl.coord)
        inner_type.type = c_ast.IdentifierType(
            ['int'],
            coord=decl.coord)
    else:
        # At this point, we know that typename is a list of IdentifierType
        # nodes. Concatenate all the names into a single list.
        #
        inner_type.type = c_ast.IdentifierType(
            [name for tid in typename for name in tid.names],
            coord=typename[0].coord)
    return decl
def _add_declaration_specifier(self, declspec, newspec, kind, append=False): | |
""" Declaration specifiers are represented by a dictionary | |
with the entries: | |
* qual: a list of type qualifiers | |
* storage: a list of storage type qualifiers | |
* type: a list of type specifiers | |
* function: a list of function specifiers | |
* alignment: a list of alignment specifiers | |
This method is given a declaration specifier, and a | |
new specifier of a given kind. | |
If `append` is True, the new specifier is added to the end of | |
the specifiers list, otherwise it's added at the beginning. | |
Returns the declaration specifier, with the new | |
specifier incorporated. | |
""" | |
spec = declspec or dict(qual=[], storage=[], type=[], function=[], alignment=[]) | |
if append: | |
spec[kind].append(newspec) | |
else: | |
spec[kind].insert(0, newspec) | |
return spec | |
def _build_declarations(self, spec, decls, typedef_namespace=False):
    """ Builds a list of declarations all sharing the given specifiers.

        spec is a specifier dict (see _add_declaration_specifier);
        decls is a list of dicts with keys 'decl' and optionally
        'init' / 'bitsize'. Returns a list of c_ast.Decl / c_ast.Typedef
        nodes.

        If typedef_namespace is true, each declared name is added
        to the "typedef namespace", which also includes objects,
        functions, and enum constants.
    """
    is_typedef = 'typedef' in spec['storage']
    declarations = []

    # Bit-fields are allowed to be unnamed.
    if decls[0].get('bitsize') is not None:
        pass

    # When redeclaring typedef names as identifiers in inner scopes, a
    # problem can occur where the identifier gets grouped into
    # spec['type'], leaving decl as None. This can only occur for the
    # first declarator.
    elif decls[0]['decl'] is None:
        if len(spec['type']) < 2 or len(spec['type'][-1].names) != 1 or \
                not self._is_type_in_scope(spec['type'][-1].names[0]):
            coord = '?'
            for t in spec['type']:
                if hasattr(t, 'coord'):
                    coord = t.coord
                    break
            self._parse_error('Invalid declaration', coord)

        # Make this look as if it came from "direct_declarator:ID"
        decls[0]['decl'] = c_ast.TypeDecl(
            declname=spec['type'][-1].names[0],
            type=None,
            quals=None,
            align=spec['alignment'],
            coord=spec['type'][-1].coord)
        # Remove the "new" type's name from the end of spec['type']
        del spec['type'][-1]

    # A similar problem can occur where the declaration ends up looking
    # like an abstract declarator.  Give it a name if this is the case.
    elif not isinstance(decls[0]['decl'], (
            c_ast.Enum, c_ast.Struct, c_ast.Union, c_ast.IdentifierType)):
        decls_0_tail = decls[0]['decl']
        while not isinstance(decls_0_tail, c_ast.TypeDecl):
            decls_0_tail = decls_0_tail.type
        if decls_0_tail.declname is None:
            decls_0_tail.declname = spec['type'][-1].names[0]
            del spec['type'][-1]

    for decl in decls:
        assert decl['decl'] is not None
        if is_typedef:
            declaration = c_ast.Typedef(
                name=None,
                quals=spec['qual'],
                storage=spec['storage'],
                type=decl['decl'],
                coord=decl['decl'].coord)
        else:
            declaration = c_ast.Decl(
                name=None,
                quals=spec['qual'],
                align=spec['alignment'],
                storage=spec['storage'],
                funcspec=spec['function'],
                type=decl['decl'],
                init=decl.get('init'),
                bitsize=decl.get('bitsize'),
                coord=decl['decl'].coord)

        # Enums/structs/unions already carry their own name and type;
        # everything else needs the name/type fixup pass.
        if isinstance(declaration.type, (
                c_ast.Enum, c_ast.Struct, c_ast.Union,
                c_ast.IdentifierType)):
            fixed_decl = declaration
        else:
            fixed_decl = self._fix_decl_name_type(declaration, spec['type'])

        # Add the type name defined by typedef to a
        # symbol table (for usage in the lexer)
        if typedef_namespace:
            if is_typedef:
                self._add_typedef_name(fixed_decl.name, fixed_decl.coord)
            else:
                self._add_identifier(fixed_decl.name, fixed_decl.coord)

        fixed_decl = fix_atomic_specifiers(fixed_decl)
        declarations.append(fixed_decl)

    return declarations
def _build_function_definition(self, spec, decl, param_decls, body):
    """ Builds a function definition.
    """
    # 'typedef' makes no sense on a function definition.
    if 'typedef' in spec['storage']:
        self._parse_error("Invalid typedef", decl.coord)

    built = self._build_declarations(
        spec=spec,
        decls=[dict(decl=decl, init=None)],
        typedef_namespace=True)

    return c_ast.FuncDef(
        decl=built[0],
        param_decls=param_decls,
        body=body,
        coord=decl.coord)
def _select_struct_union_class(self, token):
    """ Given a token (either STRUCT or UNION), selects the
        appropriate AST class.
    """
    return c_ast.Struct if token == 'struct' else c_ast.Union
## | |
## Precedence and associativity of operators | |
## | |
# If this changes, c_generator.CGenerator.precedence_map needs to change as | |
# well | |
# PLY operator precedence table, lowest to highest; every binary
# operator here is left-associative.
precedence = (
    ('left', 'LOR'),
    ('left', 'LAND'),
    ('left', 'OR'),
    ('left', 'XOR'),
    ('left', 'AND'),
    ('left', 'EQ', 'NE'),
    ('left', 'GT', 'GE', 'LT', 'LE'),
    ('left', 'RSHIFT', 'LSHIFT'),
    ('left', 'PLUS', 'MINUS'),
    ('left', 'TIMES', 'DIVIDE', 'MOD')
)
## | |
## Grammar productions | |
## Implementation of the BNF defined in K&R2 A.13 | |
## | |
# Wrapper around a translation unit, to allow for empty input. | |
# Not strictly part of the C99 Grammar, but useful in practice. | |
def p_translation_unit_or_empty(self, p):
    """ translation_unit_or_empty   : translation_unit
                                    | empty
    """
    # Empty input still yields a (childless) FileAST.
    p[0] = c_ast.FileAST([] if p[1] is None else p[1])
def p_translation_unit_1(self, p):
    """ translation_unit    : external_declaration
    """
    # Note: external_declaration is already a list
    p[0] = p[1]
def p_translation_unit_2(self, p):
    """ translation_unit    : translation_unit external_declaration
    """
    # Accumulate declarations into the existing list (in place).
    unit = p[1]
    unit.extend(p[2])
    p[0] = unit
# Declarations always come as lists (because they can be | |
# several in one line), so we wrap the function definition | |
# into a list as well, to make the return value of | |
# external_declaration homogeneous. | |
def p_external_declaration_1(self, p):
    """ external_declaration    : function_definition
    """
    # Wrap in a list so external_declaration is always list-valued.
    p[0] = [p[1]]
def p_external_declaration_2(self, p):
    """ external_declaration    : declaration
    """
    # declaration already yields a list of Decl nodes.
    p[0] = p[1]
def p_external_declaration_3(self, p):
    """ external_declaration    : pp_directive
                                | pppragma_directive
    """
    # Wrap in a list for homogeneity with the other alternatives.
    p[0] = [p[1]]
def p_external_declaration_4(self, p):
    """ external_declaration    : SEMI
    """
    # A stray top-level semicolon contributes nothing.
    p[0] = []
def p_external_declaration_5(self, p):
    """ external_declaration    : static_assert
    """
    # static_assert already yields a list.
    p[0] = p[1]
def p_static_assert_declaration(self, p):
    """ static_assert           : _STATIC_ASSERT LPAREN constant_expression COMMA unified_string_literal RPAREN
                                | _STATIC_ASSERT LPAREN constant_expression RPAREN
    """
    # len(p) == 5 is the message-less form (C23-style); otherwise the
    # string literal p[5] is the assertion message.
    if len(p) == 5:
        p[0] = [c_ast.StaticAssert(p[3], None, self._token_coord(p, 1))]
    else:
        p[0] = [c_ast.StaticAssert(p[3], p[5], self._token_coord(p, 1))]
def p_pp_directive(self, p):
    """ pp_directive  : PPHASH
    """
    # pycparser expects preprocessed input; a raw '#' is an error.
    self._parse_error('Directives not supported yet',
                      self._token_coord(p, 1))
def p_pppragma_directive(self, p):
    """ pppragma_directive      : PPPRAGMA
                                | PPPRAGMA PPPRAGMASTR
    """
    # len(p) == 3 means the pragma has a string payload (p[2]).
    if len(p) == 3:
        p[0] = c_ast.Pragma(p[2], self._token_coord(p, 2))
    else:
        p[0] = c_ast.Pragma("", self._token_coord(p, 1))
# In function definitions, the declarator can be followed by | |
# a declaration list, for old "K&R style" function definitios. | |
def p_function_definition_1(self, p):
    """ function_definition : id_declarator declaration_list_opt compound_statement
    """
    # no declaration specifiers - 'int' becomes the default type
    # (K&R-style implicit-int function definition)
    spec = dict(
        qual=[],
        alignment=[],
        storage=[],
        type=[c_ast.IdentifierType(['int'],
                                   coord=self._token_coord(p, 1))],
        function=[])

    p[0] = self._build_function_definition(
        spec=spec,
        decl=p[1],
        param_decls=p[2],
        body=p[3])
def p_function_definition_2(self, p):
    """ function_definition : declaration_specifiers id_declarator declaration_list_opt compound_statement
    """
    # Full form: specifiers, declarator, optional K&R param decls, body.
    p[0] = self._build_function_definition(
        spec=p[1],
        decl=p[2],
        param_decls=p[3],
        body=p[4])
# Note, according to C18 A.2.2 6.7.10 static_assert-declaration _Static_assert | |
# is a declaration, not a statement. We additionally recognise it as a statement | |
# to fix parsing of _Static_assert inside the functions. | |
# | |
def p_statement(self, p):
    """ statement   : labeled_statement
                    | expression_statement
                    | compound_statement
                    | selection_statement
                    | iteration_statement
                    | jump_statement
                    | pppragma_directive
                    | static_assert
    """
    # All alternatives already produce a finished AST node (or list).
    p[0] = p[1]
# A pragma is generally considered a decorator rather than an actual | |
# statement. Still, for the purposes of analyzing an abstract syntax tree of | |
# C code, pragma's should not be ignored and were previously treated as a | |
# statement. This presents a problem for constructs that take a statement | |
# such as labeled_statements, selection_statements, and | |
# iteration_statements, causing a misleading structure in the AST. For | |
# example, consider the following C code. | |
# | |
# for (int i = 0; i < 3; i++) | |
# #pragma omp critical | |
# sum += 1; | |
# | |
# This code will compile and execute "sum += 1;" as the body of the for | |
# loop. Previous implementations of PyCParser would render the AST for this | |
# block of code as follows: | |
# | |
# For: | |
# DeclList: | |
# Decl: i, [], [], [] | |
# TypeDecl: i, [] | |
# IdentifierType: ['int'] | |
# Constant: int, 0 | |
# BinaryOp: < | |
# ID: i | |
# Constant: int, 3 | |
# UnaryOp: p++ | |
# ID: i | |
# Pragma: omp critical | |
# Assignment: += | |
# ID: sum | |
# Constant: int, 1 | |
# | |
# This AST misleadingly takes the Pragma as the body of the loop and the | |
# assignment then becomes a sibling of the loop. | |
# | |
# To solve edge cases like these, the pragmacomp_or_statement rule groups | |
# a pragma and its following statement (which would otherwise be orphaned) | |
# using a compound block, effectively turning the above code into: | |
# | |
# for (int i = 0; i < 3; i++) { | |
# #pragma omp critical | |
# sum += 1; | |
# } | |
def p_pragmacomp_or_statement(self, p):
    """ pragmacomp_or_statement     : pppragma_directive statement
                                    | statement
    """
    # A pragma followed by a statement is grouped into a Compound so the
    # statement isn't orphaned as a sibling of the construct that owns it
    # (see the long comment above this method).
    if isinstance(p[1], c_ast.Pragma) and len(p) == 3:
        p[0] = c_ast.Compound(
            block_items=[p[1], p[2]],
            coord=self._token_coord(p, 1))
    else:
        p[0] = p[1]
# In C, declarations can come several in a line: | |
# int x, *px, romulo = 5; | |
# | |
# However, for the AST, we will split them to separate Decl | |
# nodes. | |
# | |
# This rule splits its declarations and always returns a list | |
# of Decl nodes, even if it's one element long. | |
# | |
def p_decl_body(self, p):
    """ decl_body : declaration_specifiers init_declarator_list_opt
                  | declaration_specifiers_no_type id_init_declarator_list_opt
    """
    spec = p[1]

    # p[2] (init_declarator_list_opt) is either a list or None
    #
    if p[2] is None:
        # By the standard, you must have at least one declarator unless
        # declaring a structure tag, a union tag, or the members of an
        # enumeration.
        #
        ty = spec['type']
        s_u_or_e = (c_ast.Struct, c_ast.Union, c_ast.Enum)
        if len(ty) == 1 and isinstance(ty[0], s_u_or_e):
            decls = [c_ast.Decl(
                name=None,
                quals=spec['qual'],
                align=spec['alignment'],
                storage=spec['storage'],
                funcspec=spec['function'],
                type=ty[0],
                init=None,
                bitsize=None,
                coord=ty[0].coord)]

        # However, this case can also occur on redeclared identifiers in
        # an inner scope.  The trouble is that the redeclared type's name
        # gets grouped into declaration_specifiers; _build_declarations
        # compensates for this.
        #
        else:
            decls = self._build_declarations(
                spec=spec,
                decls=[dict(decl=None, init=None)],
                typedef_namespace=True)

    else:
        decls = self._build_declarations(
            spec=spec,
            decls=p[2],
            typedef_namespace=True)

    p[0] = decls
# The declaration has been split to a decl_body sub-rule and | |
# SEMI, because having them in a single rule created a problem | |
# for defining typedefs. | |
# | |
# If a typedef line was directly followed by a line using the | |
# type defined with the typedef, the type would not be | |
# recognized. This is because to reduce the declaration rule, | |
# the parser's lookahead asked for the token after SEMI, which | |
# was the type from the next line, and the lexer had no chance | |
# to see the updated type symbol table. | |
# | |
# Splitting solves this problem, because after seeing SEMI, | |
# the parser reduces decl_body, which actually adds the new | |
# type into the table to be seen by the lexer before the next | |
# line is reached. | |
def p_declaration(self, p):
    """ declaration : decl_body SEMI
    """
    # decl_body is reduced before SEMI's lookahead is fetched, so typedef
    # names enter the symbol table in time (see comment above).
    p[0] = p[1]
# Since each declaration is a list of declarations, this | |
# rule will combine all the declarations and return a single | |
# list | |
# | |
def p_declaration_list(self, p):
    """ declaration_list    : declaration
                            | declaration_list declaration
    """
    # Each declaration is itself a list; concatenate them.
    if len(p) == 2:
        p[0] = p[1]
    else:
        p[0] = p[1] + p[2]
# To know when declaration-specifiers end and declarators begin, | |
# we require declaration-specifiers to have at least one | |
# type-specifier, and disallow typedef-names after we've seen any | |
# type-specifier. These are both required by the spec. | |
# | |
def p_declaration_specifiers_no_type_1(self, p):
    """ declaration_specifiers_no_type  : type_qualifier declaration_specifiers_no_type_opt
    """
    # Prepend the qualifier to the (possibly empty) specifier dict.
    p[0] = self._add_declaration_specifier(p[2], p[1], 'qual')
def p_declaration_specifiers_no_type_2(self, p):
    """ declaration_specifiers_no_type  : storage_class_specifier declaration_specifiers_no_type_opt
    """
    # Prepend the storage-class specifier.
    p[0] = self._add_declaration_specifier(p[2], p[1], 'storage')
def p_declaration_specifiers_no_type_3(self, p):
    """ declaration_specifiers_no_type  : function_specifier declaration_specifiers_no_type_opt
    """
    # Prepend the function specifier (inline / _Noreturn).
    p[0] = self._add_declaration_specifier(p[2], p[1], 'function')
# Without this, `typedef _Atomic(T) U` will parse incorrectly because the | |
# _Atomic qualifier will match, instead of the specifier. | |
def p_declaration_specifiers_no_type_4(self, p):
    """ declaration_specifiers_no_type  : atomic_specifier declaration_specifiers_no_type_opt
    """
    # _Atomic(T) is a type specifier, not a qualifier (see comment above).
    p[0] = self._add_declaration_specifier(p[2], p[1], 'type')
def p_declaration_specifiers_no_type_5(self, p):
    """ declaration_specifiers_no_type  : alignment_specifier declaration_specifiers_no_type_opt
    """
    # Prepend the _Alignas specifier.
    p[0] = self._add_declaration_specifier(p[2], p[1], 'alignment')
def p_declaration_specifiers_1(self, p):
    """ declaration_specifiers  : declaration_specifiers type_qualifier
    """
    # Qualifiers after the type specifier are appended in source order.
    p[0] = self._add_declaration_specifier(p[1], p[2], 'qual', append=True)
def p_declaration_specifiers_2(self, p):
    """ declaration_specifiers  : declaration_specifiers storage_class_specifier
    """
    # Storage-class specifiers are appended in source order.
    p[0] = self._add_declaration_specifier(p[1], p[2], 'storage', append=True)
def p_declaration_specifiers_3(self, p):
    """ declaration_specifiers  : declaration_specifiers function_specifier
    """
    # Function specifiers are appended in source order.
    p[0] = self._add_declaration_specifier(p[1], p[2], 'function', append=True)
def p_declaration_specifiers_4(self, p):
    """ declaration_specifiers  : declaration_specifiers type_specifier_no_typeid
    """
    # After a type specifier is seen, only non-typedef type specifiers
    # may follow (e.g. 'unsigned long long').
    p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)
def p_declaration_specifiers_5(self, p):
    """ declaration_specifiers  : type_specifier
    """
    # First type specifier: start a new specifier dict.
    p[0] = self._add_declaration_specifier(None, p[1], 'type')
def p_declaration_specifiers_6(self, p):
    """ declaration_specifiers  : declaration_specifiers_no_type type_specifier
    """
    # The first type specifier after qualifiers/storage-class specifiers.
    p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)
def p_declaration_specifiers_7(self, p):
    """ declaration_specifiers  : declaration_specifiers alignment_specifier
    """
    # Alignment specifiers are appended in source order.
    p[0] = self._add_declaration_specifier(p[1], p[2], 'alignment', append=True)
def p_storage_class_specifier(self, p):
    """ storage_class_specifier : AUTO
                                | REGISTER
                                | STATIC
                                | EXTERN
                                | TYPEDEF
                                | _THREAD_LOCAL
    """
    # Pass the keyword string through unchanged.
    p[0] = p[1]
def p_function_specifier(self, p):
    """ function_specifier  : INLINE
                            | _NORETURN
    """
    # Pass the keyword string through unchanged.
    p[0] = p[1]
def p_type_specifier_no_typeid(self, p):
    """ type_specifier_no_typeid  : VOID
                                  | _BOOL
                                  | CHAR
                                  | SHORT
                                  | INT
                                  | LONG
                                  | FLOAT
                                  | DOUBLE
                                  | _COMPLEX
                                  | SIGNED
                                  | UNSIGNED
                                  | __INT128
    """
    # Wrap the basic-type keyword in an IdentifierType node.
    p[0] = c_ast.IdentifierType([p[1]], coord=self._token_coord(p, 1))
def p_type_specifier(self, p):
    """ type_specifier  : typedef_name
                        | enum_specifier
                        | struct_or_union_specifier
                        | type_specifier_no_typeid
                        | atomic_specifier
    """
    # All alternatives already produce an AST node.
    p[0] = p[1]
# See section 6.7.2.4 of the C11 standard. | |
def p_atomic_specifier(self, p):
    """ atomic_specifier  : _ATOMIC LPAREN type_name RPAREN
    """
    # See section 6.7.2.4 of the C11 standard: _Atomic(T) is recorded as
    # the inner type with an '_Atomic' qualifier attached.
    typ = p[3]
    typ.quals.append('_Atomic')
    p[0] = typ
def p_type_qualifier(self, p):
    """ type_qualifier  : CONST
                        | RESTRICT
                        | VOLATILE
                        | _ATOMIC
    """
    # Pass the keyword string through unchanged.
    p[0] = p[1]
def p_init_declarator_list(self, p):
    """ init_declarator_list    : init_declarator
                                | init_declarator_list COMMA init_declarator
    """
    # Build up a flat list of declarator dicts.
    if len(p) == 4:
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]
# Returns a {decl=<declarator> : init=<initializer>} dictionary | |
# If there's no initializer, uses None | |
# | |
def p_init_declarator(self, p):
    """ init_declarator : declarator
                        | declarator EQUALS initializer
    """
    # Pair the declarator with its initializer (None when absent).
    if len(p) > 2:
        init = p[3]
    else:
        init = None
    p[0] = {'decl': p[1], 'init': init}
def p_id_init_declarator_list(self, p):
    """ id_init_declarator_list    : id_init_declarator
                                   | id_init_declarator_list COMMA init_declarator
    """
    # Note: only the first declarator is restricted to id_init_declarator;
    # subsequent ones (after the comma) are ordinary init_declarators.
    p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]
def p_id_init_declarator(self, p):
    """ id_init_declarator : id_declarator
                           | id_declarator EQUALS initializer
    """
    # Same {decl, init} dict shape as p_init_declarator.
    p[0] = dict(decl=p[1], init=(p[3] if len(p) > 2 else None))
# Require at least one type specifier in a specifier-qualifier-list | |
# | |
def p_specifier_qualifier_list_1(self, p):
    """ specifier_qualifier_list    : specifier_qualifier_list type_specifier_no_typeid
    """
    # Append an additional basic type specifier.
    p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)
def p_specifier_qualifier_list_2(self, p):
    """ specifier_qualifier_list    : specifier_qualifier_list type_qualifier
    """
    # Append a trailing qualifier.
    p[0] = self._add_declaration_specifier(p[1], p[2], 'qual', append=True)
def p_specifier_qualifier_list_3(self, p):
    """ specifier_qualifier_list  : type_specifier
    """
    # First type specifier: start a new specifier dict.
    p[0] = self._add_declaration_specifier(None, p[1], 'type')
def p_specifier_qualifier_list_4(self, p):
    """ specifier_qualifier_list  : type_qualifier_list type_specifier
    """
    # Qualifiers preceding the type specifier: build the dict directly.
    p[0] = dict(qual=p[1], alignment=[], storage=[], type=[p[2]], function=[])
def p_specifier_qualifier_list_5(self, p):
    """ specifier_qualifier_list  : alignment_specifier
    """
    # An alignment specifier alone starts the specifier dict.
    p[0] = dict(qual=[], alignment=[p[1]], storage=[], type=[], function=[])
def p_specifier_qualifier_list_6(self, p):
    """ specifier_qualifier_list  : specifier_qualifier_list alignment_specifier
    """
    # Prepend a trailing alignment specifier to the existing dict.
    p[0] = self._add_declaration_specifier(p[1], p[2], 'alignment')
# TYPEID is allowed here (and in other struct/enum related tag names), because | |
# struct/enum tags reside in their own namespace and can be named the same as types | |
# | |
def p_struct_or_union_specifier_1(self, p):
    """ struct_or_union_specifier   : struct_or_union ID
                                    | struct_or_union TYPEID
    """
    # Tag-only reference, e.g. 'struct foo'. TYPEID is allowed because
    # struct/union tags live in their own namespace.
    klass = self._select_struct_union_class(p[1])
    # None means no list of members
    p[0] = klass(
        name=p[2],
        decls=None,
        coord=self._token_coord(p, 2))
def p_struct_or_union_specifier_2(self, p):
    """ struct_or_union_specifier : struct_or_union brace_open struct_declaration_list brace_close
                                  | struct_or_union brace_open brace_close
    """
    # Anonymous struct/union with a (possibly empty) member list.
    klass = self._select_struct_union_class(p[1])
    if len(p) == 4:
        # Empty sequence means an empty list of members
        p[0] = klass(
            name=None,
            decls=[],
            coord=self._token_coord(p, 2))
    else:
        p[0] = klass(
            name=None,
            decls=p[3],
            coord=self._token_coord(p, 2))
def p_struct_or_union_specifier_3(self, p):
    """ struct_or_union_specifier   : struct_or_union ID brace_open struct_declaration_list brace_close
                                    | struct_or_union ID brace_open brace_close
                                    | struct_or_union TYPEID brace_open struct_declaration_list brace_close
                                    | struct_or_union TYPEID brace_open brace_close
    """
    # Named struct/union definition with a (possibly empty) member list.
    klass = self._select_struct_union_class(p[1])
    if len(p) == 5:
        # Empty sequence means an empty list of members
        p[0] = klass(
            name=p[2],
            decls=[],
            coord=self._token_coord(p, 2))
    else:
        p[0] = klass(
            name=p[2],
            decls=p[4],
            coord=self._token_coord(p, 2))
    def p_struct_or_union(self, p):
        """ struct_or_union : STRUCT
                            | UNION
        """
        # Pass the keyword string up; _select_struct_union_class keys off it.
        p[0] = p[1]
# Combine all declarations into a single list | |
# | |
def p_struct_declaration_list(self, p): | |
""" struct_declaration_list : struct_declaration | |
| struct_declaration_list struct_declaration | |
""" | |
if len(p) == 2: | |
p[0] = p[1] or [] | |
else: | |
p[0] = p[1] + (p[2] or []) | |
    def p_struct_declaration_1(self, p):
        """ struct_declaration : specifier_qualifier_list struct_declarator_list_opt SEMI
        """
        # Build the Decl nodes for one member declaration line. Three cases:
        # normal declarators, an anonymous struct/union member, and a member
        # whose name was swallowed into the specifier list.
        spec = p[1]
        assert 'typedef' not in spec['storage']
        if p[2] is not None:
            decls = self._build_declarations(
                spec=spec,
                decls=p[2])
        elif len(spec['type']) == 1:
            # Anonymous struct/union, gcc extension, C1x feature.
            # Although the standard only allows structs/unions here, I see no
            # reason to disallow other types since some compilers have typedefs
            # here, and pycparser isn't about rejecting all invalid code.
            #
            node = spec['type'][0]
            if isinstance(node, c_ast.Node):
                decl_type = node
            else:
                decl_type = c_ast.IdentifierType(node)
            decls = self._build_declarations(
                spec=spec,
                decls=[dict(decl=decl_type)])
        else:
            # Structure/union members can have the same names as typedefs.
            # The trouble is that the member's name gets grouped into
            # specifier_qualifier_list; _build_declarations compensates.
            #
            decls = self._build_declarations(
                spec=spec,
                decls=[dict(decl=None, init=None)])
        p[0] = decls
    def p_struct_declaration_2(self, p):
        """ struct_declaration : SEMI
        """
        # A lone ';' inside a struct contributes no members.
        p[0] = None
    def p_struct_declaration_3(self, p):
        """ struct_declaration : pppragma_directive
        """
        # A pragma inside a struct body is kept as a one-element list so it
        # concatenates like regular member declarations.
        p[0] = [p[1]]
def p_struct_declarator_list(self, p): | |
""" struct_declarator_list : struct_declarator | |
| struct_declarator_list COMMA struct_declarator | |
""" | |
p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]] | |
# struct_declarator passes up a dict with the keys: decl (for | |
# the underlying declarator) and bitsize (for the bitsize) | |
# | |
def p_struct_declarator_1(self, p): | |
""" struct_declarator : declarator | |
""" | |
p[0] = {'decl': p[1], 'bitsize': None} | |
    def p_struct_declarator_2(self, p):
        """ struct_declarator   : declarator COLON constant_expression
                                | COLON constant_expression
        """
        # Bitfield member; the second production is an unnamed bitfield,
        # represented by an empty TypeDecl.
        if len(p) > 3:
            p[0] = {'decl': p[1], 'bitsize': p[3]}
        else:
            p[0] = {'decl': c_ast.TypeDecl(None, None, None, None), 'bitsize': p[2]}
    def p_enum_specifier_1(self, p):
        """ enum_specifier  : ENUM ID
                            | ENUM TYPEID
        """
        # Tag-only enum reference; None marks the missing enumerator list.
        p[0] = c_ast.Enum(p[2], None, self._token_coord(p, 1))
    def p_enum_specifier_2(self, p):
        """ enum_specifier  : ENUM brace_open enumerator_list brace_close
        """
        # Anonymous enum definition with a body.
        p[0] = c_ast.Enum(None, p[3], self._token_coord(p, 1))
    def p_enum_specifier_3(self, p):
        """ enum_specifier  : ENUM ID brace_open enumerator_list brace_close
                            | ENUM TYPEID brace_open enumerator_list brace_close
        """
        # Named enum definition with a body.
        p[0] = c_ast.Enum(p[2], p[4], self._token_coord(p, 1))
    def p_enumerator_list(self, p):
        """ enumerator_list : enumerator
                            | enumerator_list COMMA
                            | enumerator_list COMMA enumerator
        """
        # The middle production accepts a trailing comma (C99) without
        # adding an enumerator.
        if len(p) == 2:
            p[0] = c_ast.EnumeratorList([p[1]], p[1].coord)
        elif len(p) == 3:
            p[0] = p[1]
        else:
            p[1].enumerators.append(p[3])
            p[0] = p[1]
    def p_alignment_specifier(self, p):
        """ alignment_specifier  : _ALIGNAS LPAREN type_name RPAREN
                                 | _ALIGNAS LPAREN constant_expression RPAREN
        """
        # C11 _Alignas; the operand may be a type name or a constant expression.
        p[0] = c_ast.Alignas(p[3], self._token_coord(p, 1))
    def p_enumerator(self, p):
        """ enumerator  : ID
                        | ID EQUALS constant_expression
        """
        # Build the Enumerator node (value is None when no '=' is given) and
        # register its name as an ordinary identifier in the current scope.
        if len(p) == 2:
            enumerator = c_ast.Enumerator(
                        p[1], None,
                        self._token_coord(p, 1))
        else:
            enumerator = c_ast.Enumerator(
                        p[1], p[3],
                        self._token_coord(p, 1))
        self._add_identifier(enumerator.name, enumerator.coord)

        p[0] = enumerator
    def p_declarator(self, p):
        """ declarator  : id_declarator
                        | typeid_declarator
        """
        # Pass the underlying declarator node through unchanged.
        p[0] = p[1]
    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_xxx_declarator_1(self, p):
        """ xxx_declarator  : direct_xxx_declarator
        """
        # Pointer-free declarator: pass through.
        p[0] = p[1]
    def p_xxx_declarator_2(self, p):
        """ xxx_declarator  : pointer direct_xxx_declarator
        """
        # Wrap the direct declarator in the pointer modifier chain.
        p[0] = self._type_modify_decl(p[2], p[1])
    def p_direct_xxx_declarator_1(self, p):
        """ direct_xxx_declarator   : yyy
        """
        # Innermost declarator: just the declared name, with type/quals to be
        # filled in by enclosing rules.
        p[0] = c_ast.TypeDecl(
            declname=p[1],
            type=None,
            quals=None,
            align=None,
            coord=self._token_coord(p, 1))
    def p_direct_xxx_declarator_2(self, p):
        """ direct_xxx_declarator   : LPAREN xxx_declarator RPAREN
        """
        # Parenthesized declarator: grouping only, no new node.
        p[0] = p[2]
    def p_direct_xxx_declarator_3(self, p):
        """ direct_xxx_declarator   : direct_xxx_declarator LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET
        """
        # NOTE(review): this production always has 5 RHS symbols, so
        # len(p) == 6 and the `len(p) > 5` tests below are always true;
        # effectively quals = p[3] or [] and dim = p[4].
        quals = (p[3] if len(p) > 5 else []) or []
        # Accept dimension qualifiers
        # Per C99 6.7.5.3 p7
        arr = c_ast.ArrayDecl(
            type=None,
            dim=p[4] if len(p) > 5 else p[3],
            dim_quals=quals,
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
    def p_direct_xxx_declarator_4(self, p):
        """ direct_xxx_declarator   : direct_xxx_declarator LBRACKET STATIC type_qualifier_list_opt assignment_expression RBRACKET
                                    | direct_xxx_declarator LBRACKET type_qualifier_list STATIC assignment_expression RBRACKET
        """
        # Array declarator with C99 'static' in the brackets; 'static' and the
        # qualifiers both end up in dim_quals.
        # Using slice notation for PLY objects doesn't work in Python 3 for the
        # version of PLY embedded with pycparser; see PLY Google Code issue 30.
        # Work around that here by listing the two elements separately.
        listed_quals = [item if isinstance(item, list) else [item]
            for item in [p[3],p[4]]]
        dim_quals = [qual for sublist in listed_quals for qual in sublist
            if qual is not None]
        arr = c_ast.ArrayDecl(
            type=None,
            dim=p[5],
            dim_quals=dim_quals,
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
# Special for VLAs | |
# | |
    def p_direct_xxx_declarator_5(self, p):
        """ direct_xxx_declarator   : direct_xxx_declarator LBRACKET type_qualifier_list_opt TIMES RBRACKET
        """
        # VLA of unspecified size: the '*' token itself becomes the dimension.
        arr = c_ast.ArrayDecl(
            type=None,
            dim=c_ast.ID(p[4], self._token_coord(p, 4)),
            dim_quals=p[3] if p[3] is not None else [],
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
    def p_direct_xxx_declarator_6(self, p):
        """ direct_xxx_declarator   : direct_xxx_declarator LPAREN parameter_type_list RPAREN
                                    | direct_xxx_declarator LPAREN identifier_list_opt RPAREN
        """
        # Function declarator: wrap the declarator in a FuncDecl carrying the
        # parameter list (or old-style identifier list).
        func = c_ast.FuncDecl(
            args=p[3],
            type=None,
            coord=p[1].coord)

        # To see why _get_yacc_lookahead_token is needed, consider:
        #   typedef char TT;
        #   void foo(int TT) { TT = 10; }
        # Outside the function, TT is a typedef, but inside (starting and
        # ending with the braces) it's a parameter. The trouble begins with
        # yacc's lookahead token. We don't know if we're declaring or
        # defining a function until we see LBRACE, but if we wait for yacc to
        # trigger a rule on that token, then TT will have already been read
        # and incorrectly interpreted as TYPEID. We need to add the
        # parameters to the scope the moment the lexer sees LBRACE.
        #
        if self._get_yacc_lookahead_token().type == "LBRACE":
            if func.args is not None:
                for param in func.args.params:
                    if isinstance(param, c_ast.EllipsisParam): break
                    self._add_identifier(param.name, param.coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=func)
    def p_pointer(self, p):
        """ pointer : TIMES type_qualifier_list_opt
                    | TIMES type_qualifier_list_opt pointer
        """
        # Build the PtrDecl chain for one or more '*' levels.
        coord = self._token_coord(p, 1)
        # Pointer decls nest from inside out. This is important when different
        # levels have different qualifiers. For example:
        #
        #  char * const * p;
        #
        # Means "pointer to const pointer to char"
        #
        # While:
        #
        #  char ** const p;
        #
        # Means "const pointer to pointer to char"
        #
        # So when we construct PtrDecl nestings, the leftmost pointer goes in
        # as the most nested type.
        nested_type = c_ast.PtrDecl(quals=p[2] or [], type=None, coord=coord)
        if len(p) > 3:
            tail_type = p[3]
            # Walk to the innermost PtrDecl and attach this level beneath it.
            while tail_type.type is not None:
                tail_type = tail_type.type
            tail_type.type = nested_type
            p[0] = p[3]
        else:
            p[0] = nested_type
def p_type_qualifier_list(self, p): | |
""" type_qualifier_list : type_qualifier | |
| type_qualifier_list type_qualifier | |
""" | |
p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]] | |
    def p_parameter_type_list(self, p):
        """ parameter_type_list : parameter_list
                                | parameter_list COMMA ELLIPSIS
        """
        # A trailing '...' is appended to the existing ParamList as an
        # EllipsisParam node.
        if len(p) > 2:
            p[1].params.append(c_ast.EllipsisParam(self._token_coord(p, 3)))

        p[0] = p[1]
    def p_parameter_list(self, p):
        """ parameter_list  : parameter_declaration
                            | parameter_list COMMA parameter_declaration
        """
        # Collect parameter declarations into a single ParamList node.
        if len(p) == 2: # single parameter
            p[0] = c_ast.ParamList([p[1]], p[1].coord)
        else:
            p[1].params.append(p[3])
            p[0] = p[1]
# From ISO/IEC 9899:TC2, 6.7.5.3.11: | |
# "If, in a parameter declaration, an identifier can be treated either | |
# as a typedef name or as a parameter name, it shall be taken as a | |
# typedef name." | |
# | |
# Inside a parameter declaration, once we've reduced declaration specifiers, | |
# if we shift in an LPAREN and see a TYPEID, it could be either an abstract | |
# declarator or a declarator nested inside parens. This rule tells us to | |
# always treat it as an abstract declarator. Therefore, we only accept | |
# `id_declarator`s and `typeid_noparen_declarator`s. | |
    def p_parameter_declaration_1(self, p):
        """ parameter_declaration   : declaration_specifiers id_declarator
                                    | declaration_specifiers typeid_noparen_declarator
        """
        spec = p[1]
        # Implicit-int fallback when no type specifier was given.
        if not spec['type']:
            spec['type'] = [c_ast.IdentifierType(['int'],
                coord=self._token_coord(p, 1))]
        p[0] = self._build_declarations(
            spec=spec,
            decls=[dict(decl=p[2])])[0]
    def p_parameter_declaration_2(self, p):
        """ parameter_declaration   : declaration_specifiers abstract_declarator_opt
                                    """
        spec = p[1]
        # Implicit-int fallback when no type specifier was given.
        if not spec['type']:
            spec['type'] = [c_ast.IdentifierType(['int'],
                coord=self._token_coord(p, 1))]

        # Parameters can have the same names as typedefs.  The trouble is that
        # the parameter's name gets grouped into declaration_specifiers, making
        # it look like an old-style declaration; compensate.
        #
        if len(spec['type']) > 1 and len(spec['type'][-1].names) == 1 and \
                self._is_type_in_scope(spec['type'][-1].names[0]):
            decl = self._build_declarations(
                    spec=spec,
                    decls=[dict(decl=p[2], init=None)])[0]

        # This truly is an old-style parameter declaration
        #
        else:
            decl = c_ast.Typename(
                name='',
                quals=spec['qual'],
                align=None,
                type=p[2] or c_ast.TypeDecl(None, None, None, None),
                coord=self._token_coord(p, 2))
            typename = spec['type']
            decl = self._fix_decl_name_type(decl, typename)

        p[0] = decl
    def p_identifier_list(self, p):
        """ identifier_list : identifier
                            | identifier_list COMMA identifier
        """
        # Old-style (K&R) parameter name list, stored as a ParamList.
        if len(p) == 2: # single parameter
            p[0] = c_ast.ParamList([p[1]], p[1].coord)
        else:
            p[1].params.append(p[3])
            p[0] = p[1]
    def p_initializer_1(self, p):
        """ initializer : assignment_expression
        """
        # Scalar initializer: pass the expression through.
        p[0] = p[1]
    def p_initializer_2(self, p):
        """ initializer : brace_open initializer_list_opt brace_close
                        | brace_open initializer_list COMMA brace_close
        """
        # Braced initializer; '{}' yields an empty InitList.
        if p[2] is None:
            p[0] = c_ast.InitList([], self._token_coord(p, 1))
        else:
            p[0] = p[2]
    def p_initializer_list(self, p):
        """ initializer_list    : designation_opt initializer
                                | initializer_list COMMA designation_opt initializer
        """
        # A designation (e.g. '.field =' or '[idx] =') wraps the initializer
        # in a NamedInitializer node.
        if len(p) == 3: # single initializer
            init = p[2] if p[1] is None else c_ast.NamedInitializer(p[1], p[2])
            p[0] = c_ast.InitList([init], p[2].coord)
        else:
            init = p[4] if p[3] is None else c_ast.NamedInitializer(p[3], p[4])
            p[1].exprs.append(init)
            p[0] = p[1]
    def p_designation(self, p):
        """ designation : designator_list EQUALS
        """
        # The '=' is syntax only; pass the designator list through.
        p[0] = p[1]
# Designators are represented as a list of nodes, in the order in which | |
# they're written in the code. | |
# | |
def p_designator_list(self, p): | |
""" designator_list : designator | |
| designator_list designator | |
""" | |
p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]] | |
    def p_designator(self, p):
        """ designator  : LBRACKET constant_expression RBRACKET
                        | PERIOD identifier
        """
        # Either an array-index or a member designator; keep the inner node.
        p[0] = p[2]
    def p_type_name(self, p):
        """ type_name   : specifier_qualifier_list abstract_declarator_opt
        """
        # Build a Typename from specifiers plus an optional abstract
        # declarator (an empty TypeDecl stands in when it's absent).
        typename = c_ast.Typename(
            name='',
            quals=p[1]['qual'][:],
            align=None,
            type=p[2] or c_ast.TypeDecl(None, None, None, None),
            coord=self._token_coord(p, 2))

        p[0] = self._fix_decl_name_type(typename, p[1]['type'])
    def p_abstract_declarator_1(self, p):
        """ abstract_declarator     : pointer
        """
        # Bare pointer with no direct declarator; anchor it on a dummy TypeDecl.
        dummytype = c_ast.TypeDecl(None, None, None, None)
        p[0] = self._type_modify_decl(
            decl=dummytype,
            modifier=p[1])
    def p_abstract_declarator_2(self, p):
        """ abstract_declarator     : pointer direct_abstract_declarator
        """
        # Apply the pointer chain to the direct abstract declarator.
        p[0] = self._type_modify_decl(p[2], p[1])
    def p_abstract_declarator_3(self, p):
        """ abstract_declarator     : direct_abstract_declarator
        """
        # No pointer level: pass through.
        p[0] = p[1]
# Creating and using direct_abstract_declarator_opt here | |
# instead of listing both direct_abstract_declarator and the | |
# lack of it in the beginning of _1 and _2 caused two | |
# shift/reduce errors. | |
# | |
    def p_direct_abstract_declarator_1(self, p):
        """ direct_abstract_declarator  : LPAREN abstract_declarator RPAREN """
        # Parenthesized grouping only.
        p[0] = p[2]
    def p_direct_abstract_declarator_2(self, p):
        """ direct_abstract_declarator  : direct_abstract_declarator LBRACKET assignment_expression_opt RBRACKET
        """
        # Array modifier appended to an existing abstract declarator.
        arr = c_ast.ArrayDecl(
            type=None,
            dim=p[3],
            dim_quals=[],
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
    def p_direct_abstract_declarator_3(self, p):
        """ direct_abstract_declarator  : LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET
        """
        # NOTE(review): this production always has 4 RHS symbols, so
        # len(p) == 5 and the `len(p) > 4` tests below are always true;
        # effectively quals = p[2] or [] and dim = p[3].
        quals = (p[2] if len(p) > 4 else []) or []
        p[0] = c_ast.ArrayDecl(
            type=c_ast.TypeDecl(None, None, None, None),
            dim=p[3] if len(p) > 4 else p[2],
            dim_quals=quals,
            coord=self._token_coord(p, 1))
    def p_direct_abstract_declarator_4(self, p):
        """ direct_abstract_declarator  : direct_abstract_declarator LBRACKET TIMES RBRACKET
        """
        # VLA of unspecified size ('[*]') on an existing abstract declarator.
        arr = c_ast.ArrayDecl(
            type=None,
            dim=c_ast.ID(p[3], self._token_coord(p, 3)),
            dim_quals=[],
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
    def p_direct_abstract_declarator_5(self, p):
        """ direct_abstract_declarator  : LBRACKET TIMES RBRACKET
        """
        # Standalone '[*]' VLA with no inner declarator.
        p[0] = c_ast.ArrayDecl(
            type=c_ast.TypeDecl(None, None, None, None),
            dim=c_ast.ID(p[3], self._token_coord(p, 3)),
            dim_quals=[],
            coord=self._token_coord(p, 1))
    def p_direct_abstract_declarator_6(self, p):
        """ direct_abstract_declarator  : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN
        """
        # Function modifier appended to an existing abstract declarator.
        func = c_ast.FuncDecl(
            args=p[3],
            type=None,
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=func)
    def p_direct_abstract_declarator_7(self, p):
        """ direct_abstract_declarator  : LPAREN parameter_type_list_opt RPAREN
        """
        # Standalone function declarator with no inner declarator.
        p[0] = c_ast.FuncDecl(
            args=p[2],
            type=c_ast.TypeDecl(None, None, None, None),
            coord=self._token_coord(p, 1))
# declaration is a list, statement isn't. To make it consistent, block_item | |
# will always be a list | |
# | |
def p_block_item(self, p): | |
""" block_item : declaration | |
| statement | |
""" | |
p[0] = p[1] if isinstance(p[1], list) else [p[1]] | |
# Since we made block_item a list, this just combines lists | |
# | |
def p_block_item_list(self, p): | |
""" block_item_list : block_item | |
| block_item_list block_item | |
""" | |
# Empty block items (plain ';') produce [None], so ignore them | |
p[0] = p[1] if (len(p) == 2 or p[2] == [None]) else p[1] + p[2] | |
    def p_compound_statement_1(self, p):
        """ compound_statement : brace_open block_item_list_opt brace_close """
        # '{ ... }' block; block_items is None for an empty body.
        p[0] = c_ast.Compound(
            block_items=p[2],
            coord=self._token_coord(p, 1))
    def p_labeled_statement_1(self, p):
        """ labeled_statement : ID COLON pragmacomp_or_statement """
        # Ordinary goto label.
        p[0] = c_ast.Label(p[1], p[3], self._token_coord(p, 1))
    def p_labeled_statement_2(self, p):
        """ labeled_statement : CASE constant_expression COLON pragmacomp_or_statement """
        # 'case expr:'; statements are listed so fix_switch_cases can regroup.
        p[0] = c_ast.Case(p[2], [p[4]], self._token_coord(p, 1))
    def p_labeled_statement_3(self, p):
        """ labeled_statement : DEFAULT COLON pragmacomp_or_statement """
        # 'default:' label in a switch.
        p[0] = c_ast.Default([p[3]], self._token_coord(p, 1))
    def p_selection_statement_1(self, p):
        """ selection_statement : IF LPAREN expression RPAREN pragmacomp_or_statement """
        # 'if' without an else branch.
        p[0] = c_ast.If(p[3], p[5], None, self._token_coord(p, 1))
    def p_selection_statement_2(self, p):
        """ selection_statement : IF LPAREN expression RPAREN statement ELSE pragmacomp_or_statement """
        # 'if ... else ...'.
        p[0] = c_ast.If(p[3], p[5], p[7], self._token_coord(p, 1))
    def p_selection_statement_3(self, p):
        """ selection_statement : SWITCH LPAREN expression RPAREN pragmacomp_or_statement """
        # fix_switch_cases regroups the flat statement list under Case/Default.
        p[0] = fix_switch_cases(
            c_ast.Switch(p[3], p[5], self._token_coord(p, 1)))
    def p_iteration_statement_1(self, p):
        """ iteration_statement : WHILE LPAREN expression RPAREN pragmacomp_or_statement """
        # 'while (cond) body'.
        p[0] = c_ast.While(p[3], p[5], self._token_coord(p, 1))
    def p_iteration_statement_2(self, p):
        """ iteration_statement : DO pragmacomp_or_statement WHILE LPAREN expression RPAREN SEMI """
        # 'do body while (cond);' — note cond (p[5]) precedes body (p[2]) in the node.
        p[0] = c_ast.DoWhile(p[5], p[2], self._token_coord(p, 1))
    def p_iteration_statement_3(self, p):
        """ iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN pragmacomp_or_statement """
        # Classic for-loop with optional init/cond/next expressions.
        p[0] = c_ast.For(p[3], p[5], p[7], p[9], self._token_coord(p, 1))
    def p_iteration_statement_4(self, p):
        """ iteration_statement : FOR LPAREN declaration expression_opt SEMI expression_opt RPAREN pragmacomp_or_statement """
        # C99 for-loop with a declaration as init, wrapped in a DeclList.
        p[0] = c_ast.For(c_ast.DeclList(p[3], self._token_coord(p, 1)),
                         p[4], p[6], p[8], self._token_coord(p, 1))
    def p_jump_statement_1(self, p):
        """ jump_statement  : GOTO ID SEMI """
        p[0] = c_ast.Goto(p[2], self._token_coord(p, 1))
    def p_jump_statement_2(self, p):
        """ jump_statement  : BREAK SEMI """
        p[0] = c_ast.Break(self._token_coord(p, 1))
    def p_jump_statement_3(self, p):
        """ jump_statement  : CONTINUE SEMI """
        p[0] = c_ast.Continue(self._token_coord(p, 1))
    def p_jump_statement_4(self, p):
        """ jump_statement  : RETURN expression SEMI
                            | RETURN SEMI
        """
        # 'return;' yields a Return node with expr=None.
        p[0] = c_ast.Return(p[2] if len(p) == 4 else None, self._token_coord(p, 1))
    def p_expression_statement(self, p):
        """ expression_statement : expression_opt SEMI """
        # A bare ';' becomes an EmptyStatement node.
        if p[1] is None:
            p[0] = c_ast.EmptyStatement(self._token_coord(p, 2))
        else:
            p[0] = p[1]
    def p_expression(self, p):
        """ expression  : assignment_expression
                        | expression COMMA assignment_expression
        """
        # The comma operator flattens into a single ExprList; wrap the first
        # expression lazily when the second operand appears.
        if len(p) == 2:
            p[0] = p[1]
        else:
            if not isinstance(p[1], c_ast.ExprList):
                p[1] = c_ast.ExprList([p[1]], p[1].coord)

            p[1].exprs.append(p[3])
            p[0] = p[1]
    def p_parenthesized_compound_expression(self, p):
        """ assignment_expression : LPAREN compound_statement RPAREN """
        # GCC statement-expression extension: '({ ... })' used as an expression.
        p[0] = p[2]
    def p_typedef_name(self, p):
        """ typedef_name : TYPEID """
        # A typedef'd name used as a type specifier.
        p[0] = c_ast.IdentifierType([p[1]], coord=self._token_coord(p, 1))
    def p_assignment_expression(self, p):
        """ assignment_expression   : conditional_expression
                                    | unary_expression assignment_operator assignment_expression
        """
        # Assignment node order: (op, lvalue, rvalue).
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = c_ast.Assignment(p[2], p[1], p[3], p[1].coord)
# K&R2 defines these as many separate rules, to encode | |
# precedence and associativity. Why work hard ? I'll just use | |
# the built in precedence/associativity specification feature | |
# of PLY. (see precedence declaration above) | |
# | |
    def p_assignment_operator(self, p):
        """ assignment_operator : EQUALS
                                | XOREQUAL
                                | TIMESEQUAL
                                | DIVEQUAL
                                | MODEQUAL
                                | PLUSEQUAL
                                | MINUSEQUAL
                                | LSHIFTEQUAL
                                | RSHIFTEQUAL
                                | ANDEQUAL
                                | OREQUAL
        """
        # Pass the operator's source text up for the Assignment node.
        p[0] = p[1]
    def p_constant_expression(self, p):
        """ constant_expression : conditional_expression """
        # pycparser does not evaluate constant expressions; pass through.
        p[0] = p[1]
    def p_conditional_expression(self, p):
        """ conditional_expression  : binary_expression
                                    | binary_expression CONDOP expression COLON conditional_expression
        """
        # 'cond ? iftrue : iffalse' becomes a TernaryOp node.
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = c_ast.TernaryOp(p[1], p[3], p[5], p[1].coord)
    def p_binary_expression(self, p):
        """ binary_expression   : cast_expression
                                | binary_expression TIMES binary_expression
                                | binary_expression DIVIDE binary_expression
                                | binary_expression MOD binary_expression
                                | binary_expression PLUS binary_expression
                                | binary_expression MINUS binary_expression
                                | binary_expression RSHIFT binary_expression
                                | binary_expression LSHIFT binary_expression
                                | binary_expression LT binary_expression
                                | binary_expression LE binary_expression
                                | binary_expression GE binary_expression
                                | binary_expression GT binary_expression
                                | binary_expression EQ binary_expression
                                | binary_expression NE binary_expression
                                | binary_expression AND binary_expression
                                | binary_expression OR binary_expression
                                | binary_expression XOR binary_expression
                                | binary_expression LAND binary_expression
                                | binary_expression LOR binary_expression
        """
        # Precedence/associativity come from the PLY precedence table, not
        # from the grammar shape (see precedence declaration in this class).
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = c_ast.BinaryOp(p[2], p[1], p[3], p[1].coord)
    def p_cast_expression_1(self, p):
        """ cast_expression : unary_expression """
        p[0] = p[1]
    def p_cast_expression_2(self, p):
        """ cast_expression : LPAREN type_name RPAREN cast_expression """
        # C cast: '(type) expr'.
        p[0] = c_ast.Cast(p[2], p[4], self._token_coord(p, 1))
    def p_unary_expression_1(self, p):
        """ unary_expression    : postfix_expression """
        p[0] = p[1]
    def p_unary_expression_2(self, p):
        """ unary_expression    : PLUSPLUS unary_expression
                                | MINUSMINUS unary_expression
                                | unary_operator cast_expression
        """
        # Prefix ++/-- and the unary operators all map to UnaryOp(op, operand).
        p[0] = c_ast.UnaryOp(p[1], p[2], p[2].coord)
    def p_unary_expression_3(self, p):
        """ unary_expression    : SIZEOF unary_expression
                                | SIZEOF LPAREN type_name RPAREN
                                | _ALIGNOF LPAREN type_name RPAREN
        """
        # sizeof expr / sizeof(type) / _Alignof(type); the operand is either
        # the expression (len 3) or the parenthesized type name.
        p[0] = c_ast.UnaryOp(
            p[1],
            p[2] if len(p) == 3 else p[3],
            self._token_coord(p, 1))
    def p_unary_operator(self, p):
        """ unary_operator  : AND
                            | TIMES
                            | PLUS
                            | MINUS
                            | NOT
                            | LNOT
        """
        # Pass the operator's source text up for the UnaryOp node.
        p[0] = p[1]
    def p_postfix_expression_1(self, p):
        """ postfix_expression  : primary_expression """
        p[0] = p[1]
    def p_postfix_expression_2(self, p):
        """ postfix_expression  : postfix_expression LBRACKET expression RBRACKET """
        # Array subscript: a[i].
        p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord)
    def p_postfix_expression_3(self, p):
        """ postfix_expression  : postfix_expression LPAREN argument_expression_list RPAREN
                                | postfix_expression LPAREN RPAREN
        """
        # Function call; args is None for an empty argument list.
        p[0] = c_ast.FuncCall(p[1], p[3] if len(p) == 5 else None, p[1].coord)
    def p_postfix_expression_4(self, p):
        """ postfix_expression  : postfix_expression PERIOD ID
                                | postfix_expression PERIOD TYPEID
                                | postfix_expression ARROW ID
                                | postfix_expression ARROW TYPEID
        """
        # Member access; p[2] is the access operator text ('.' or '->').
        field = c_ast.ID(p[3], self._token_coord(p, 3))
        p[0] = c_ast.StructRef(p[1], p[2], field, p[1].coord)
    def p_postfix_expression_5(self, p):
        """ postfix_expression  : postfix_expression PLUSPLUS
                                | postfix_expression MINUSMINUS
        """
        # The 'p' prefix ('p++'/'p--') distinguishes postfix from prefix ops.
        p[0] = c_ast.UnaryOp('p' + p[2], p[1], p[1].coord)
    def p_postfix_expression_6(self, p):
        """ postfix_expression  : LPAREN type_name RPAREN brace_open initializer_list brace_close
                                | LPAREN type_name RPAREN brace_open initializer_list COMMA brace_close
        """
        # C99 compound literal: '(type){ init-list }'.
        p[0] = c_ast.CompoundLiteral(p[2], p[5])
    def p_primary_expression_1(self, p):
        """ primary_expression  : identifier """
        p[0] = p[1]
    def p_primary_expression_2(self, p):
        """ primary_expression  : constant """
        p[0] = p[1]
    def p_primary_expression_3(self, p):
        """ primary_expression  : unified_string_literal
                                | unified_wstring_literal
        """
        p[0] = p[1]
    def p_primary_expression_4(self, p):
        """ primary_expression  : LPAREN expression RPAREN """
        # Parenthesized expression: grouping only.
        p[0] = p[2]
    def p_primary_expression_5(self, p):
        """ primary_expression  : OFFSETOF LPAREN type_name COMMA offsetof_member_designator RPAREN
        """
        # offsetof(type, member) is modeled as a FuncCall with two arguments.
        coord = self._token_coord(p, 1)
        p[0] = c_ast.FuncCall(c_ast.ID(p[1], coord),
                              c_ast.ExprList([p[3], p[5]], coord),
                              coord)
    def p_offsetof_member_designator(self, p):
        """ offsetof_member_designator : identifier
                                         | offsetof_member_designator PERIOD identifier
                                         | offsetof_member_designator LBRACKET expression RBRACKET
        """
        # Member paths inside offsetof build up StructRef/ArrayRef chains.
        if len(p) == 2:
            p[0] = p[1]
        elif len(p) == 4:
            p[0] = c_ast.StructRef(p[1], p[2], p[3], p[1].coord)
        elif len(p) == 5:
            p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord)
        else:
            raise NotImplementedError("Unexpected parsing state. len(p): %u" % len(p))
    def p_argument_expression_list(self, p):
        """ argument_expression_list    : assignment_expression
                                        | argument_expression_list COMMA assignment_expression
        """
        # Collect call arguments into a single ExprList node.
        if len(p) == 2: # single expr
            p[0] = c_ast.ExprList([p[1]], p[1].coord)
        else:
            p[1].exprs.append(p[3])
            p[0] = p[1]
    def p_identifier(self, p):
        """ identifier  : ID """
        p[0] = c_ast.ID(p[1], self._token_coord(p, 1))
def p_constant_1(self, p): | |
""" constant : INT_CONST_DEC | |
| INT_CONST_OCT | |
| INT_CONST_HEX | |
| INT_CONST_BIN | |
| INT_CONST_CHAR | |
""" | |
uCount = 0 | |
lCount = 0 | |
for x in p[1][-3:]: | |
if x in ('l', 'L'): | |
lCount += 1 | |
elif x in ('u', 'U'): | |
uCount += 1 | |
t = '' | |
if uCount > 1: | |
raise ValueError('Constant cannot have more than one u/U suffix.') | |
elif lCount > 2: | |
raise ValueError('Constant cannot have more than two l/L suffix.') | |
prefix = 'unsigned ' * uCount + 'long ' * lCount | |
p[0] = c_ast.Constant( | |
prefix + 'int', p[1], self._token_coord(p, 1)) | |
    def p_constant_2(self, p):
        """ constant    : FLOAT_CONST
                        | HEX_FLOAT_CONST
        """
        # Hex float constants ('x' in the literal) are typed 'float' here;
        # decimal floats use the f/F and l/L suffixes to pick the type.
        if 'x' in p[1].lower():
            t = 'float'
        else:
            if p[1][-1] in ('f', 'F'):
                t = 'float'
            elif p[1][-1] in ('l', 'L'):
                t = 'long double'
            else:
                t = 'double'

        p[0] = c_ast.Constant(
            t, p[1], self._token_coord(p, 1))
    def p_constant_3(self, p):
        """ constant    : CHAR_CONST
                        | WCHAR_CONST
                        | U8CHAR_CONST
                        | U16CHAR_CONST
                        | U32CHAR_CONST
        """
        # All character-constant flavors are typed 'char' in the AST.
        p[0] = c_ast.Constant(
            'char', p[1], self._token_coord(p, 1))
# The "unified" string and wstring literal rules are for supporting | |
# concatenation of adjacent string literals. | |
# I.e. "hello " "world" is seen by the C compiler as a single string literal | |
# with the value "hello world" | |
# | |
    def p_unified_string_literal(self, p):
        """ unified_string_literal  : STRING_LITERAL
                                    | unified_string_literal STRING_LITERAL
        """
        if len(p) == 2: # single literal
            p[0] = c_ast.Constant(
                'string', p[1], self._token_coord(p, 1))
        else:
            # Concatenate adjacent literals: drop the accumulated literal's
            # closing quote and the new literal's opening quote.
            p[1].value = p[1].value[:-1] + p[2][1:]
            p[0] = p[1]
    def p_unified_wstring_literal(self, p):
        """ unified_wstring_literal : WSTRING_LITERAL
                                    | U8STRING_LITERAL
                                    | U16STRING_LITERAL
                                    | U32STRING_LITERAL
                                    | unified_wstring_literal WSTRING_LITERAL
                                    | unified_wstring_literal U8STRING_LITERAL
                                    | unified_wstring_literal U16STRING_LITERAL
                                    | unified_wstring_literal U32STRING_LITERAL
        """
        if len(p) == 2: # single literal
            p[0] = c_ast.Constant(
                'string', p[1], self._token_coord(p, 1))
        else:
            # Concatenate: strip trailing whitespace and the closing quote from
            # the accumulated value, and skip the new literal's two-character
            # prefix+quote (e.g. the L" of a wide string).
            p[1].value = p[1].value.rstrip()[:-1] + p[2][2:]
            p[0] = p[1]
    def p_brace_open(self, p):
        """ brace_open  :   LBRACE
        """
        p[0] = p[1]
        # Propagate the brace's line number onto the nonterminal so enclosing
        # rules can use it for coordinates.
        p.set_lineno(0, p.lineno(1))
    def p_brace_close(self, p):
        """ brace_close :   RBRACE
        """
        p[0] = p[1]
        # Propagate the brace's line number onto the nonterminal so enclosing
        # rules can use it for coordinates.
        p.set_lineno(0, p.lineno(1))
    def p_empty(self, p):
        'empty : '
        # The empty production used by the *_opt rules.
        p[0] = None
    def p_error(self, p):
        # PLY error hook: raise a ParseError via _parse_error with the
        # offending token's coordinates, or report premature end of input.
        # If error recovery is added here in the future, make sure
        # _get_yacc_lookahead_token still works!
        #
        if p:
            self._parse_error(
                'before: %s' % p.value,
                self._coord(lineno=p.lineno,
                            column=self.clex.find_tok_column(p)))
        else:
            self._parse_error('At end of input', self.clex.filename)