34 std::list<irep_idt> linker_defined_symbols;
36 linker_defined_symbols,
39 linker_def_outfile());
48 error() <<
"Problem parsing linker script JSON data" <<
eom;
55 error() <<
"Malformed linker-script JSON document" <<
eom;
67 error() <<
"Unable to read goto binary for linker script merging" <<
eom;
104 error() <<
"Could not pointerize all linker-defined expressions" <<
eom;
111 error() <<
"Could not write linkerscript-augmented binary" <<
eom;
118 const std::string &elf_binary,
119 const std::string &goto_binary,
122 messaget(message_handler), compiler(compiler),
123 elf_binary(elf_binary), goto_binary(goto_binary),
125 replacement_predicates(
132 return expr.
id()==ID_address_of &&
133 expr.
type().
id()==ID_pointer &&
135 expr.
op0().
id()==ID_index &&
141 expr.
op0().
op1().
id()==ID_constant &&
149 return expr.
id()==ID_address_of &&
150 expr.
type().
id()==ID_pointer &&
152 expr.
op0().
id()==ID_symbol &&
160 return expr.
id()==ID_address_of &&
161 expr.
type().
id()==ID_pointer &&
163 expr.
op0().
id()==ID_symbol &&
164 ns.follow(expr.
op0().
type()).
id()==ID_struct;
171 return expr.
id()==ID_symbol &&
172 expr.
type().
id()==ID_array;
179 return expr.
id()==ID_symbol &&
180 expr.
type().
id()==ID_pointer;
193 for(
const auto &pair : linker_values)
201 entry.
value=pair.second.second;
208 std::list<symbol_exprt> to_pointerize;
211 if(to_pointerize.empty())
213 debug() <<
"Pointerizing the symbol-table value of symbol " << pair.first
220 if(to_pointerize.empty() && fail==0)
223 for(
const auto &sym : to_pointerize)
224 error() <<
" Could not pointerize '" << sym.get_identifier()
225 <<
"' in symbol table entry " << pair.first <<
". Pretty:\n" 226 << sym.pretty() <<
"\n";
237 for(
exprt *insts : std::list<exprt *>({&(iit->code), &(iit->guard)}))
239 std::list<symbol_exprt> to_pointerize;
241 if(to_pointerize.empty())
243 debug() <<
"Pointerizing a program expression..." <<
eom;
245 *insts, to_pointerize, linker_values, ns);
246 if(to_pointerize.empty() && fail==0)
249 for(
const auto &sym : to_pointerize)
251 error() <<
" Could not pointerize '" << sym.get_identifier()
252 <<
"' in function " << gf.first <<
". Pretty:\n" 253 << sym.pretty() <<
"\n";
268 const std::string &shape)
270 auto it=linker_values.find(ident);
271 if(it==linker_values.end())
273 error() <<
"Could not find a new expression for linker script-defined " 274 <<
"symbol '" << ident <<
"'" <<
eom;
279 debug() <<
"Pointerizing linker-defined symbol '" << ident <<
"' of shape <" 280 << shape <<
">." <<
eom;
287 std::list<symbol_exprt> &to_pointerize,
292 for(
auto const &pair : linker_values)
295 if(!pattern.match(expr, ns))
298 const symbol_exprt inner_symbol=pattern.inner_symbol(expr);
301 tmp=
replace_expr(expr, linker_values, inner_symbol, pair.first,
302 pattern.description());
304 auto result=std::find(to_pointerize.begin(), to_pointerize.end(),
306 if(
result==to_pointerize.end())
313 to_pointerize.erase(
result);
332 std::list<symbol_exprt> &to_pointerize)
const 334 for(
const auto &op : expr.
operands())
336 if(op.id()!=ID_symbol)
340 if(pair!=linker_values.end())
341 to_pointerize.push_back(sym_exp);
343 for(
const auto &op : expr.
operands())
348 The current implementation of
this function is less precise than the
349 commented-out version below. To understand the difference between these
350 implementations, consider the following example:
352 Suppose we have a section in the linker script, 100 bytes long, where the
353 address of the symbol sec_start is the start of the section (value 4096) and the
354 address of sec_end is the end of that section (value 4196).
356 The current implementation synthesizes the
goto-version of the following C:
358 char __sec_array[100];
359 char *sec_start=(&__sec_array[0]);
360 char *sec_end=((&__sec_array[0])+100);
364 This is imprecise
for the following reason: the actual address of the array and
365 the pointers shall be some random CBMC-
internal address, instead of being 4096
366 and 4196. The linker script, on the other hand, would have specified the exact
367 position of the section, and we even know what the actual values of sec_start
368 and sec_end are from the
object file (these values are in the `addresses` list
369 of the `
data` argument to
this function). If the correctness of the code depends
370 on these actual values, then CBMCs model of the code is too imprecise to verify
373 The commented-out version of
this function below synthesizes the following:
375 char *sec_start=4096;
379 This code has both the actual addresses of the start and end of the section and
380 tells CBMC that the intermediate region is valid. However, the allocated_memory
381 macro does not currently allocate an actual
object at the address 4096, so
382 symbolic execution can fail. In particular, the
'allocated memory' is part of
383 __CPROVER_memory, which does not have a bounded size;
this means that (
for 384 example) calls to memcpy or memset fail, because the first and third arguments
385 do not have know n size. The commented-out implementation should be reinstated
388 In either
case, no other changes to the rest of the code (outside
this function)
389 should be necessary. The rest of
this file converts expressions containing the
390 linker-defined symbol into pointer types
if they were not already, and
this is
391 the right behaviour
for both implementations.
395 const std::string &linker_script,
402 std::map<irep_idt, std::size_t> truncated_symbols;
403 for(
auto &d :
data[
"regions"].array)
405 bool has_end=d[
"has-end-symbol"].is_true();
409 << d[
"start-symbol"].value;
418 warning() <<
"Object section '" << d[
"section"].value <<
"' of size " 419 << array_size <<
" is too large to model. Truncating to " 432 std::ostringstream array_comment;
433 array_comment <<
"Object section '" << d[
"section"].value <<
"' of size " 434 << array_size <<
" bytes";
435 array_loc.set_comment(array_comment.str());
436 array_expr.add_source_location()=array_loc;
444 linker_values[d[
"start-symbol"].value]=std::make_pair(start_sym,
449 auto it=std::find_if(
data[
"addresses"].array.begin(),
450 data[
"addresses"].array.end(),
451 [&d](
const jsont &add)
452 {
return add[
"sym"].value==d[
"start-symbol"].value; });
453 if(it==
data[
"addresses"].array.end())
455 error() <<
"Start: Could not find address corresponding to symbol '" 456 << d[
"start-symbol"].value <<
"' (start of section)" << eom;
461 std::ostringstream start_comment;
462 start_comment <<
"Pointer to beginning of object section '" 463 << d[
"section"].value <<
"'. Original address in object file" 464 <<
" is " << (*it)[
"val"].value;
474 initialize_instructions.push_front(start_assign_i);
478 plus_exprt array_end(array_start, array_size_expr);
481 linker_values[d[
"end-symbol"].value]=std::make_pair(end_sym, array_end);
483 auto entry = std::find_if(
484 data[
"addresses"].array.begin(),
485 data[
"addresses"].array.end(),
486 [&d](
const jsont &add) {
487 return add[
"sym"].value == d[
"end-symbol"].value;
489 if(entry ==
data[
"addresses"].array.end())
491 error() <<
"Could not find address corresponding to symbol '" 492 << d[
"end-symbol"].value <<
"' (end of section)" << eom;
497 std::ostringstream end_comment;
498 end_comment <<
"Pointer to end of object section '" << d[
"section"].value
499 <<
"'. Original address in object file" 500 <<
" is " << (*entry)[
"val"].value;
509 initialize_instructions.push_front(end_assign_i);
520 array_sym.
type=array_type;
522 symbol_table.
add(array_sym);
530 array_assign.add_source_location()=array_loc;
534 initialize_instructions.push_front(array_assign_i);
542 for(
const auto &d :
data[
"addresses"].array)
545 if(it!=linker_values.end())
552 const auto &pair=truncated_symbols.find(d[
"sym"].value);
553 if(pair==truncated_symbols.end())
554 symbol_value=d[
"val"].value;
557 debug() <<
"Truncating the value of symbol " << d[
"sym"].value <<
" from " 559 <<
" because it corresponds to the size of a too-large section." 565 loc.set_file(linker_script);
566 loc.set_comment(
"linker script-defined symbol: char *"+
567 d[
"sym"].value+
"="+
"(char *)"+
id2string(symbol_value)+
"u;");
580 linker_values[
irep_idt(d[
"sym"].value)]=std::make_pair(lhs, rhs_tc);
587 initialize_instructions.push_front(assign_i);
594 for(
const auto &d :
data[
"regions"].array)
605 loc.set_file(linker_script);
606 loc.set_comment(
"linker script-defined region:\n"+d[
"commt"].value+
":\n"+
608 f.add_source_location()=
loc;
612 initialize_instructions.push_front(i);
623 symbol_table.
add(sym);
626 for(
const auto &d :
data[
"addresses"].array)
629 loc.set_file(linker_script);
630 loc.set_comment(
"linker script-defined symbol: char *"+
631 d[
"sym"].value+
"="+
"(char *)"+d[
"val"].value+
"u;");
643 linker_values[
irep_idt(d[
"sym"].value)]=std::make_pair(lhs, rhs_tc);
646 assign.add_source_location()=
loc;
649 initialize_instructions.push_front(assign_i);
656 std::list<irep_idt> &linker_defined_symbols,
658 const std::string &out_file,
659 const std::string &def_out_file)
661 for(
auto const &pair : symbol_table.
symbols)
664 pair.second.is_extern && pair.second.value.is_nil() &&
667 linker_defined_symbols.push_back(pair.second.name);
671 std::ostringstream linker_def_str;
673 linker_defined_symbols.begin(),
674 linker_defined_symbols.end(),
675 std::ostream_iterator<irep_idt>(linker_def_str,
"\n"));
676 debug() <<
"Linker-defined symbols: [" << linker_def_str.str() <<
"]\n" 680 std::ofstream linker_def_file(linker_def_infile());
681 linker_def_file << linker_def_str.str();
682 linker_def_file.close();
684 std::vector<std::string> argv=
688 "--object", out_file,
689 "--sym-file", linker_def_infile(),
690 "--out-file", def_out_file
694 argv.push_back(
"--very-verbose");
696 argv.push_back(
"--verbose");
699 for(std::size_t i=0; i<argv.size(); i++)
700 debug() <<
" " << argv[i];
703 int rc =
run(argv[0], argv, linker_def_infile(), def_out_file,
"");
705 warning() <<
"Problem parsing linker script" <<
eom;
711 const std::list<irep_idt> &linker_defined_symbols,
715 for(
const auto &sym : linker_defined_symbols)
716 if(linker_values.find(sym)==linker_values.end())
719 error() <<
"Variable '" << sym <<
"' was declared extern but never given " 720 <<
"a value, even in a linker script" <<
eom;
723 for(
const auto &pair : linker_values)
725 auto it=std::find(linker_defined_symbols.begin(),
726 linker_defined_symbols.end(), pair.first);
727 if(it==linker_defined_symbols.end())
730 error() <<
"Linker script-defined symbol '" << pair.first <<
"' was " 731 <<
"either defined in the C source code, not declared extern in " 732 <<
"the C source code, or does not appear in the C source code" 742 !(
data.is_object() &&
data.object.find(
"regions") !=
data.object.end() &&
743 data.object.find(
"addresses") !=
data.object.end() &&
744 data[
"regions"].is_array() &&
data[
"addresses"].is_array() &&
746 data[
"addresses"].array.begin(),
747 data[
"addresses"].array.end(),
749 return j.is_object() && j.object.find(
"val") != j.object.end() &&
750 j.object.find(
"sym") != j.object.end() &&
751 j[
"val"].is_number() && j[
"sym"].is_string();
754 data[
"regions"].array.begin(),
755 data[
"regions"].array.end(),
757 return j.is_object() && j.object.find(
"start") != j.object.end() &&
758 j.object.find(
"size") != j.object.end() &&
759 j.object.find(
"annot") != j.object.end() &&
760 j.object.find(
"commt") != j.object.end() &&
761 j.object.find(
"start-symbol") != j.object.end() &&
762 j.object.find(
"has-end-symbol") != j.object.end() &&
763 j.object.find(
"section") != j.object.end() &&
764 j[
"start"].is_number() && j[
"size"].is_number() &&
765 j[
"annot"].is_string() && j[
"start-symbol"].is_string() &&
766 j[
"section"].is_string() && j[
"commt"].is_string() &&
767 ((j[
"has-end-symbol"].is_true() &&
768 j.object.find(
"end-symbol") != j.object.end() &&
769 j[
"end-symbol"].is_string()) ||
771 j.object.find(
"size-symbol") != j.object.end() &&
772 j.object.find(
"end-symbol") == j.object.end() &&
773 j[
"size-symbol"].is_string()));
irep_idt name
The unique identifier.
const std::string & id2string(const irep_idt &d)
pointer_typet pointer_type(const typet &subtype)
const mp_integer string2integer(const std::string &n, unsigned base)
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
void symbols_to_pointerize(const linker_valuest &linker_values, const exprt &expr, std::list< symbol_exprt > &to_pointerize) const
fill to_pointerize with names of linker symbols appearing in expr
unsignedbv_typet unsigned_int_type()
void set_comment(const irep_idt &comment)
const irep_idt & get_identifier() const
linker_script_merget(compilet &, const std::string &elf_binary, const std::string &goto_binary, const cmdlinet &, message_handlert &)
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
exprt value
Initial value of symbol.
std::string get_value(char option) const
function_mapt function_map
irep_idt pretty_name
Language-specific display name.
typet & type()
Return the type of the expression.
unsignedbv_typet size_type()
symbol_tablet symbol_table
Symbol table.
Magic numbers used throughout the codebase.
A constant literal expression.
#define CHECK_RETURN(CONDITION)
int pointerize_linker_defined_symbols(goto_modelt &, const linker_valuest &)
convert the type of linker script-defined symbols to char*
std::map< irep_idt, std::pair< symbol_exprt, exprt > > linker_valuest
void make_function_call(const codet &_code)
bool parse_json(std::istream &in, const std::string &filename, message_handlert &message_handler, jsont &dest)
mstreamt & warning() const
This class represents an instruction in the GOTO intermediate representation.
virtual symbolt * get_writeable(const irep_idt &name) override
Find a symbol in the symbol table for read-write access.
std::list< instructiont > instructionst
int pointerize_subexprs_of(exprt &expr, std::list< symbol_exprt > &to_pointerize, const linker_valuest &linker_values, const namespacet &ns)
Expression Initialization.
bool read_goto_binary(const std::string &filename, goto_modelt &dest, message_handlert &message_handler)
Read a goto binary from a file, but do not update config.
const irep_idt & id() const
void set_value(const irep_idt &value)
optionalt< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
bool is_false(const literalt &l)
virtual bool isset(char option) const
bool write_object_file(const std::string &, const goto_modelt &)
writes the goto functions in the function table to a binary format object file.
const std::string & elf_binary
source_locationt source_location
void set_file(const irep_idt &file)
#define INITIALIZE_FUNCTION
instructionst instructions
The list of instructions in the goto program.
Patterns of expressions that should be replaced.
std::list< replacement_predicatet > replacement_predicates
The "shapes" of expressions to be replaced by a pointer.
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
int run(const std::string &what, const std::vector< std::string > &argv)
codet representation of a function call statement.
The plus expression Associativity is not specified.
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Class that provides messages with a built-in verbosity 'level'.
dstringt has one field, an unsigned integer no which is an index into a static table of strings...
Operator to return the address of an object.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
int linker_data_is_malformed(const jsont &data) const
Validate output of the scripts/ls_parse.py tool.
A generic container class for the GOTO intermediate representation of one function.
int replace_expr(exprt &old_expr, const linker_valuest &linker_values, const symbol_exprt &old_symbol, const irep_idt &ident, const std::string &shape)
do the actual replacement of an expr with a new pointer expr
unsigned safe_string2unsigned(const std::string &str, int base)
typet type
Type of symbol.
source_locationt location
Source code location of definition of symbol.
message_handlert & get_message_handler()
mstreamt & result() const
source_locationt source_location
The location of the instruction in the source file.
Base class for all expressions.
int add_linker_script_definitions()
Add values of linkerscript-defined symbols to the goto-binary.
const source_locationt & source_location() const
int goto_and_object_mismatch(const std::list< irep_idt > &linker_defined_symbols, const linker_valuest &linker_values)
one-to-one correspondence between extern & linker symbols
source_locationt & add_source_location()
#define Forall_goto_program_instructions(it, program)
Expression to hold a symbol (variable)
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
Merge linker script-defined symbols into a goto-program.
int ls_data2instructions(jsont &data, const std::string &linker_script, goto_programt &gp, symbol_tablet &symbol_table, linker_valuest &linker_values)
Write a list of definitions derived from data into gp's instructions member.
const std::size_t MAX_FLATTENED_ARRAY_SIZE
const std::string & goto_binary
void make_typecast(const typet &_type)
Create a typecast_exprt to the given type.
const irept & find(const irep_namet &name) const
goto_functionst goto_functions
GOTO functions.
bitvector_typet char_type()
A codet representing an assignment in the program.
int get_linker_script_data(std::list< irep_idt > &linker_defined_symbols, const symbol_tablet &symbol_table, const std::string &out_file, const std::string &def_out_file)
Write linker script definitions to linker_data.
std::string array_name(const namespacet &ns, const exprt &expr)