cprover
propagate_const_function_pointers.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Constant Function Pointer Propagation
4 
5 Author: Vincent Nimal
6 
7 \*******************************************************************/
8 
11 
13 
14 #include <util/std_code.h>
15 #include <util/std_expr.h>
16 #include <util/std_types.h>
17 #include <util/irep.h>
18 #include <util/namespace.h>
19 #include <util/message.h>
20 
23 
24 #include <map>
25 #include <list>
26 #include <cassert>
27 
29 {
30 protected:
33  const namespacet ns;
35 
36  std::unordered_map<irep_idt, unsigned, irep_id_hash> map_unique;
37 
38  /* maps const pointer to function (expression + arguments at call) */
39  std::unordered_map<irep_idt, symbol_exprt, irep_id_hash> pointer_to_fun;
40 
41  /* maps const pointer to where it was defined in the call function stack */
42  std::unordered_map<irep_idt, unsigned, irep_id_hash> pointer_to_stack;
43 
44  /* where a const function to inline was the first time invoked */
45  std::unordered_map<irep_idt, unsigned, irep_id_hash> fun_id_to_invok;
46 
47  /* stack of callsites: provides functions and location in the goto-program */
49 
50  bool resolve(
51  const irep_idt &symb,
52  symbol_exprt &goto_function,
53  unsigned &stack_scope)
54  {
55  message.debug() << "I resolve " << symb << " with "
56  << pointer_to_fun[symb].get_identifier() << messaget::eom;
57  if(pointer_to_fun.find(symb)==pointer_to_fun.end())
58  return false;
59  else
60  {
61  goto_function=pointer_to_fun[symb];
62  stack_scope=pointer_to_stack[symb];
63  return true;
64  }
65  }
66 
67  /* TODO: use the whole name rather than the base one, to avoid
68  overwriting?
69  Yes! Pure propagation of pointers only intra-proc. At call, arguments
70  and pointed functions stored in the table -- no need to keep the match
71  between two pointers inter-proc. */
72  bool add(const irep_idt &symb, const symbol_exprt &goto_function)
73  {
74  return add(symb, goto_function, callsite_stack.size());
75  }
76 
77  bool add(const irep_idt &symb,
78  const symbol_exprt &goto_function,
79  unsigned scope)
80  {
81  pointer_to_fun[symb]=goto_function;
82  pointer_to_stack[symb]=scope;
83 
84  const symbolt &function_symb=ns.lookup(goto_function.get_identifier());
85  if(fun_id_to_invok.find(function_symb.base_name)==fun_id_to_invok.end())
86  fun_id_to_invok[function_symb.base_name]=scope;
87 
88  return true;
89  }
90 
91  bool remove(const irep_idt &symb)
92  {
93  assert(pointer_to_fun.find(symb)!=pointer_to_fun.end());
94  pointer_to_fun.erase(symb);
95  return true;
96  }
97 
98  bool has(const irep_idt &symb) const
99  {
100  return pointer_to_fun.find(symb)!=pointer_to_fun.end();
101  }
102 
103  symbol_exprt get(const irep_idt &symb)
104  {
105  return pointer_to_fun[symb];
106  }
107 
108  /* to keep track of the visited functions and avoid recursion */
109  std::set<irep_idt> functions_met;
110 
111  void propagate(const irep_idt &function);
112 
113  void dup_caller_and_inline_callee(const symbol_exprt &function,
114  unsigned stack_scope);
115 
116  /* to keep track of the constant function pointers passed as arguments */
117  class arg_stackt: public std::set<irep_idt>
118  {
119  protected:
121 
122  public:
124  cfpp(_cfpp)
125  {}
126  void add_args(const symbol_exprt &const_function,
127  goto_programt::instructionst::iterator it);
128  void remove_args();
129  };
130 
131 public:
133  goto_functionst &_goto_functions, messaget &_message)
134  :symbol_table(_symbol_table), goto_functions(_goto_functions),
135  ns(_symbol_table), message(_message)
136  {}
137 
138  /* Note that it only propagates from MAIN, following the CFG, without
139  resolving non-constant function pointers. */
140  void propagate()
141  {
143  }
144 };
145 
151  const symbol_exprt &const_function,
152  unsigned stack_scope)
153 {
154  assert(!callsite_stack.empty());
155 
156  /* for the reconstruction of the {call,callsite}_stacks after the
157  duplication */
158  goto_programt::const_targetst new_callsite_stack;
159 
160  goto_programt::const_targetst::const_iterator
161  callsite=callsite_stack.begin();
162 
163  std::string last_suffix="";
164 
165  for(unsigned current_call=callsite_stack.size();
166  current_call>=stack_scope;
167  --current_call)
168  {
169  message.debug() << "current_call=" << current_call << " stack_scope="
170  << stack_scope << " callsite_stack.size()=" << callsite_stack.size()
171  << messaget::eom;
172  assert(callsite!=callsite_stack.end());
173 
174  message.debug() << callsite_stack.size() << messaget::eom;
175 
176  const irep_idt function_id=(*callsite)->function;
177 
179  .function_map[function_id];
180 
181  std::string suffix="$";
182 
183  if(current_call>stack_scope)
184  {
185  /* unique suffix */
186  if(map_unique.find(function_id)!=map_unique.end())
187  {
188  suffix+=std::to_string(map_unique[function_id]);
189  ++map_unique[function_id];
190  }
191  else
192  {
193  map_unique[function_id]=0;
194  suffix+=std::to_string(map_unique[function_id]);
195  ++map_unique[function_id];
196  }
197 
198  /* creates a new, unique copy of the function */
199  message.debug() << "duplicate function: " << function_id
200  << messaget::eom;
201  const irep_idt function_new_id=id2string(function_id)+suffix;
202 
203  goto_functionst::goto_functiont &function_dup=
204  goto_functions.function_map[function_new_id];
205  function_dup.copy_from(goto_functions.function_map[function_id]);
206  pfunction_dup=&function_dup;
207 
208  for(goto_programt::targett it=function_dup.body.instructions.begin();
209  it!=function_dup.body.instructions.end(); ++it)
210  it->function=function_new_id;
211 
212  assert(goto_functions.function_map[function_new_id].body.empty());
213 
214  /* removes in definition the argument leading to the const_function */
215  code_typet::parameterst &args=function_dup.type.parameters();
216  for(code_typet::parameterst::iterator it=args.begin();
217  it!=args.end(); ++it)
218  {
219  if(pointer_to_fun.find(it->get_identifier())!=pointer_to_fun.end()
220  && pointer_to_fun[it->get_identifier()]==const_function)
221  args.erase(it);
222  }
223 
224  /* updates the table of symbols */
225  const symbolt &fun_symb=ns.lookup(function_id);
226  symbolt dup_fun_symb;
227  dup_fun_symb=fun_symb;
228  dup_fun_symb.name=id2string(dup_fun_symb.name)+suffix;
229  dup_fun_symb.base_name=id2string(dup_fun_symb.base_name)+suffix;
230  dup_fun_symb.pretty_name=id2string(dup_fun_symb.pretty_name)+suffix;
231  symbol_table.add(dup_fun_symb);
232 
234  message.debug() << "new function " << function_new_id << " created."
235  << messaget::eom;
236  message.debug() << "new function "
237  << goto_functions.function_map[function_new_id].body.instructions
238  .front().function << " created." << messaget::eom;
239  }
240 
241  if(current_call<callsite_stack.size())
242  {
243  message.debug() << "we then modify the previous callers"
244  << messaget::eom;
245 
246  /* sets the call to the previous newly duplicated function */
247  goto_programt::instructionst &new_instructions=
248  pfunction_dup->body.instructions;
249  goto_programt::targett it=new_instructions.begin();
250  // beurk -- should use location_number or something unique per
251  // instruction but shared between two copies of a same goto-program
252  for( ;
253  it->source_location!=(*callsite)->source_location &&
254  it!=new_instructions.end();
255  ++it)
256  {
257  }
258 
259  assert(it->source_location==(*callsite)->source_location);
260  exprt &function_called=to_code_function_call(it->code).function();
261  assert(function_called.id()==ID_symbol);
262  symbol_exprt &symbol_fun_called=to_symbol_expr(function_called);
263  symbol_fun_called.set_identifier(
264  id2string(symbol_fun_called.get_identifier())+last_suffix);
265 
266  /* removes the constant pointer from the arguments passed at call */
268  .arguments();
269  for(code_function_callt::argumentst::iterator arg_it=args.begin();
270  arg_it!=args.end(); ++arg_it)
271  {
272  if(arg_it->id()==ID_symbol)
273  {
274  const symbol_exprt &symb_arg=to_symbol_expr(*arg_it);
275  if(symb_arg.get_identifier()==const_function.get_identifier()
276  || (pointer_to_fun.find(symb_arg.get_identifier())
277  !=pointer_to_fun.end()
278  && pointer_to_fun[symb_arg.get_identifier()]==const_function) )
279  args.erase(arg_it);
280  }
281  else if(arg_it->id()==ID_address_of)
282  {
283  const address_of_exprt &add_arg=to_address_of_expr(*arg_it);
284  if(add_arg.object().id()==ID_symbol
285  && to_symbol_expr(add_arg.object()).get_identifier()
286  ==const_function.get_identifier())
287  args.erase(arg_it);
288  }
289  }
290 
291  new_callsite_stack.push_back(it);
292  }
293  else
294  {
295  message.debug() << "we first modify the first caller" << messaget::eom;
296 
297  /* initially, inlines the callee function in the duplicate of the last
298  caller */
299  goto_programt::instructionst &new_instructions=
300  pfunction_dup->body.instructions;
301  goto_programt::targett it=new_instructions.begin();
302  // beurk -- should use location_number or something unique per
303  // instruction but shared between two copies of a same goto-program
304  for( ;
305  it->source_location!=(*callsite)->source_location &&
306  it!=new_instructions.end();
307  ++it)
308  {
309  }
310 
311  message.debug() << "callsite targeted: " << (*callsite)->source_location
312  << " function: " << const_function.get_identifier() << messaget::eom;
313 
314  assert(it->source_location==(*callsite)->source_location);
315  message.debug() << it->code.get_statement() << messaget::eom;
316  to_code_function_call(it->code).function()=const_function;
317 
318  new_callsite_stack.push_back(it);
319  }
320 
321  last_suffix=suffix;
322  ++callsite;
323  }
324 
325  /* and updates the call_stack and callsite_stack */
326  // new_callsite_stack.splice(new_callsite_stack.end(), callsite_stack,
327  // callsite, callsite_stack.end());
328  for(goto_programt::const_targetst::const_iterator it=callsite;
329  it!=callsite_stack.end(); ++it)
330  new_callsite_stack.push_back(*it);
331 
332  assert(callsite_stack.size()==new_callsite_stack.size());
333  callsite_stack.swap(new_callsite_stack);
334 }
335 
339  const symbol_exprt &const_function,
340  goto_programt::instructionst::iterator it)
341 {
342  /* if constant pointers passed as arguments, add the names of the parameters
343  in the function definition to the map */
345  to_code_function_call(it->code).arguments();
346 
347  /* retrieve the corresponding parameters expressions in the
348  function definition */
349  assert(cfpp.goto_functions.function_map.find(
350  const_function.get_identifier())!=cfpp.goto_functions.function_map.end());
351 
352  goto_functionst::goto_functiont &cor_function=
354 
355  code_typet::parameterst::const_iterator cor_arg_it=
356  cor_function.type.parameters().begin();
357 
358  for(code_function_callt::argumentst::const_iterator arg_it=arg.begin();
359  arg_it!=arg.end();
360  ++arg_it, ++cor_arg_it)
361  {
362  assert(cor_arg_it!=cor_function.type.parameters().end());
363 
364  if(arg_it->id()!=ID_symbol && arg_it->id()!=ID_address_of)
365  continue;
366 
367  if(arg_it->id()==ID_address_of)
368  {
369  if(to_address_of_expr(*arg_it).object().id()!=ID_symbol)
370  continue;
371 
372  const exprt &arg_expr=to_address_of_expr(*arg_it).object();
373  assert(arg_expr.id()==ID_symbol);
374  const symbol_exprt &arg_symbol_expr=to_symbol_expr(arg_expr);
375 
376  // const symbolt &arg_symbol=
377  // cfpp.symbol_table.lookup(arg_symbol_expr.get_identifier());
378 
379  // debug
380  for(std::unordered_map<irep_idt, unsigned, irep_id_hash>::const_iterator
381  it=cfpp.fun_id_to_invok.begin();
382  it!=cfpp.fun_id_to_invok.end();
383  ++it)
384  cfpp.message.debug() << it->first << ":" << it->second << " ";
386 
387  cfpp.add(cor_arg_it->get_base_name(), arg_symbol_expr);
388  insert(cor_arg_it->get_base_name());
389  cfpp.message.debug() << "SET: insert " << cor_arg_it->get_base_name()
390  << messaget::eom;
391  }
392  else
393  {
394  cfpp.message.debug() << "fun: " << const_function.get_identifier()
395  << " - arg: (symb) " << cfpp.symbol_table
396  .lookup(to_symbol_expr(*arg_it).get_identifier()).base_name
397  << messaget::eom;
398 
399  const symbol_exprt &arg_symbol_expr=to_symbol_expr(*arg_it);
400  const symbolt &arg_symbol=
401  cfpp.symbol_table.lookup(arg_symbol_expr.get_identifier());
402 
403  if(cfpp.has(arg_symbol.base_name))
404  {
405  cfpp.add(cor_arg_it->get_base_name(), cfpp.get(arg_symbol.base_name),
406  cfpp.fun_id_to_invok[arg_symbol.base_name]);
407  insert(cor_arg_it->get_base_name());
408  cfpp.message.debug() << "SET: insert " << cor_arg_it->get_base_name()
409  << messaget::eom;
410  }
411  }
412  }
413 }
414 
416 {
417  /* remove the parameter names */
418  for(const_iterator arg_it=begin(); arg_it!=end(); ++arg_it)
419  {
420  cfpp.remove(*arg_it);
421  cfpp.message.debug() << "SET: remove " << *arg_it << messaget::eom;
422  }
423 }
424 
426  const irep_idt &function_id)
427 {
428  if(goto_functions.function_map.find(function_id)==
430  return;
431 
433  goto_functions.function_map[function_id];
434 
435  if(functions_met.find(function_id)!=functions_met.end())
436  return;
437 
438  functions_met.insert(function_id);
439 
440  Forall_goto_program_instructions(it, function.body)
441  {
442  if(it->is_assign())
443  {
444  /* is it an assignment of function pointer? */
445  const code_assignt &assign=to_code_assign(it->code);
446  const exprt &lhs=assign.lhs();
447  const exprt &rhs=assign.rhs();
448 
449  /* rhs has to be an address to a function */
450  if(rhs.id()!=ID_address_of)
451  continue;
452  const address_of_exprt &addr_rhs=to_address_of_expr(rhs);
453 
454  if(addr_rhs.object().id()!=ID_symbol
455  || addr_rhs.object().type().id()!=ID_code)
456  continue;
457  const symbol_exprt &symbol_rhs=to_symbol_expr(addr_rhs.object());
458 
459  /* lhs must be a pointer */
460  if(lhs.id()!=ID_symbol || lhs.type().id()!=ID_pointer)
461  continue;
462  const symbol_exprt &symbol_expr_lhs=to_symbol_expr(lhs);
463  const symbolt &symbol_lhs=
464  symbol_table.lookup(symbol_expr_lhs.get_identifier());
465 
466  add(symbol_lhs.base_name, symbol_rhs);
467  }
468  else if(it->is_function_call())
469  {
470  callsite_stack.push_front(it);
471 
472  const exprt &fun=to_code_function_call(it->code).function();
473 
474  /* if it is a function pointer */
475  if(fun.id()==ID_dereference)
476  {
477  const exprt &fun_pointer=to_dereference_expr(fun).pointer();
478  if(fun_pointer.id()!=ID_symbol)
479  {
480  callsite_stack.pop_front();
481  continue;
482  }
483 
484  const symbol_exprt &fun_symbol_expr=to_symbol_expr(fun_pointer);
485  const symbolt &fun_symbol=
486  symbol_table.lookup(fun_symbol_expr.get_identifier());
487  symbol_exprt const_function;
488  unsigned stack_scope=0;
489 
490  /* is it a constant pointer? */
491  if(resolve(fun_symbol.base_name, const_function, stack_scope))
492  {
493  /* saves the current context (stack of --unduplicated-- callsites) */
495 
496  /* it is. Inline it and explore it. */
497  dup_caller_and_inline_callee(const_function, stack_scope);
498  message.debug() << "I substitute " << const_function.get_identifier()
499  << messaget::eom;
500 
501  arg_stackt arg_stack(*this);
502  arg_stack.add_args(const_function, it);
503 
504  propagate(const_function.get_identifier());
505 
506  arg_stack.remove_args();
507 
508  /* restores the context */
509  callsite_stack.swap(context);
510  }
511  else
512  {
513  /* no. Ignore it and leave it to the remove_function_pointers */
514  }
515  }
516  else if(fun.id()==ID_symbol)
517  {
518  message.debug() << "Propagates through " << to_symbol_expr(fun)
520 
521  /* just propagate */
522  const symbol_exprt &fun_symbol_expr=to_symbol_expr(fun);
523  const irep_idt &fun_id=fun_symbol_expr.get_identifier();
524 
525  arg_stackt arg_stack(*this);
526  arg_stack.add_args(fun_symbol_expr, it);
527 
528  propagate(fun_id);
529 
530  arg_stack.remove_args();
531  }
532 
533  callsite_stack.pop_front();
534  }
535  else if(it->is_end_function())
536  {
537  functions_met.erase(function_id);
538  return;
539  }
540  }
541 
542  functions_met.erase(function_id);
543 }
544 
548  message_handlert &message_handler)
549 {
550  messaget message(message_handler);
551  const_function_pointer_propagationt propagation(symbol_table,
552  goto_functions, message);
553  propagation.propagate();
554  goto_functions.update();
555 }
irep_idt name
The unique identifier.
Definition: symbol.h:46
virtual bool lookup(const irep_idt &name, const symbolt *&symbol) const
Definition: namespace.cpp:139
symbolt & lookup(const irep_idt &identifier)
Find a symbol in the symbol table.
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
exprt & object()
Definition: std_expr.h:2608
std::unordered_map< irep_idt, unsigned, irep_id_hash > fun_id_to_invok
const irep_idt & get_identifier() const
Definition: std_expr.h:120
Goto Programs with Functions.
bool resolve(const irep_idt &symb, symbol_exprt &goto_function, unsigned &stack_scope)
std::vector< parametert > parameterst
Definition: std_types.h:829
std::unordered_map< irep_idt, unsigned, irep_id_hash > map_unique
void add_args(const symbol_exprt &const_function, goto_programt::instructionst::iterator it)
adds const pointers (instantiated here or propagated) passed as arguments in the map ...
irep_idt pretty_name
Language-specific display name.
Definition: symbol.h:58
typet & type()
Definition: expr.h:60
exprt::operandst argumentst
Definition: std_code.h:687
Symbol table entry.This is a symbol in the symbol table, stored in an object of type symbol_tablet...
Definition: symbol.h:33
static mstreamt & eom(mstreamt &m)
Definition: message.h:193
const dereference_exprt & to_dereference_expr(const exprt &expr)
Cast a generic exprt to a dereference_exprt.
Definition: std_expr.h:2748
bool add(const irep_idt &symb, const symbol_exprt &goto_function, unsigned scope)
Constant Function Pointer Propagation.
const code_assignt & to_code_assign(const codet &code)
Definition: std_code.h:178
const irep_idt & id() const
Definition: irep.h:189
exprt & lhs()
Definition: std_code.h:157
void copy_from(const goto_function_templatet &other)
const address_of_exprt & to_address_of_expr(const exprt &expr)
Cast a generic exprt to an address_of_exprt.
Definition: std_expr.h:2629
argumentst & arguments()
Definition: std_code.h:689
std::unordered_map< irep_idt, unsigned, irep_id_hash > pointer_to_stack
exprt & rhs()
Definition: std_code.h:162
source_locationt source_location
Definition: message.h:175
void dup_caller_and_inline_callee(const symbol_exprt &function, unsigned stack_scope)
API to expression classes.
The symbol table.
Definition: symbol_table.h:52
TO_BE_DOCUMENTED.
Definition: namespace.h:62
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
Operator to return the address of an object.
Definition: std_expr.h:2593
const_function_pointer_propagationt(symbol_tablet &_symbol_table, goto_functionst &_goto_functions, messaget &_message)
mstreamt & debug()
Definition: message.h:253
API to type classes.
void propagate_const_function_pointers(symbol_tablet &symbol_table, goto_functionst &goto_functions, message_handlert &message_handler)
exprt & function()
Definition: std_code.h:677
Base class for all expressions.
Definition: expr.h:46
exprt & pointer()
Definition: std_expr.h:2727
const parameterst & parameters() const
Definition: std_types.h:841
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast a generic exprt to a symbol_exprt.
Definition: std_expr.h:202
irep_idt base_name
Base (non-scoped) name.
Definition: symbol.h:52
std::unordered_map< irep_idt, symbol_exprt, irep_id_hash > pointer_to_fun
#define Forall_goto_program_instructions(it, program)
Definition: goto_program.h:73
Expression to hold a symbol (variable)
Definition: std_expr.h:82
bool add(const irep_idt &symb, const symbol_exprt &goto_function)
Concrete Goto Program.
Assignment.
Definition: std_code.h:144
const code_function_callt & to_code_function_call(const codet &code)
Definition: std_code.h:700