cprover
taint_analysis.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Taint Analysis
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
11 
12 #include "taint_analysis.h"
13 
14 #include <iostream>
15 #include <fstream>
16 
17 #include <util/invariant.h>
18 #include <util/json.h>
19 #include <util/prefix.h>
20 #include <util/simplify_expr.h>
21 #include <util/string_constant.h>
22 
24 
26 
27 #include "taint_parser.h"
28 
30 {
31 public:
33  {
34  }
35 
36  bool operator()(
37  const std::string &taint_file_name,
38  const symbol_tablet &,
40  bool show_full,
41  const std::string &json_file_name);
42 
43 protected:
46 
47  void instrument(const namespacet &, goto_functionst &);
49 };
50 
52  const namespacet &ns,
53  goto_functionst &goto_functions)
54 {
55  for(auto &function : goto_functions.function_map)
56  instrument(ns, function.second);
57 }
58 
60  const namespacet &ns,
61  goto_functionst::goto_functiont &goto_function)
62 {
63  for(goto_programt::instructionst::iterator
64  it=goto_function.body.instructions.begin();
65  it!=goto_function.body.instructions.end();
66  it++)
67  {
68  const goto_programt::instructiont &instruction=*it;
69 
70  goto_programt insert_before, insert_after;
71 
72  switch(instruction.type)
73  {
74  case FUNCTION_CALL:
75  {
76  const code_function_callt &function_call=
77  to_code_function_call(instruction.code);
78  const exprt &function=function_call.function();
79 
80  if(function.id()==ID_symbol)
81  {
82  const irep_idt &identifier=
83  to_symbol_expr(function).get_identifier();
84 
85  std::set<irep_idt> identifiers;
86 
87  identifiers.insert(identifier);
88 
89  irep_idt class_id=function.get(ID_C_class);
90  if(class_id.empty())
91  {
92  }
93  else
94  {
95  std::string suffix=
96  std::string(
97  id2string(identifier), class_id.size(), std::string::npos);
98 
99  class_hierarchyt::idst parents=
101  for(const auto &p : parents)
102  identifiers.insert(id2string(p)+suffix);
103  }
104 
105  for(const auto &rule : taint.rules)
106  {
107  bool match=false;
108  for(const auto &i : identifiers)
109  if(i==rule.function_identifier ||
110  has_prefix(
111  id2string(i),
112  "java::"+id2string(rule.function_identifier)+":"))
113  {
114  match=true;
115  break;
116  }
117 
118  if(match)
119  {
120  debug() << "MATCH " << rule.id << " on " << identifier << eom;
121 
122  exprt where=nil_exprt();
123 
124  const code_typet &code_type=to_code_type(function.type());
125 
126  bool have_this=
127  !code_type.parameters().empty() &&
128  code_type.parameters().front().get_bool(ID_C_this);
129 
130  switch(rule.where)
131  {
133  {
134  const symbolt &return_value_symbol=
135  ns.lookup(id2string(identifier)+"#return_value");
136  where=return_value_symbol.symbol_expr();
137  }
138  break;
139 
141  {
142  unsigned nr=
143  have_this?rule.parameter_number:rule.parameter_number-1;
144  if(function_call.arguments().size()>nr)
145  where=function_call.arguments()[nr];
146  }
147  break;
148 
150  if(have_this)
151  {
153  !function_call.arguments().empty(),
154  "`this` implies at least one argument in function call");
155  where=function_call.arguments()[0];
156  }
157  break;
158  }
159 
160  switch(rule.kind)
161  {
163  {
164  codet code_set_may("set_may");
165  code_set_may.operands().resize(2);
166  code_set_may.op0()=where;
167  code_set_may.op1()=
168  address_of_exprt(string_constantt(rule.taint));
169  goto_programt::targett t=insert_after.add_instruction();
170  t->make_other(code_set_may);
171  t->source_location=instruction.source_location;
172  }
173  break;
174 
176  {
177  goto_programt::targett t=insert_before.add_instruction();
178  binary_predicate_exprt get_may("get_may");
179  get_may.op0()=where;
180  get_may.op1()=address_of_exprt(string_constantt(rule.taint));
181  t->make_assertion(not_exprt(get_may));
182  t->source_location=instruction.source_location;
183  t->source_location.set_property_class(
184  "taint rule "+id2string(rule.id));
185  t->source_location.set_comment(rule.message);
186  }
187  break;
188 
190  {
191  codet code_clear_may("clear_may");
192  code_clear_may.operands().resize(2);
193  code_clear_may.op0()=where;
194  code_clear_may.op1()=
195  address_of_exprt(string_constantt(rule.taint));
196  goto_programt::targett t=insert_after.add_instruction();
197  t->make_other(code_clear_may);
198  t->source_location=instruction.source_location;
199  }
200  break;
201  }
202  }
203  }
204  }
205  }
206  break;
207 
208  default:
209  {
210  }
211  }
212 
213  if(!insert_before.empty())
214  {
215  goto_function.body.insert_before_swap(it, insert_before);
216  // advance until we get back to the call
217  while(!it->is_function_call()) ++it;
218  }
219 
220  if(!insert_after.empty())
221  {
222  goto_function.body.destructive_insert(
223  std::next(it), insert_after);
224  }
225  }
226 }
227 
229  const std::string &taint_file_name,
230  const symbol_tablet &symbol_table,
231  goto_functionst &goto_functions,
232  bool show_full,
233  const std::string &json_file_name)
234 {
235  try
236  {
237  json_arrayt json_result;
238  bool use_json=!json_file_name.empty();
239 
240  status() << "Reading taint file `" << taint_file_name
241  << "'" << eom;
242 
243  if(taint_parser(taint_file_name, taint, get_message_handler()))
244  {
245  error() << "Failed to read taint definition file" << eom;
246  return true;
247  }
248 
249  status() << "Got " << taint.rules.size()
250  << " taint definitions" << eom;
251 
252  taint.output(debug());
253  debug() << eom;
254 
255  status() << "Instrumenting taint" << eom;
256 
257  class_hierarchy(symbol_table);
258 
259  const namespacet ns(symbol_table);
260  instrument(ns, goto_functions);
261  goto_functions.update();
262 
263  bool have_entry_point=
264  goto_functions.function_map.find(goto_functionst::entry_point())!=
265  goto_functions.function_map.end();
266 
267  // do we have an entry point?
268  if(have_entry_point)
269  {
270  status() << "Working from entry point" << eom;
271  }
272  else
273  {
274  status() << "No entry point found; "
275  << "we will consider the heads of all functions as reachable"
276  << eom;
277 
278  goto_programt end, gotos, calls;
279 
281 
282  forall_goto_functions(f_it, goto_functions)
283  if(f_it->second.body_available() &&
284  f_it->first!=goto_functionst::entry_point())
285  {
287  const code_function_callt call(ns.lookup(f_it->first).symbol_expr());
288  t->make_function_call(call);
289  calls.add_instruction()->make_goto(end.instructions.begin());
291  g->make_goto(t, side_effect_expr_nondett(bool_typet()));
292  }
293 
295  goto_functions.function_map[goto_functionst::entry_point()];
296 
297  goto_programt &body=entry.body;
298 
299  body.destructive_append(gotos);
300  body.destructive_append(calls);
301  body.destructive_append(end);
302 
303  goto_functions.update();
304  }
305 
306  status() << "Data-flow analysis" << eom;
307 
308  custom_bitvector_analysist custom_bitvector_analysis;
309  custom_bitvector_analysis(goto_functions, ns);
310 
311  if(show_full)
312  {
313  custom_bitvector_analysis.output(ns, goto_functions, std::cout);
314  return false;
315  }
316 
317  forall_goto_functions(f_it, goto_functions)
318  {
319  if(!f_it->second.body.has_assertion())
320  continue;
321 
322  const symbolt &symbol=ns.lookup(f_it->first);
323 
324  if(f_it->first=="__actual_thread_spawn")
325  continue;
326 
327  bool first=true;
328 
329  forall_goto_program_instructions(i_it, f_it->second.body)
330  {
331  if(!i_it->is_assert())
332  continue;
334  continue;
335 
336  if(custom_bitvector_analysis[i_it].has_values.is_false())
337  continue;
338 
339  exprt result=custom_bitvector_analysis.eval(i_it->guard, i_it);
340  exprt result2=simplify_expr(result, ns);
341 
342  if(result2.is_true())
343  continue;
344 
345  if(first)
346  {
347  first=false;
348  if(!use_json)
349  std::cout << "\n"
350  << "******** Function " << symbol.display_name() << '\n';
351  }
352 
353  if(use_json)
354  {
356  json["bugClass"] =
357  json_stringt(i_it->source_location.get_property_class());
358  json["file"] = json_stringt(i_it->source_location.get_file());
359  json["line"]=
360  json_numbert(id2string(i_it->source_location.get_line()));
361  json_result.array.push_back(json);
362  }
363  else
364  {
365  std::cout << i_it->source_location;
366  if(!i_it->source_location.get_comment().empty())
367  std::cout << ": " << i_it->source_location.get_comment();
368 
369  if(!i_it->source_location.get_property_class().empty())
370  std::cout << " ("
371  << i_it->source_location.get_property_class() << ")";
372 
373  std::cout << '\n';
374  }
375  }
376  }
377 
378  if(use_json)
379  {
380  std::ofstream json_out(json_file_name);
381 
382  if(!json_out)
383  {
384  error() << "Failed to open json output `"
385  << json_file_name << "'" << eom;
386  return true;
387  }
388 
389  status() << "Analysis result is written to `"
390  << json_file_name << "'" << eom;
391 
392  json_out << json_result << '\n';
393  }
394 
395  return false;
396  }
397  catch(const char *error_msg)
398  {
399  error() << error_msg << eom;
400  return true;
401  }
402  catch(const std::string &error_msg)
403  {
404  error() << error_msg << eom;
405  return true;
406  }
407  catch(...)
408  {
409  error() << "Caught unexpected error in taint_analysist::operator()" << eom;
410  return true;
411  }
412 }
413 
415  goto_modelt &goto_model,
416  const std::string &taint_file_name,
417  message_handlert &message_handler,
418  bool show_full,
419  const std::string &json_file_name)
420 {
422  taint_analysis.set_message_handler(message_handler);
423  return taint_analysis(
424  taint_file_name,
425  goto_model.symbol_table,
426  goto_model.goto_functions,
427  show_full,
428  json_file_name);
429 }
Boolean negation.
Definition: std_expr.h:3308
Field-insensitive, location-sensitive bitvector analysis.
Base type of functions.
Definition: std_types.h:751
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
exprt & op0()
Definition: expr.h:84
void set_property_class(const irep_idt &property_class)
Non-graph-based representation of the class hierarchy.
exprt simplify_expr(const exprt &src, const namespacet &ns)
goto_program_instruction_typet type
What kind of instruction?
Definition: goto_program.h:190
void output(std::ostream &) const
const code_typet & to_code_type(const typet &type)
Cast a typet to a code_typet.
Definition: std_types.h:982
const irep_idt & get_identifier() const
Definition: std_expr.h:176
void destructive_append(goto_programt &p)
Appends the given program p to *this. p is destroyed.
Definition: goto_program.h:524
Taint Parser.
bool is_true() const
Return whether the expression is a constant representing true.
Definition: expr.cpp:90
function_mapt function_map
The Boolean type.
Definition: std_types.h:28
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:29
Symbol table entry.
Definition: symbol.h:27
This class represents an instruction in the GOTO intermediate representation.
Definition: goto_program.h:178
Class Hierarchy.
virtual void output(const namespacet &ns, const goto_functionst &goto_functions, std::ostream &out) const
Output the abstract states for a whole program.
Definition: ai.cpp:24
bool taint_analysis(goto_modelt &goto_model, const std::string &taint_file_name, message_handlert &message_handler, bool show_full, const std::string &json_file_name)
class_hierarchyt class_hierarchy
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:121
argumentst & arguments()
Definition: std_code.h:1109
void instrument(const namespacet &, goto_functionst &)
instructionst::iterator targett
Definition: goto_program.h:414
The NIL expression.
Definition: std_expr.h:4461
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:420
static bool has_get_must_or_may(const exprt &)
exprt & op1()
Definition: expr.h:87
The symbol table.
Definition: symbol_table.h:19
mstreamt & error() const
Definition: message.h:386
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:93
::goto_functiont goto_functiont
A side_effect_exprt that returns a non-deterministically chosen value.
Definition: std_code.h:1633
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
codet representation of a function call statement.
Definition: std_code.h:1036
A collection of goto functions.
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:251
size_t size() const
Definition: dstring.h:91
Class that provides messages with a built-in verbosity &#39;level&#39;.
Definition: message.h:144
dstringt has one field, an unsigned integer no which is an index into a static table of strings...
Definition: dstring.h:35
Operator to return the address of an object.
Definition: std_expr.h:3255
std::vector< irep_idt > idst
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:72
const irep_idt & display_name() const
Return language specific display name if present.
Definition: symbol.h:55
static eomt eom
Definition: message.h:284
bool taint_parser(const std::string &file_name, taint_parse_treet &dest, message_handlert &message_handler)
message_handlert & get_message_handler()
Definition: message.h:174
static irep_idt entry_point()
Get the identifier of the entry point to a goto model.
taint_parse_treet taint
mstreamt & result() const
Definition: message.h:396
mstreamt & status() const
Definition: message.h:401
exprt & function()
Definition: std_code.h:1099
source_locationt source_location
The location of the instruction in the source file.
Definition: goto_program.h:187
targett add_instruction()
Adds an instruction at the end.
Definition: goto_program.h:541
Base class for all expressions.
Definition: expr.h:54
const parameterst & parameters() const
Definition: std_types.h:893
bool empty() const
Is the program empty?
Definition: goto_program.h:626
bool operator()(const std::string &taint_file_name, const symbol_tablet &, goto_functionst &, bool show_full, const std::string &json_file_name)
exprt eval(const exprt &src, locationt loc)
Taint Analysis.
mstreamt & debug() const
Definition: message.h:416
Data structure for representing an arbitrary statement in a program.
Definition: std_code.h:34
A base class for expressions that are predicates, i.e., Boolean-typed, and that take exactly two argu...
Definition: std_expr.h:835
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions, goto_programs, exprts, etc.
Definition: invariant.h:485
arrayt array
Definition: json.h:129
#define forall_goto_functions(it, functions)
operandst & operands()
Definition: expr.h:78
#define forall_goto_program_instructions(it, program)
Definition: goto_program.h:804
bool empty() const
Definition: dstring.h:75
idst get_parents_trans(const irep_idt &id) const
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:32
json_objectt json(const source_locationt &location)
Definition: json_expr.cpp:87
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
Definition: namespace.cpp:166
const code_function_callt & to_code_function_call(const codet &code)
Definition: std_code.h:1173