cprover
taint_analysis.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Taint Analysis
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
11 
12 #include "taint_analysis.h"
13 
14 #include <iostream>
15 #include <fstream>
16 
17 #include <util/prefix.h>
18 #include <util/simplify_expr.h>
19 #include <util/json.h>
20 
21 #include <ansi-c/string_constant.h>
22 
24 
26 
27 #include "taint_parser.h"
28 
30 {
31 public:
33  {
34  }
35 
36  bool operator()(
37  const std::string &taint_file_name,
38  const symbol_tablet &,
40  bool show_full,
41  const std::string &json_file_name);
42 
43 protected:
46 
47  void instrument(const namespacet &, goto_functionst &);
49 };
50 
52  const namespacet &ns,
53  goto_functionst &goto_functions)
54 {
55  for(auto &function : goto_functions.function_map)
56  instrument(ns, function.second);
57 }
58 
60  const namespacet &ns,
61  goto_functionst::goto_functiont &goto_function)
62 {
63  for(goto_programt::instructionst::iterator
64  it=goto_function.body.instructions.begin();
65  it!=goto_function.body.instructions.end();
66  it++)
67  {
68  const goto_programt::instructiont &instruction=*it;
69 
70  goto_programt tmp;
71 
72  switch(instruction.type)
73  {
74  case FUNCTION_CALL:
75  {
76  const code_function_callt &function_call=
77  to_code_function_call(instruction.code);
78  const exprt &function=function_call.function();
79 
80  if(function.id()==ID_symbol)
81  {
82  const irep_idt &identifier=
83  to_symbol_expr(function).get_identifier();
84 
85  std::set<irep_idt> identifiers;
86 
87  identifiers.insert(identifier);
88 
89  irep_idt class_id=function.get(ID_C_class);
90  if(class_id.empty())
91  {
92  }
93  else
94  {
95  std::string suffix=
96  std::string(
97  id2string(identifier), class_id.size(), std::string::npos);
98 
99  class_hierarchyt::idst parents=
101  for(const auto &p : parents)
102  identifiers.insert(id2string(p)+suffix);
103  }
104 
105  for(const auto &rule : taint.rules)
106  {
107  bool match=false;
108  for(const auto &i : identifiers)
109  if(i==rule.function_identifier ||
110  has_prefix(
111  id2string(i),
112  "java::"+id2string(rule.function_identifier)+":"))
113  {
114  match=true;
115  break;
116  }
117 
118  if(match)
119  {
120  debug() << "MATCH " << rule.id << " on " << identifier << eom;
121 
122  exprt where=nil_exprt();
123 
124  const code_typet &code_type=to_code_type(function.type());
125 
126  bool have_this=
127  !code_type.parameters().empty() &&
128  code_type.parameters().front().get_bool(ID_C_this);
129 
130  switch(rule.where)
131  {
133  {
134  const symbolt &return_value_symbol=
135  ns.lookup(id2string(identifier)+"#return_value");
136  where=return_value_symbol.symbol_expr();
137  }
138  break;
139 
141  {
142  unsigned nr=
143  have_this?rule.parameter_number:rule.parameter_number-1;
144  if(function_call.arguments().size()>nr)
145  where=function_call.arguments()[nr];
146  }
147  break;
148 
150  if(have_this)
151  {
152  assert(!function_call.arguments().empty());
153  where=function_call.arguments()[0];
154  }
155  break;
156  }
157 
158  switch(rule.kind)
159  {
161  {
162  codet code_set_may("set_may");
163  code_set_may.operands().resize(2);
164  code_set_may.op0()=where;
165  code_set_may.op1()=
166  address_of_exprt(string_constantt(rule.taint));
168  t->make_other(code_set_may);
169  t->source_location=instruction.source_location;
170  }
171  break;
172 
174  {
176  binary_predicate_exprt get_may("get_may");
177  get_may.op0()=where;
178  get_may.op1()=address_of_exprt(string_constantt(rule.taint));
179  t->make_assertion(not_exprt(get_may));
180  t->source_location=instruction.source_location;
181  t->source_location.set_property_class(
182  "taint rule "+id2string(rule.id));
183  t->source_location.set_comment(rule.message);
184  }
185  break;
186 
188  {
189  codet code_clear_may("clear_may");
190  code_clear_may.operands().resize(2);
191  code_clear_may.op0()=where;
192  code_clear_may.op1()=
193  address_of_exprt(string_constantt(rule.taint));
195  t->make_other(code_clear_may);
196  t->source_location=instruction.source_location;
197  }
198  break;
199  }
200  }
201  }
202  }
203  }
204  break;
205 
206  default:
207  {
208  }
209  }
210 
211  if(!tmp.empty())
212  {
213  goto_programt::targett next=it;
214  next++;
215  goto_function.body.destructive_insert(next, tmp);
216  }
217  }
218 }
219 
221  const std::string &taint_file_name,
222  const symbol_tablet &symbol_table,
223  goto_functionst &goto_functions,
224  bool show_full,
225  const std::string &json_file_name)
226 {
227  try
228  {
229  json_arrayt json_result;
230  bool use_json=!json_file_name.empty();
231 
232  status() << "Reading taint file `" << taint_file_name
233  << "'" << eom;
234 
235  if(taint_parser(taint_file_name, taint, get_message_handler()))
236  {
237  error() << "Failed to read taint definition file" << eom;
238  return true;
239  }
240 
241  status() << "Got " << taint.rules.size()
242  << " taint definitions" << eom;
243 
244  taint.output(debug());
245  debug() << eom;
246 
247  status() << "Instrumenting taint" << eom;
248 
249  class_hierarchy(symbol_table);
250 
251  const namespacet ns(symbol_table);
252  instrument(ns, goto_functions);
253  goto_functions.update();
254 
255  bool have_entry_point=
256  goto_functions.function_map.find(goto_functionst::entry_point())!=
257  goto_functions.function_map.end();
258 
259  // do we have an entry point?
260  if(have_entry_point)
261  {
262  status() << "Working from entry point" << eom;
263  }
264  else
265  {
266  status() << "No entry point found; "
267  << "we will consider the heads of all functions as reachable"
268  << eom;
269 
270  goto_programt end, gotos, calls;
271 
273 
274  forall_goto_functions(f_it, goto_functions)
275  if(f_it->second.body_available() &&
276  f_it->first!=goto_functionst::entry_point())
277  {
279  code_function_callt call;
280  call.function()=ns.lookup(f_it->first).symbol_expr();
281  t->make_function_call(call);
282  calls.add_instruction()->make_goto(end.instructions.begin());
284  g->make_goto(t, side_effect_expr_nondett(bool_typet()));
285  }
286 
288  goto_functions.function_map[goto_functionst::entry_point()];
289 
290  goto_programt &body=entry.body;
291 
292  body.destructive_append(gotos);
293  body.destructive_append(calls);
294  body.destructive_append(end);
295 
296  goto_functions.update();
297  }
298 
299  status() << "Data-flow analysis" << eom;
300 
301  custom_bitvector_analysist custom_bitvector_analysis;
302  custom_bitvector_analysis(goto_functions, ns);
303 
304  if(show_full)
305  {
306  custom_bitvector_analysis.output(ns, goto_functions, std::cout);
307  return false;
308  }
309 
310  forall_goto_functions(f_it, goto_functions)
311  {
312  if(!f_it->second.body.has_assertion())
313  continue;
314 
315  const symbolt &symbol=ns.lookup(f_it->first);
316 
317  if(f_it->first=="__actual_thread_spawn")
318  continue;
319 
320  bool first=true;
321 
322  forall_goto_program_instructions(i_it, f_it->second.body)
323  {
324  if(!i_it->is_assert())
325  continue;
327  continue;
328 
329  if(custom_bitvector_analysis[i_it].has_values.is_false())
330  continue;
331 
332  exprt result=custom_bitvector_analysis.eval(i_it->guard, i_it);
333  exprt result2=simplify_expr(result, ns);
334 
335  if(result2.is_true())
336  continue;
337 
338  if(first)
339  {
340  first=false;
341  if(!use_json)
342  std::cout << "\n"
343  << "******** Function " << symbol.display_name() << '\n';
344  }
345 
346  if(use_json)
347  {
349  json["bugClass"]=
350  json_stringt(id2string(i_it->source_location.get_property_class()));
351  json["file"]=
352  json_stringt(id2string(i_it->source_location.get_file()));
353  json["line"]=
354  json_numbert(id2string(i_it->source_location.get_line()));
355  json_result.array.push_back(json);
356  }
357  else
358  {
359  std::cout << i_it->source_location;
360  if(!i_it->source_location.get_comment().empty())
361  std::cout << ": " << i_it->source_location.get_comment();
362 
363  if(!i_it->source_location.get_property_class().empty())
364  std::cout << " ("
365  << i_it->source_location.get_property_class() << ")";
366 
367  std::cout << '\n';
368  }
369  }
370  }
371 
372  if(use_json)
373  {
374  std::ofstream json_out(json_file_name);
375 
376  if(!json_out)
377  {
378  error() << "Failed to open json output `"
379  << json_file_name << "'" << eom;
380  return true;
381  }
382 
383  status() << "Analysis result is written to `"
384  << json_file_name << "'" << eom;
385 
386  json_out << json_result << '\n';
387  }
388 
389  return false;
390  }
391  catch(const char *error_msg)
392  {
393  error() << error_msg << eom;
394  return true;
395  }
396  catch(const std::string &error_msg)
397  {
398  error() << error_msg << eom;
399  return true;
400  }
401  catch(...)
402  {
403  return true;
404  }
405 }
406 
408  goto_modelt &goto_model,
409  const std::string &taint_file_name,
410  message_handlert &message_handler,
411  bool show_full,
412  const std::string &json_file_name)
413 {
415  taint_analysis.set_message_handler(message_handler);
416  return taint_analysis(
417  taint_file_name,
418  goto_model.symbol_table,
419  goto_model.goto_functions,
420  show_full,
421  json_file_name);
422 }
mstreamt & result()
Definition: message.h:233
Boolean negation.
Definition: std_expr.h:2648
Field-insensitive, location-sensitive bitvector analysis.
virtual bool lookup(const irep_idt &name, const symbolt *&symbol) const
Definition: namespace.cpp:139
targett add_instruction()
Adds an instruction at the end.
Base type of functions.
Definition: std_types.h:734
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
exprt & op0()
Definition: expr.h:84
exprt simplify_expr(const exprt &src, const namespacet &ns)
mstreamt & status()
Definition: message.h:238
void output(std::ostream &) const
instructionst instructions
The list of instructions in the goto program.
const irep_idt & get_identifier() const
Definition: std_expr.h:120
Taint Parser.
bool is_true() const
Definition: expr.cpp:133
bool empty() const
Is the program empty?
The proper Booleans.
Definition: std_types.h:33
symbol_tablet symbol_table
Definition: goto_model.h:25
Symbol table entry.This is a symbol in the symbol table, stored in an object of type symbol_tablet...
Definition: symbol.h:33
static mstreamt & eom(mstreamt &m)
Definition: message.h:193
Class Hierarchy.
virtual void output(const namespacet &ns, const goto_functionst &goto_functions, std::ostream &out) const
Definition: ai.cpp:92
bool taint_analysis(goto_modelt &goto_model, const std::string &taint_file_name, message_handlert &message_handler, bool show_full, const std::string &json_file_name)
class_hierarchyt class_hierarchy
class symbol_exprt symbol_expr() const
produces a symbol_exprt for a symbol
Definition: symbol.cpp:191
argumentst & arguments()
Definition: std_code.h:689
void instrument(const namespacet &, goto_functionst &)
The NIL expression.
Definition: std_expr.h:3764
static bool has_get_must_or_may(const exprt &)
exprt & op1()
Definition: expr.h:87
The symbol table.
Definition: symbol_table.h:52
TO_BE_DOCUMENTED.
Definition: namespace.h:62
A side effect that returns a non-deterministically chosen value.
Definition: std_code.h:1037
A function call.
Definition: std_code.h:657
size_t size() const
Definition: dstring.h:77
goto_function_templatet< goto_programt > goto_functiont
Operator to return the address of an object.
Definition: std_expr.h:2593
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
std::vector< irep_idt > idst
A specialization of goto_program_templatet over goto programs in which instructions have codet type...
Definition: goto_program.h:24
const irep_idt & display_name() const
Definition: symbol.h:60
bool taint_parser(const std::string &file_name, taint_parse_treet &dest, message_handlert &message_handler)
mstreamt & debug()
Definition: message.h:253
message_handlert & get_message_handler()
Definition: message.h:127
taint_parse_treet taint
exprt & function()
Definition: std_code.h:677
Base class for all expressions.
Definition: expr.h:46
const parameterst & parameters() const
Definition: std_types.h:841
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast a generic exprt to a symbol_exprt.
Definition: std_expr.h:202
bool operator()(const std::string &taint_file_name, const symbol_tablet &, goto_functionst &, bool show_full, const std::string &json_file_name)
exprt eval(const exprt &src, locationt loc)
const code_typet & to_code_type(const typet &type)
Cast a generic typet to a code_typet.
Definition: std_types.h:884
mstreamt & error()
Definition: message.h:223
Taint Analysis.
void destructive_append(goto_program_templatet< codeT, guardT > &p)
Appends the given program, which is destroyed.
A statement in a programming language.
Definition: std_code.h:19
A generic base class for expressions that are predicates, i.e., boolean-typed, and that take exactly ...
Definition: std_expr.h:546
arrayt array
Definition: json.h:126
#define forall_goto_functions(it, functions)
operandst & operands()
Definition: expr.h:70
#define forall_goto_program_instructions(it, program)
Definition: goto_program.h:68
bool empty() const
Definition: dstring.h:61
idst get_parents_trans(const irep_idt &id) const
goto_functionst goto_functions
Definition: goto_model.h:26
json_objectt json(const source_locationt &location)
Definition: json_expr.cpp:23
const code_function_callt & to_code_function_call(const codet &code)
Definition: std_code.h:700