cprover
symex_dereference.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Symbolic Execution of ANSI-C
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
11 
12 #include "goto_symex.h"
13 
14 #include <util/arith_tools.h>
15 #include <util/base_type.h>
16 #include <util/byte_operators.h>
17 #include <util/c_types.h>
18 #include <util/exception_utils.h>
19 #include <util/invariant.h>
21 
23 
25 
27  exprt &expr,
28  statet &state,
29  guardt &guard)
30 {
31  // Could be member, could be if, could be index.
32 
33  if(expr.id()==ID_member)
35  to_member_expr(expr).struct_op(), state, guard);
36  else if(expr.id()==ID_if)
37  {
38  // the condition is not an address
40  to_if_expr(expr).cond(), state, guard, false);
41 
42  // add to guard?
44  to_if_expr(expr).true_case(), state, guard);
46  to_if_expr(expr).false_case(), state, guard);
47  }
48  else if(expr.id()==ID_index)
49  {
50  // the index is not an address
52  to_index_expr(expr).index(), state, guard, false);
53 
54  // the array _is_ an address
56  to_index_expr(expr).array(), state, guard);
57  }
58  else
59  {
60  // give up and dereference
61 
62  dereference_rec(expr, state, guard, false);
63  }
64 }
65 
67 {
68  // Could be member, could be if, could be index.
69 
70  if(expr.id()==ID_member)
71  {
73  to_member_expr(expr).struct_op());
74  }
75  else if(expr.id()==ID_if)
76  {
77  return
78  is_index_member_symbol_if(to_if_expr(expr).true_case()) &&
79  is_index_member_symbol_if(to_if_expr(expr).false_case());
80  }
81  else if(expr.id()==ID_index)
82  {
83  return is_index_member_symbol_if(to_index_expr(expr).array());
84  }
85  else if(expr.id()==ID_symbol)
86  return true;
87  else
88  return false;
89 }
90 
93  const exprt &expr,
94  statet &state,
95  guardt &guard,
96  bool keep_array)
97 {
98  exprt result;
99 
100  if(expr.id()==ID_byte_extract_little_endian ||
101  expr.id()==ID_byte_extract_big_endian)
102  {
103  // address_of(byte_extract(op, offset, t)) is
104  // address_of(op) + offset with adjustments for arrays
105 
107 
108  // recursive call
109  result=address_arithmetic(be.op(), state, guard, keep_array);
110 
111  if(ns.follow(be.op().type()).id()==ID_array &&
112  result.id()==ID_address_of)
113  {
115 
116  // turn &a of type T[i][j] into &(a[0][0])
117  for(const typet *t=&(ns.follow(a.type().subtype()));
118  t->id()==ID_array && !base_type_eq(expr.type(), *t, ns);
119  t=&(ns.follow(*t).subtype()))
121  }
122 
123  // do (expr.type() *)(((char *)op)+offset)
124  result=typecast_exprt(result, pointer_type(char_type()));
125 
126  // there could be further dereferencing in the offset
127  exprt offset=be.offset();
128  dereference_rec(offset, state, guard, false);
129 
130  result=plus_exprt(result, offset);
131 
132  // treat &array as &array[0]
133  const typet &expr_type=ns.follow(expr.type());
134  typet dest_type_subtype;
135 
136  if(expr_type.id()==ID_array && !keep_array)
137  dest_type_subtype=expr_type.subtype();
138  else
139  dest_type_subtype=expr_type;
140 
141  result=typecast_exprt(result, pointer_type(dest_type_subtype));
142  }
143  else if(expr.id()==ID_index ||
144  expr.id()==ID_member)
145  {
147  ode.build(expr, ns);
148 
149  const byte_extract_exprt be(
150  byte_extract_id(), ode.root_object(), ode.offset(), expr.type());
151 
152  // recursive call
153  result=address_arithmetic(be, state, guard, keep_array);
154 
155  do_simplify(result);
156  }
157  else if(expr.id()==ID_dereference)
158  {
159  // ANSI-C guarantees &*p == p no matter what p is,
160  // even if it's complete garbage
161  // just grab the pointer, but be wary of further dereferencing
162  // in the pointer itself
163  result=to_dereference_expr(expr).pointer();
164  dereference_rec(result, state, guard, false);
165  }
166  else if(expr.id()==ID_if)
167  {
168  if_exprt if_expr=to_if_expr(expr);
169 
170  // the condition is not an address
171  dereference_rec(if_expr.cond(), state, guard, false);
172 
173  // recursive call
174  if_expr.true_case()=
175  address_arithmetic(if_expr.true_case(), state, guard, keep_array);
176  if_expr.false_case()=
177  address_arithmetic(if_expr.false_case(), state, guard, keep_array);
178 
179  result=if_expr;
180  }
181  else if(expr.id()==ID_symbol ||
182  expr.id()==ID_string_constant ||
183  expr.id()==ID_label ||
184  expr.id()==ID_array)
185  {
186  // give up, just dereference
187  result=expr;
188  dereference_rec(result, state, guard, false);
189 
190  // turn &array into &array[0]
191  if(ns.follow(result.type()).id()==ID_array && !keep_array)
192  result=index_exprt(result, from_integer(0, index_type()));
193 
194  // handle field-sensitive SSA symbol
195  mp_integer offset=0;
196  if(expr.id()==ID_symbol &&
197  expr.get_bool(ID_C_SSA_symbol))
198  {
199  auto offset_opt = compute_pointer_offset(expr, ns);
200  PRECONDITION(offset_opt.has_value());
201  offset = *offset_opt;
202  }
203 
204  if(offset>0)
205  {
206  const byte_extract_exprt be(
207  byte_extract_id(),
208  to_ssa_expr(expr).get_l1_object(),
209  from_integer(offset, index_type()),
210  expr.type());
211 
212  result=address_arithmetic(be, state, guard, keep_array);
213 
214  do_simplify(result);
215  }
216  else
217  result=address_of_exprt(result);
218  }
219  else if(expr.id() == ID_typecast)
220  {
221  const typecast_exprt &tc_expr = to_typecast_expr(expr);
222 
223  result = address_arithmetic(tc_expr.op(), state, guard, keep_array);
224 
225  // treat &array as &array[0]
226  const typet &expr_type = ns.follow(expr.type());
227  typet dest_type_subtype;
228 
229  if(expr_type.id() == ID_array && !keep_array)
230  dest_type_subtype = expr_type.subtype();
231  else
232  dest_type_subtype = expr_type;
233 
234  result = typecast_exprt(result, pointer_type(dest_type_subtype));
235  }
236  else
238  "goto_symext::address_arithmetic does not handle " + expr.id_string());
239 
240  const typet &expr_type=ns.follow(expr.type());
241  INVARIANT((expr_type.id()==ID_array && !keep_array) ||
242  base_type_eq(pointer_type(expr_type), result.type(), ns),
243  "either non-persistent array or pointer to result");
244 
245  return result;
246 }
247 
249  exprt &expr,
250  statet &state,
251  guardt &guard,
252  const bool write)
253 {
254  if(expr.id()==ID_dereference)
255  {
256  dereference_exprt to_check = to_dereference_expr(expr);
257  bool expr_is_not_null = false;
258 
259  if(state.threads.size() == 1)
260  {
261  const irep_idt &expr_function = state.source.function;
262  if(!expr_function.empty())
263  {
264  state.get_original_name(to_check);
265 
266  expr_is_not_null =
267  state.safe_pointers.at(expr_function).is_safe_dereference(
268  to_check, state.source.pc);
269  }
270  }
271 
272  exprt tmp1;
273  tmp1.swap(to_dereference_expr(expr).pointer());
274 
275  // first make sure there are no dereferences in there
276  dereference_rec(tmp1, state, guard, false);
277 
278  // we need to set up some elaborate call-backs
279  symex_dereference_statet symex_dereference_state(*this, state);
280 
282  ns,
283  state.symbol_table,
284  symex_dereference_state,
286  expr_is_not_null);
287 
288  // std::cout << "**** " << format(tmp1) << '\n';
289  exprt tmp2=
290  dereference.dereference(
291  tmp1,
292  guard,
293  write?
296  // std::cout << "**** " << format(tmp2) << '\n';
297 
298  expr.swap(tmp2);
299 
300  // this may yield a new auto-object
301  trigger_auto_object(expr, state);
302  }
303  else if(expr.id()==ID_index &&
304  to_index_expr(expr).array().id()==ID_member &&
305  to_array_type(ns.follow(to_index_expr(expr).array().type())).
306  size().is_zero())
307  {
308  // This is an expression of the form x.a[i],
309  // where a is a zero-sized array. This gets
310  // re-written into *(&x.a+i)
311 
312  index_exprt index_expr=to_index_expr(expr);
313 
314  address_of_exprt address_of_expr(index_expr.array());
315  address_of_expr.type()=pointer_type(expr.type());
316 
317  dereference_exprt tmp;
318  tmp.pointer()=plus_exprt(address_of_expr, index_expr.index());
319  tmp.type()=expr.type();
321 
322  // recursive call
323  dereference_rec(tmp, state, guard, write);
324 
325  expr.swap(tmp);
326  }
327  else if(expr.id()==ID_index &&
328  to_index_expr(expr).array().type().id()==ID_pointer)
329  {
330  // old stuff, will go away
331  UNREACHABLE;
332  }
333  else if(expr.id()==ID_address_of)
334  {
335  address_of_exprt &address_of_expr=to_address_of_expr(expr);
336 
337  exprt &object=address_of_expr.object();
338 
339  const typet &expr_type=ns.follow(expr.type());
340  expr = address_arithmetic(
341  object,
342  state,
343  guard,
344  to_pointer_type(expr_type).subtype().id() == ID_array);
345  }
346  else if(expr.id()==ID_typecast)
347  {
348  exprt &tc_op=to_typecast_expr(expr).op();
349 
350  // turn &array into &array[0] when casting to pointer-to-element-type
351  if(tc_op.id()==ID_address_of &&
352  to_address_of_expr(tc_op).object().type().id()==ID_array &&
353  base_type_eq(
354  expr.type(),
355  pointer_type(to_address_of_expr(tc_op).object().type().subtype()),
356  ns))
357  {
358  expr=
360  index_exprt(
361  to_address_of_expr(tc_op).object(),
362  from_integer(0, index_type())));
363 
364  dereference_rec(expr, state, guard, write);
365  }
366  else
367  {
368  dereference_rec(tc_op, state, guard, write);
369  }
370  }
371  else
372  {
373  Forall_operands(it, expr)
374  dereference_rec(*it, state, guard, write);
375  }
376 }
377 
379  exprt &expr,
380  statet &state,
381  const bool write)
382 {
383  // The expression needs to be renamed to level 1
384  // in order to distinguish addresses of local variables
385  // from different frames. Would be enough to rename
386  // symbols whose address is taken.
387  PRECONDITION(!state.call_stack().empty());
388  state.rename(expr, ns, goto_symex_statet::L1);
389 
390  // start the recursion!
391  guardt guard;
392  dereference_rec(expr, state, guard, write);
393  // dereferencing may introduce new symbol_exprt
394  // (like __CPROVER_memory)
395  state.rename(expr, ns, goto_symex_statet::L1);
396 }
const if_exprt & to_if_expr(const exprt &expr)
Cast an exprt to an if_exprt.
Definition: std_expr.h:3482
The type of an expression, extends irept.
Definition: type.h:27
exprt & true_case()
Definition: std_expr.h:3455
Semantic type conversion.
Definition: std_expr.h:2277
BigInt mp_integer
Definition: mp_arith.h:22
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:243
goto_programt::const_targett pc
Definition: symex_target.h:41
exprt & object()
Definition: std_expr.h:3265
const exprt & op() const
Definition: std_expr.h:371
bool base_type_eq(const typet &type1, const typet &type2, const namespacet &ns)
Check types for equality across all levels of hierarchy.
Definition: base_type.cpp:332
void rename(exprt &expr, const namespacet &ns, levelt level=L2)
static bool is_index_member_symbol_if(const exprt &expr)
Thrown when we encounter an instruction, parameters to an instruction etc.
The trinary if-then-else operator.
Definition: std_expr.h:3427
exprt & cond()
Definition: std_expr.h:3445
Definition: guard.h:19
typet & type()
Return the type of the expression.
Definition: expr.h:68
const address_of_exprt & to_address_of_expr(const exprt &expr)
Cast an exprt to an address_of_exprt.
Definition: std_expr.h:3282
virtual void do_simplify(exprt &)
Definition: goto_symex.cpp:18
bool get_bool(const irep_namet &name) const
Definition: irep.cpp:239
Pointer Dereferencing.
const dereference_exprt & to_dereference_expr(const exprt &expr)
Cast an exprt to a dereference_exprt.
Definition: std_expr.h:3397
void get_original_name(exprt &expr) const
namespacet ns
Initialized just before symbolic execution begins, to point to both outer_symbol_table and the symbol...
Definition: goto_symex.h:215
const irep_idt & id() const
Definition: irep.h:259
const ssa_exprt & to_ssa_expr(const exprt &expr)
Cast a generic exprt to an ssa_exprt.
Definition: ssa_expr.h:160
std::unordered_map< irep_idt, local_safe_pointerst > safe_pointers
Expression classes for byte-level operators.
void build(const exprt &expr, const namespacet &ns)
Build an object_descriptor_exprt from a given expr.
Definition: std_expr.cpp:123
Operator to dereference a pointer.
Definition: std_expr.h:3355
Symbolic Execution.
exprt address_arithmetic(const exprt &, statet &, guardt &, bool keep_array)
Evaluate an ID_address_of expression.
#define PRECONDITION(CONDITION)
Definition: invariant.h:438
const member_exprt & to_member_expr(const exprt &expr)
Cast an exprt to a member_exprt.
Definition: std_expr.h:3959
Split an expression into a base object and a (byte) offset.
Definition: std_expr.h:2136
void dereference_rec(exprt &, statet &, guardt &, bool write)
std::vector< threadt > threads
The plus expression Associativity is not specified.
Definition: std_expr.h:1042
virtual void dereference(exprt &, statet &, bool write)
const typet & follow(const typet &) const
Resolve type symbol to the type it points to.
Definition: namespace.cpp:62
bitvector_typet index_type()
Definition: c_types.cpp:16
dstringt has one field, an unsigned integer no which is an index into a static table of strings...
Definition: dstring.h:35
Operator to return the address of an object.
Definition: std_expr.h:3255
exprt & false_case()
Definition: std_expr.h:3465
Pointer Logic.
exprt & index()
Definition: std_expr.h:1631
irep_idt language_mode
language_mode: ID_java, ID_C or another language identifier if we know the source language in use...
Definition: goto_symex.h:197
void dereference_rec_address_of(exprt &, statet &, guardt &)
const array_typet & to_array_type(const typet &type)
Cast a typet to an array_typet.
Definition: std_types.h:1048
Base class for all expressions.
Definition: expr.h:54
exprt & pointer()
Definition: std_expr.h:3380
const exprt & root_object() const
Definition: std_expr.cpp:199
const exprt & struct_op() const
Definition: std_expr.h:3931
void trigger_auto_object(const exprt &, statet &)
call_stackt & call_stack()
optionalt< mp_integer > compute_pointer_offset(const exprt &expr, const namespacet &ns)
source_locationt & add_source_location()
Definition: type.h:67
const source_locationt & source_location() const
Definition: expr.h:228
#define UNREACHABLE
This should be used to mark dead code.
Definition: invariant.h:478
const typecast_exprt & to_typecast_expr(const exprt &expr)
Cast an exprt to a typecast_exprt.
Definition: std_expr.h:2306
symbol_tablet symbol_table
contains symbols that are minted during symbolic execution, such as dynamically created objects etc...
const std::string & id_string() const
Definition: irep.h:262
void swap(irept &irep)
Definition: irep.h:303
#define Forall_operands(it, expr)
Definition: expr.h:26
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Definition: std_types.h:1544
Base Type Computation.
const typet & subtype() const
Definition: type.h:38
TO_BE_DOCUMENTED.
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
const byte_extract_exprt & to_byte_extract_expr(const exprt &expr)
Wrapper for a function dereferencing pointer expressions using a value set.
bool empty() const
Definition: dstring.h:75
exprt & array()
Definition: std_expr.h:1621
const index_exprt & to_index_expr(const exprt &expr)
Cast an exprt to an index_exprt.
Definition: std_expr.h:1648
bitvector_typet char_type()
Definition: c_types.cpp:114
Symbolic Execution of ANSI-C.
irep_idt byte_extract_id()
Array index operator.
Definition: std_expr.h:1595
symex_targett::sourcet source