cprover
value_set_dereference.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Symbolic Execution of ANSI-C
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
11 
12 #include "value_set_dereference.h"
13 
14 #ifdef DEBUG
15 #include <iostream>
16 #include <util/format_expr.h>
17 #endif
18 
19 #include <util/arith_tools.h>
20 #include <util/array_name.h>
21 #include <util/base_type.h>
22 #include <util/byte_operators.h>
23 #include <util/c_types.h>
24 #include <util/config.h>
25 #include <util/cprover_prefix.h>
26 #include <util/format_type.h>
27 #include <util/fresh_symbol.h>
28 #include <util/guard.h>
29 #include <util/options.h>
32 #include <util/simplify_expr.h>
33 #include <util/ssa_expr.h>
34 
36  const exprt &pointer,
37  const guardt &guard,
38  const modet mode)
39 {
40  if(pointer.type().id()!=ID_pointer)
41  throw "dereference expected pointer type, but got "+
42  pointer.type().pretty();
43 
44  // we may get ifs due to recursive calls
45  if(pointer.id()==ID_if)
46  {
47  const if_exprt &if_expr=to_if_expr(pointer);
48  // push down the if
49  guardt true_guard=guard;
50  guardt false_guard=guard;
51 
52  true_guard.add(if_expr.cond());
53  false_guard.add(not_exprt(if_expr.cond()));
54 
55  exprt true_case=dereference(if_expr.true_case(), true_guard, mode);
56  exprt false_case=dereference(if_expr.false_case(), false_guard, mode);
57 
58  return if_exprt(if_expr.cond(), true_case, false_case);
59  }
60 
61  // type of the object
62  const typet &type=pointer.type().subtype();
63 
64 #if 0
65  std::cout << "DEREF: " << format(pointer) << '\n';
66 #endif
67 
68  // collect objects the pointer may point to
69  value_setst::valuest points_to_set;
70 
71  dereference_callback.get_value_set(pointer, points_to_set);
72 
73 #if 0
74  for(value_setst::valuest::const_iterator
75  it=points_to_set.begin();
76  it!=points_to_set.end();
77  it++)
78  std::cout << "P: " << format(*it) << '\n';
79 #endif
80 
81  // get the values of these
82 
83  std::list<valuet> values;
84 
85  for(value_setst::valuest::const_iterator
86  it=points_to_set.begin();
87  it!=points_to_set.end();
88  it++)
89  {
90  valuet value = build_reference_to(*it, pointer);
91 
92 #if 0
93  std::cout << "V: " << format(value.pointer_guard) << " --> ";
94  std::cout << format(value.value);
95  if(value.ignore)
96  std::cout << " (ignored)";
97  std::cout << '\n';
98 #endif
99 
100  if(!value.ignore)
101  values.push_back(value);
102  }
103 
104  // can this fail?
105  bool may_fail;
106 
107  if(values.empty())
108  {
109  may_fail=true;
110  }
111  else
112  {
113  may_fail=false;
114  for(std::list<valuet>::const_iterator
115  it=values.begin();
116  it!=values.end();
117  it++)
118  if(it->value.is_nil())
119  may_fail=true;
120  }
121 
122  if(may_fail)
123  {
124  // first see if we have a "failed object" for this pointer
125 
126  const symbolt *failed_symbol;
127  exprt failure_value;
128 
130  pointer, failed_symbol))
131  {
132  // yes!
133  failure_value=failed_symbol->symbol_expr();
134  failure_value.set(ID_C_invalid_object, true);
135  }
136  else
137  {
138  // else: produce new symbol
139  symbolt &symbol = get_fresh_aux_symbol(
140  type,
141  "symex",
142  "invalid_object",
143  pointer.source_location(),
146 
147  // make it a lvalue, so we can assign to it
148  symbol.is_lvalue=true;
149 
150  failure_value=symbol.symbol_expr();
151  failure_value.set(ID_C_invalid_object, true);
152  }
153 
154  valuet value;
155  value.value=failure_value;
156  value.pointer_guard=true_exprt();
157  values.push_front(value);
158  }
159 
160  // now build big case split, but we only do "good" objects
161 
162  exprt value=nil_exprt();
163 
164  for(std::list<valuet>::const_iterator
165  it=values.begin();
166  it!=values.end();
167  it++)
168  {
169  if(it->value.is_not_nil())
170  {
171  if(value.is_nil()) // first?
172  value=it->value;
173  else
174  value=if_exprt(it->pointer_guard, it->value, value);
175  }
176  }
177 
178  #if 0
179  std::cout << "R: " << format(value) << "\n\n";
180  #endif
181 
182  return value;
183 }
184 
194  const typet &object_type,
195  const typet &dereference_type) const
196 {
197  const typet *object_unwrapped = &object_type;
198  const typet *dereference_unwrapped = &dereference_type;
199  while(object_unwrapped->id() == ID_pointer &&
200  dereference_unwrapped->id() == ID_pointer)
201  {
202  object_unwrapped = &object_unwrapped->subtype();
203  dereference_unwrapped = &dereference_unwrapped->subtype();
204  }
205  if(dereference_unwrapped->id() == ID_empty)
206  {
207  return true;
208  }
209  else if(dereference_unwrapped->id() == ID_pointer &&
210  object_unwrapped->id() != ID_pointer)
211  {
212 #ifdef DEBUG
213  std::cout << "value_set_dereference: the dereference type has "
214  "too many ID_pointer levels"
215  << std::endl;
216  std::cout << " object_type: " << object_type.pretty() << std::endl;
217  std::cout << " dereference_type: " << dereference_type.pretty()
218  << std::endl;
219 #endif
220  }
221 
222  if(base_type_eq(object_type, dereference_type, ns))
223  return true; // ok, they just match
224 
225  // check for struct prefixes
226 
227  const typet ot_base=ns.follow(object_type),
228  dt_base=ns.follow(dereference_type);
229 
230  if(ot_base.id()==ID_struct &&
231  dt_base.id()==ID_struct)
232  {
233  if(to_struct_type(dt_base).is_prefix_of(
234  to_struct_type(ot_base)))
235  return true; // ok, dt is a prefix of ot
236  }
237 
238  // we are generous about code pointers
239  if(dereference_type.id()==ID_code &&
240  object_type.id()==ID_code)
241  return true;
242 
243  // bitvectors of same width are ok
244  if((dereference_type.id()==ID_signedbv ||
245  dereference_type.id()==ID_unsignedbv) &&
246  (object_type.id()==ID_signedbv ||
247  object_type.id()==ID_unsignedbv) &&
248  to_bitvector_type(dereference_type).get_width()==
249  to_bitvector_type(object_type).get_width())
250  return true;
251 
252  // really different
253 
254  return false;
255 }
256 
272  const exprt &what,
273  const exprt &pointer_expr)
274 {
275  const typet &dereference_type=
276  ns.follow(pointer_expr.type()).subtype();
277 
278  if(what.id()==ID_unknown ||
279  what.id()==ID_invalid)
280  {
281  return valuet();
282  }
283 
284  if(what.id()!=ID_object_descriptor)
285  throw "unknown points-to: "+what.id_string();
286 
288 
289  const exprt &root_object=o.root_object();
290  const exprt &object=o.object();
291 
292  #if 0
293  std::cout << "O: " << format(root_object) << '\n';
294  #endif
295 
296  valuet result;
297 
298  if(root_object.id() == ID_null_object)
299  {
301  {
302  result.ignore = true;
303  }
304  }
305  else if(root_object.id()==ID_dynamic_object)
306  {
307  // const dynamic_object_exprt &dynamic_object=
308  // to_dynamic_object_expr(root_object);
309 
310  // the object produced by malloc
312  ns.lookup(CPROVER_PREFIX "malloc_object").symbol_expr();
313 
314  exprt is_malloc_object=same_object(pointer_expr, malloc_object);
315 
316  // constraint that it actually is a dynamic object
317  // this is also our guard
318  result.pointer_guard = dynamic_object(pointer_expr);
319 
320  // can't remove here, turn into *p
321  result.value=dereference_exprt(pointer_expr, dereference_type);
322  }
323  else if(root_object.id()==ID_integer_address)
324  {
325  // This is stuff like *((char *)5).
326  // This is turned into an access to __CPROVER_memory[...].
327 
328  if(language_mode == ID_java)
329  {
330  result.ignore = true;
331  return result;
332  }
333 
334  const symbolt &memory_symbol=ns.lookup(CPROVER_PREFIX "memory");
335  const symbol_exprt symbol_expr(memory_symbol.name, memory_symbol.type);
336 
337  if(base_type_eq(
338  ns.follow(memory_symbol.type).subtype(),
339  dereference_type, ns))
340  {
341  // Types match already, what a coincidence!
342  // We can use an index expression.
343 
344  const index_exprt index_expr(
345  symbol_expr,
346  pointer_offset(pointer_expr),
347  ns.follow(memory_symbol.type).subtype());
348 
349  result.value=index_expr;
350  }
351  else if(dereference_type_compare(
352  ns.follow(memory_symbol.type).subtype(),
353  dereference_type))
354  {
355  const index_exprt index_expr(
356  symbol_expr,
357  pointer_offset(pointer_expr),
358  ns.follow(memory_symbol.type).subtype());
359  result.value=typecast_exprt(index_expr, dereference_type);
360  }
361  else
362  {
363  // We need to use byte_extract.
364  // Won't do this without a commitment to an endianness.
365 
367  {
368  }
369  else
370  {
371  result.value = byte_extract_exprt(
372  byte_extract_id(),
373  symbol_expr,
374  pointer_offset(pointer_expr),
375  dereference_type);
376  }
377  }
378  }
379  else
380  {
381  // something generic -- really has to be a symbol
382  address_of_exprt object_pointer(object);
383 
384  if(o.offset().is_zero())
385  {
386  equal_exprt equality(pointer_expr, object_pointer);
387 
388  if(ns.follow(equality.lhs().type())!=ns.follow(equality.rhs().type()))
389  equality.lhs().make_typecast(equality.rhs().type());
390 
391  result.pointer_guard=equality;
392  }
393  else
394  {
395  result.pointer_guard=same_object(pointer_expr, object_pointer);
396  }
397 
398  const typet &object_type=ns.follow(object.type());
399  const typet &root_object_type=ns.follow(root_object.type());
400 
401  exprt root_object_subexpression=root_object;
402 
403  if(dereference_type_compare(object_type, dereference_type) &&
404  o.offset().is_zero())
405  {
406  // The simplest case: types match, and offset is zero!
407  // This is great, we are almost done.
408 
409  result.value=object;
410 
411  if(object_type!=ns.follow(dereference_type))
412  result.value.make_typecast(dereference_type);
413  }
414  else if(root_object_type.id()==ID_array &&
416  root_object_type.subtype(),
417  dereference_type))
418  {
419  // We have an array with a subtype that matches
420  // the dereferencing type.
421  // We will require well-alignedness!
422 
423  exprt offset;
424 
425  // this should work as the object is essentially the root object
426  if(o.offset().is_constant())
427  offset=o.offset();
428  else
429  offset=pointer_offset(pointer_expr);
430 
431  exprt adjusted_offset;
432 
433  // are we doing a byte?
434  auto element_size = pointer_offset_size(root_object_type.subtype(), ns);
435 
436  if(!element_size.has_value() || *element_size == 0)
437  {
438  throw "unknown or invalid type size of:\n" +
439  root_object_type.subtype().pretty();
440  }
441  else if(*element_size == 1)
442  {
443  // no need to adjust offset
444  adjusted_offset = offset;
445  }
446  else
447  {
448  exprt element_size_expr = from_integer(*element_size, offset.type());
449 
450  adjusted_offset=binary_exprt(
451  offset, ID_div, element_size_expr, offset.type());
452 
453  // TODO: need to assert well-alignedness
454  }
455 
456  index_exprt index_expr=
457  index_exprt(root_object, adjusted_offset, root_object_type.subtype());
458 
459  result.value=index_expr;
460 
461  if(ns.follow(result.value.type())!=ns.follow(dereference_type))
462  result.value.make_typecast(dereference_type);
463  }
464  else
465  {
466  // try to build a member/index expression - do not use byte_extract
468  root_object_subexpression, o.offset(), dereference_type, ns);
469  if(subexpr.is_not_nil())
470  simplify(subexpr, ns);
471  if(subexpr.is_not_nil() && subexpr.id() != byte_extract_id())
472  {
473  // Successfully found a member, array index, or combination thereof
474  // that matches the desired type and offset:
475  result.value = subexpr;
476  return result;
477  }
478 
479  // we extract something from the root object
480  result.value=o.root_object();
481 
482  // this is relative to the root object
483  exprt offset;
484  if(o.offset().id()==ID_unknown)
485  offset=pointer_offset(pointer_expr);
486  else
487  offset=o.offset();
488 
489  if(memory_model(result.value, dereference_type, offset))
490  {
491  // ok, done
492  }
493  else
494  {
495  return valuet(); // give up, no way that this is ok
496  }
497  }
498  }
499 
500  return result;
501 }
502 
503 static bool is_a_bv_type(const typet &type)
504 {
505  return type.id()==ID_unsignedbv ||
506  type.id()==ID_signedbv ||
507  type.id()==ID_bv ||
508  type.id()==ID_fixedbv ||
509  type.id()==ID_floatbv ||
510  type.id()==ID_c_enum_tag;
511 }
512 
522  exprt &value,
523  const typet &to_type,
524  const exprt &offset)
525 {
526  // we will allow more or less arbitrary pointer type cast
527 
528  const typet from_type=value.type();
529 
530  // first, check if it's really just a type coercion,
531  // i.e., both have exactly the same (constant) size,
532  // for bit-vector types or pointers
533 
534  if(
535  (is_a_bv_type(from_type) && is_a_bv_type(to_type)) ||
536  (from_type.id() == ID_pointer && to_type.id() == ID_pointer))
537  {
539  {
540  // avoid semantic conversion in case of
541  // cast to float or fixed-point,
542  // or cast from float or fixed-point
543 
544  if(
545  to_type.id() != ID_fixedbv && to_type.id() != ID_floatbv &&
546  from_type.id() != ID_fixedbv && from_type.id() != ID_floatbv)
547  {
548  value = typecast_exprt::conditional_cast(value, to_type);
549  return true;
550  }
551  }
552  }
553 
554  // otherwise, we will stitch it together from bytes
555 
556  return memory_model_bytes(value, to_type, offset);
557 }
558 
568  exprt &value,
569  const typet &to_type,
570  const exprt &offset)
571 {
572  const typet from_type=value.type();
573 
574  // We simply refuse to convert to/from code.
575  if(from_type.id()==ID_code || to_type.id()==ID_code)
576  return false;
577 
578  // We won't do this without a commitment to an endianness.
580  return false;
581 
582  // But everything else we will try!
583  // We just rely on byte_extract to do the job!
584 
585  exprt result;
586 
587  // See if we have an array of bytes already,
588  // and we want something byte-sized.
589  auto from_type_subtype_size =
591 
592  auto to_type_size = pointer_offset_size(to_type, ns);
593 
594  if(
595  ns.follow(from_type).id() == ID_array &&
596  from_type_subtype_size.has_value() && *from_type_subtype_size == 1 &&
597  to_type_size.has_value() && *to_type_size == 1 &&
599  {
600  // yes, can use 'index'
601  result=index_exprt(value, offset, ns.follow(from_type).subtype());
602 
603  // possibly need to convert
604  if(!base_type_eq(result.type(), to_type, ns))
605  result.make_typecast(to_type);
606  }
607  else
608  {
609  // no, use 'byte_extract'
610  result = byte_extract_exprt(byte_extract_id(), value, offset, to_type);
611  }
612 
613  value=result;
614 
615  return true;
616 }
const if_exprt & to_if_expr(const exprt &expr)
Cast an exprt to an if_exprt.
Definition: std_expr.h:3482
The type of an expression, extends irept.
Definition: type.h:27
irep_idt name
The unique identifier.
Definition: symbol.h:40
struct configt::ansi_ct ansi_c
Boolean negation.
Definition: std_expr.h:3308
exprt & true_case()
Definition: std_expr.h:3455
Semantic type conversion.
Definition: std_expr.h:2277
bool is_nil() const
Definition: irep.h:172
bool is_not_nil() const
Definition: irep.h:173
std::string pretty(unsigned indent=0, unsigned max_indent=0) const
Definition: irep.cpp:640
#define CPROVER_PREFIX
const bitvector_typet & to_bitvector_type(const typet &type)
Cast a typet to a bitvector_typet.
Definition: std_types.h:1158
bool base_type_eq(const typet &type1, const typet &type2, const namespacet &ns)
Check types for equality across all levels of hierarchy.
Definition: base_type.cpp:332
virtual exprt dereference(const exprt &pointer, const guardt &guard, const modet mode)
Dereference the given pointer-expression.
dereference_callbackt & dereference_callback
endiannesst endianness
Definition: config.h:76
Fresh auxiliary symbol creation.
const object_descriptor_exprt & to_object_descriptor_expr(const exprt &expr)
Cast an exprt to an object_descriptor_exprt.
Definition: std_expr.h:2176
The trinary if-then-else operator.
Definition: std_expr.h:3427
exprt & cond()
Definition: std_expr.h:3445
Definition: guard.h:19
typet & type()
Return the type of the expression.
Definition: expr.h:68
Symbol table entry.
Definition: symbol.h:27
Pointer Dereferencing.
configt config
Definition: config.cpp:24
virtual bool has_failed_symbol(const exprt &expr, const symbolt *&symbol)=0
Equality.
Definition: std_expr.h:1484
const irep_idt & id() const
Definition: irep.h:259
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:121
Expression classes for byte-level operators.
The Boolean constant true.
Definition: std_expr.h:4443
A base class for binary expressions.
Definition: std_expr.h:738
exprt get_subexpression_at_offset(const exprt &expr, const mp_integer &offset_bytes, const typet &target_type_raw, const namespacet &ns)
optionalt< mp_integer > pointer_offset_size(const typet &type, const namespacet &ns)
Compute the size of a type in bytes, rounding up to full bytes.
The NIL expression.
Definition: std_expr.h:4461
Operator to dereference a pointer.
Definition: std_expr.h:3355
Split an expression into a base object and a (byte) offset.
Definition: std_expr.h:2136
Guard Data Structure.
const typet & follow(const typet &) const
Resolve type symbol to the type it points to.
Definition: namespace.cpp:62
static bool is_a_bv_type(const typet &type)
const struct_typet & to_struct_type(const typet &type)
Cast a typet to a struct_typet.
Definition: std_types.h:349
Operator to return the address of an object.
Definition: std_expr.h:3255
exprt & false_case()
Definition: std_expr.h:3465
Various predicates over pointers in programs.
std::string from_type(const namespacet &ns, const irep_idt &identifier, const typet &type)
bool is_constant() const
Return whether the expression is a constant.
Definition: expr.cpp:83
std::size_t get_width() const
Definition: std_types.h:1117
static exprt conditional_cast(const exprt &expr, const typet &type)
Definition: std_expr.h:2291
Pointer Logic.
virtual void get_value_set(const exprt &expr, value_setst::valuest &value_set)=0
exprt malloc_object(const exprt &pointer, const namespacet &ns)
typet type
Type of symbol.
Definition: symbol.h:31
bool is_prefix_of(const struct_typet &other) const
Returns true if the struct is a prefix of other, i.e., if this struct has n components then the compo...
Definition: std_types.cpp:106
bool memory_model_bytes(exprt &value, const typet &type, const exprt &offset)
Replace value by an expression of type to_type corresponding to the value at memory address value + o...
symbolt & get_fresh_aux_symbol(const typet &type, const std::string &name_prefix, const std::string &basename_prefix, const source_locationt &source_location, const irep_idt &symbol_mode, symbol_table_baset &symbol_table)
Installs a fresh-named symbol with the requested name pattern.
Base class for all expressions.
Definition: expr.h:54
const exprt & root_object() const
Definition: std_expr.cpp:199
exprt pointer_offset(const exprt &pointer)
const source_locationt & source_location() const
Definition: expr.h:228
symbol_tablet & new_symbol_table
const std::string & id_string() const
Definition: irep.h:262
bool is_zero() const
Return whether the expression is a constant representing 0.
Definition: expr.cpp:130
bool memory_model(exprt &value, const typet &type, const exprt &offset)
Replace value by an expression of type to_type corresponding to the value at memory address value + o...
Expression to hold a symbol (variable)
Definition: std_expr.h:143
Options.
Misc Utilities.
exprt dynamic_object(const exprt &pointer)
valuet build_reference_to(const exprt &what, const exprt &pointer)
optionalt< mp_integer > pointer_offset_bits(const typet &type, const namespacet &ns)
Base Type Computation.
const typet & subtype() const
Definition: type.h:38
TO_BE_DOCUMENTED.
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
const irep_idt language_mode
language_mode: ID_java, ID_C or another language identifier if we know the source language in use...
std::list< exprt > valuest
Definition: value_sets.h:28
exprt same_object(const exprt &p1, const exprt &p2)
void make_typecast(const typet &_type)
Create a typecast_exprt to the given type.
Definition: expr.cpp:74
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
Definition: namespace.cpp:166
void add(const exprt &expr)
Definition: guard.cpp:43
void set(const irep_namet &name, const irep_idt &value)
Definition: irep.h:286
const bool exclude_null_derefs
Flag indicating whether value_set_dereferencet::dereference should disregard an apparent attempt to d...
bool dereference_type_compare(const typet &object_type, const typet &dereference_type) const
Check if the two types have matching number of ID_pointer levels, with the dereference type eventuall...
bool simplify(exprt &expr, const namespacet &ns)
Return value for build_reference_to; see that method for documentation.
irep_idt byte_extract_id()
bool is_lvalue
Definition: symbol.h:66
Array index operator.
Definition: std_expr.h:1595
static format_containert< T > format(const T &o)
Definition: format.h:35