cprover
string_constraint_generator.h
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Generates string constraints to link results from string functions
4  with their arguments. This is inspired by the PASS paper at HVC'13:
5  "PASS: String Solving with Parameterized Array and Interval Automaton"
6  by Guodong Li and Indradeep Ghosh, which gives examples of constraints
7  for several functions.
8 
9 Author: Romain Brenguier, romain.brenguier@diffblue.com
10 
11 \*******************************************************************/
12 
19 
20 #ifndef CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H
21 #define CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H
22 
23 #include <util/string_expr.h>
26 
28 {
29 public:
30  // This module keeps a list of axioms. It has methods which generate
31  // string constraints for different string funcitons and add them
32  // to the axiom list.
33 
35  mode(ID_unknown)
36  { }
37 
38  void set_mode(irep_idt _mode)
39  {
40  // only C and java modes supported
41  assert((_mode==ID_java) || (_mode==ID_C));
42  mode=_mode;
43  }
44 
45  irep_idt &get_mode() { return mode; }
46 
47  // Axioms are of three kinds: universally quantified string constraint,
48  // not contains string constraints and simple formulas.
49  std::list<exprt> axioms;
50 
51  // Boolean symbols for the results of some string functions
52  std::list<symbol_exprt> boolean_symbols;
53 
54  // Symbols used in existential quantifications
55  std::list<symbol_exprt> index_symbols;
56 
57  // Used to store information about witnesses for not_contains constraints
58  std::map<string_not_contains_constraintt, symbol_exprt> witness;
59 
62  const exprt &univ_val) const
63  {
64  return index_exprt(witness.at(c), univ_val);
65  }
66 
67  static unsigned next_symbol_id;
68 
70  const irep_idt &prefix, const typet &type=bool_typet());
71  symbol_exprt fresh_exist_index(const irep_idt &prefix, const typet &type);
72  symbol_exprt fresh_univ_index(const irep_idt &prefix, const typet &type);
73  symbol_exprt fresh_boolean(const irep_idt &prefix);
75 
76  // We maintain a map from symbols to strings.
77  std::map<irep_idt, string_exprt> symbol_to_string;
78 
80 
82  const symbol_exprt &sym, const string_exprt &expr)
83  {
84  symbol_to_string[sym.get_identifier()]=expr;
85  }
86 
89  const symbol_exprt &sym, const exprt &str);
90 
92  const function_application_exprt &expr);
93 
94  static constant_exprt constant_char(int i, const typet &char_type);
95 
96 private:
97  // The integer with the longest string is Integer.MIN_VALUE which is -2^31,
98  // that is -2147483648 so takes 11 characters to write.
99  // The long with the longest string is Long.MIN_VALUE which is -2^63,
100  // approximately -9.223372037*10^18 so takes 20 characters to write.
101  const std::size_t MAX_INTEGER_LENGTH=11;
102  const std::size_t MAX_LONG_LENGTH=20;
103  const std::size_t MAX_FLOAT_LENGTH=15;
104  const std::size_t MAX_DOUBLE_LENGTH=30;
105 
106  static irep_idt extract_java_string(const symbol_exprt &s);
107 
109 
110  // The following functions add axioms for the returned value
111  // to be equal to the result of the function given as argument.
112  // They are not accessed directly from other classes: they call
113  // `add_axioms_for_function_application` which determines which of
114  // these methods should be called.
115 
122 
123  // Add axioms corresponding to the String.hashCode java function
124  // The specification is partial: the actual value is not actually computed
125  // but we ensure that hash codes of equal strings are equal.
127 
130  const string_exprt &prefix, const string_exprt &str, const exprt &offset);
132  const function_application_exprt &f, bool swap_arguments=false);
134  const function_application_exprt &f, bool swap_arguments=false);
140  const string_exprt &s1, const string_exprt &s2);
147  const function_application_exprt &f);
150  const function_application_exprt &f);
152  irep_idt sval, const refined_string_typet &ref_type);
154  const string_exprt &str, const exprt &start, const exprt &end);
157  const function_application_exprt &expr);
159  const string_exprt &s1, const string_exprt &s2, const exprt &offset);
166  const function_application_exprt &f);
169  const function_application_exprt &f);
173  const exprt &i, size_t max_size, const refined_string_typet &ref_type);
175  const exprt &i, const refined_string_typet &ref_type);
178  string_exprt add_axioms_from_long(const exprt &i, size_t max_size);
181  const exprt &i, const refined_string_typet &ref_type);
184  const exprt &i, const refined_string_typet &ref_type);
187  const exprt &length,
188  const exprt &data,
189  const exprt &offset,
190  const exprt &count);
192  const string_exprt &str,
193  const exprt &c,
194  const exprt &from_index);
195 
196  // Add axioms corresponding to the String.indexOf:(String;I) java function
197  // TODO: the specifications are only partial:
198  // we add axioms stating that the returned value is either -1 or greater than
199  // from_index and the string beggining there has prefix substring
201  const string_exprt &str,
202  const string_exprt &substring,
203  const exprt &from_index);
204 
205  // Add axioms corresponding to the String.indexOf java functions
206  // TODO: the specifications are only partial for the ones that look for
207  // strings
209 
210  // Add axioms corresponding to the String.lastIndexOf:(String;I) java function
211  // TODO: the specifications are only partial
213  const string_exprt &str,
214  const string_exprt &substring,
215  const exprt &from_index);
216 
217  // Add axioms corresponding to the String.lastIndexOf:(CI) java function
219  const string_exprt &str,
220  const exprt &c,
221  const exprt &from_index);
222 
223  // Add axioms corresponding to the String.lastIndexOf java functions
224  // TODO: the specifications is only partial
226 
227  // TODO: the specifications of these functions is only partial
228  // We currently only specify that the string for NaN is "NaN", for infinity
229  // and minus infinity the string are "Infinity" and "-Infinity respectively
230  // otherwise the string contains only characters in [0123456789.] and '-' at
231  // the start for negative number
234  const exprt &f, bool double_precision=false);
235 
236  // Add axioms corresponding to the String.valueOf(D) java function
237  // TODO: the specifications is only partial
239 
242 
243  // TODO: the specification may not be correct for the case where the
244  // string is shorter than end. An actual java program should throw an
245  // exception in that case
247  const string_exprt &str, const exprt &start, const exprt &end);
249 
251  const function_application_exprt &expr);
253  const function_application_exprt &expr);
255 
256  // Add axioms corresponding to the String.valueOf([CII) function
257  // TODO: not working correctly at the moment
259 
261  const exprt &code_point, const refined_string_typet &ref_type);
265 
266  // Add axioms corresponding the String.codePointCount java function
267  // TODO: this function is underspecified, we do not compute the exact value
268  // but over approximate it.
270 
271  // Add axioms corresponding the String.offsetByCodePointCount java function
272  // TODO: this function is underspecified, it should return the index within
273  // this String that is offset from the given first argument by second argument
274  // code points and we approximate this by saying the result is
275  // between index + offset and index + 2 * offset
277  const function_application_exprt &f);
278 
282 
283  // Add axioms corresponding to the String.intern java function
284  // TODO: this does not work at the moment because of the way we treat
285  // string pointers
287 
288  // Tells which language is used. C and Java are supported
290 
291  // assert that the number of argument is equal to nb and extract them
293  const function_application_exprt &expr, size_t nb)
294  {
296  assert(args.size()==nb);
297  return args;
298  }
299 
300  exprt int_of_hex_char(const exprt &chr) const;
301  exprt is_high_surrogate(const exprt &chr) const;
302  exprt is_low_surrogate(const exprt &chr) const;
304  exprt char1, exprt char2, exprt char_a, exprt char_A, exprt char_Z);
305 
306  // Pool used for the intern method
307  std::map<string_exprt, symbol_exprt> pool;
308 
309  // Used to determine whether hashcode should be equal
310  std::map<string_exprt, symbol_exprt> hash;
311 };
312 
313 #endif
exprt add_axioms_for_compare_to(const function_application_exprt &f)
add axioms corresponding to the String.compareTo java function
The type of an expression.
Definition: type.h:20
string_exprt add_axioms_for_insert_char(const function_application_exprt &f)
add axioms corresponding to the StringBuilder.insert(C) java function
string_exprt add_axioms_for_insert_int(const function_application_exprt &f)
add axioms corresponding to the StringBuilder.insert(I) java function
string_exprt add_axioms_from_literal(const function_application_exprt &f)
add axioms to say that the returned string expression is equal to the string literal ...
exprt add_axioms_for_is_prefix(const string_exprt &prefix, const string_exprt &str, const exprt &offset)
add axioms stating that the returned expression is true exactly when the first string is a prefix of ...
string_exprt add_axioms_for_java_char_array(const exprt &char_array)
add axioms corresponding to the String.valueOf([C) java function
void assign_to_symbol(const symbol_exprt &sym, const string_exprt &expr)
exprt add_axioms_for_code_point_at(const function_application_exprt &f)
add axioms corresponding to the String.codePointAt java function
string_exprt add_axioms_for_concat_double(const function_application_exprt &f)
add axioms corresponding to the StringBuilder.append(D) java function
std::list< symbol_exprt > boolean_symbols
string_exprt add_axioms_for_delete(const string_exprt &str, const exprt &start, const exprt &end)
add axioms stating that the returned string corresponds to the input one where we removed characters ...
string_exprt add_axioms_for_delete_char_at(const function_application_exprt &expr)
add axioms corresponding to the StringBuilder.deleteCharAt java function
application of (mathematical) function
Definition: std_expr.h:3785
exprt get_witness_of(const string_not_contains_constraintt &c, const exprt &univ_val) const
string_exprt add_axioms_from_bool(const function_application_exprt &f)
add axioms corresponding to the String.valueOf(Z) java function
string_exprt add_axioms_for_concat_int(const function_application_exprt &f)
add axioms corresponding to the StringBuilder.append(I) java function
exprt add_axioms_for_equals(const function_application_exprt &f)
add axioms stating that the result is true exactly when the strings represented by the arguments are ...
exprt add_axioms_for_length(const function_application_exprt &f)
add axioms corresponding to the String.length java function
std::map< string_exprt, symbol_exprt > pool
Type for string expressions used by the string solver.
const irep_idt & get_identifier() const
Definition: std_expr.h:120
string_exprt add_axioms_for_copy(const function_application_exprt &f)
add axioms to say that the returned string expression is equal to the argument of the function applic...
string_exprt add_axioms_for_insert_float(const function_application_exprt &f)
add axioms corresponding to the StringBuilder.insert(F) java function
argumentst & arguments()
Definition: std_expr.h:3805
string_exprt add_axioms_for_concat_code_point(const function_application_exprt &f)
Add axioms corresponding to the StringBuilder.appendCodePoint(I) function.
static symbol_exprt fresh_symbol(const irep_idt &prefix, const typet &type=bool_typet())
generate a new symbol expression of the given type with some prefix
exprt add_axioms_for_contains(const function_application_exprt &f)
add axioms corresponding to the String.contains java function
exprt add_axioms_for_code_point_count(const function_application_exprt &f)
add axioms giving approximate bounds on the result of the String.codePointCount java function ...
exprt add_axioms_for_offset_by_code_point(const function_application_exprt &f)
add axioms giving approximate bounds on the result of the String.offsetByCodePointCount java function...
symbol_exprt fresh_univ_index(const irep_idt &prefix, const typet &type)
generate an index symbol to be used as an universaly quantified variable
string_exprt add_axioms_from_double(const function_application_exprt &f)
add axioms corresponding to the String.valueOf(D) java function
string_exprt add_axioms_for_insert_long(const function_application_exprt &f)
add axioms corresponding to the StringBuilder.insert(J) java function
The trinary if-then-else operator.
Definition: std_expr.h:2771
string_exprt add_axioms_from_long(const function_application_exprt &f)
add axioms corresponding to the String.valueOf(J) java function
The proper Booleans.
Definition: std_types.h:33
string_exprt add_axioms_for_trim(const function_application_exprt &expr)
add axioms corresponding to the String.trim java function
A constant literal expression.
Definition: std_expr.h:3685
exprt add_axioms_for_is_empty(const function_application_exprt &f)
add axioms stating that the returned value is true exactly when the argument string is empty ...
exprt add_axioms_for_last_index_of_string(const string_exprt &str, const string_exprt &substring, const exprt &from_index)
string_exprt add_axioms_for_empty_string(const function_application_exprt &f)
add axioms to say that the returned string expression is empty
string_exprt add_axioms_for_if(const if_exprt &expr)
add axioms for an if expression which should return a string
string_exprt add_axioms_for_concat_float(const function_application_exprt &f)
add axioms corresponding to the StringBuilder.append(F) java function
exprt add_axioms_for_index_of(const string_exprt &str, const exprt &c, const exprt &from_index)
add axioms that the returned value is either -1 or greater than from_index and the character at that ...
exprt add_axioms_for_last_index_of(const string_exprt &str, const exprt &c, const exprt &from_index)
exprt add_axioms_for_function_application(const function_application_exprt &expr)
strings contained in this call are converted to objects of type string_exprt, through adding axioms...
symbol_exprt fresh_boolean(const irep_idt &prefix)
generate a Boolean symbol which is existentially quantified
string_exprt add_axioms_for_insert_double(const function_application_exprt &f)
add axioms corresponding to the StringBuilder.insert(D) java function
std::map< irep_idt, string_exprt > symbol_to_string
string_exprt fresh_string(const refined_string_typet &type)
construct a string expression whose length and content are new variables
string_exprt add_axioms_for_char_set(const function_application_exprt &expr)
add axioms corresponding stating that the result is similar to that of the StringBuilder.setCharAt java function Warning: this may be underspecified in the case wher the index exceed the length of the string
string_exprt add_axioms_for_concat_bool(const function_application_exprt &f)
add axioms corresponding to the StringBuilder.append(Z) java function
Defines string constraints.
exprt add_axioms_for_hash_code(const function_application_exprt &f)
add axioms stating that if two strings are equal then their hash codes are equals ...
string_exprt add_axioms_from_float(const function_application_exprt &f)
add axioms corresponding to the String.valueOf(F) java function
exprt::operandst argumentst
Definition: std_expr.h:3803
string_exprt add_axioms_for_value_of(const function_application_exprt &f)
add axioms corresponding to the String.valueOf([C) and String.valueOf([CII) functions ...
string_exprt add_axioms_for_string_expr(const exprt &expr)
obtain a refined string expression corresponding to string variable of string function call ...
symbol_exprt add_axioms_for_intern(const function_application_exprt &f)
add axioms stating that the return value for two equal string should be the same
string_exprt add_axioms_for_insert_bool(const function_application_exprt &f)
add axioms corresponding to the StringBuilder.insert(Z) java function
string_exprt add_axioms_for_concat(const string_exprt &s1, const string_exprt &s2)
add axioms to say that the returned string expression is equal to the concatenation of the two string...
exprt add_axioms_for_parse_int(const function_application_exprt &f)
add axioms corresponding to the Integer.parseInt java function
exprt add_axioms_for_is_suffix(const function_application_exprt &f, bool swap_arguments=false)
add axioms corresponding to the String.isSuffix java function
string_exprt find_or_add_string_of_symbol(const symbol_exprt &sym)
if a symbol represent a string is present in the symbol_to_string table, returns the corresponding st...
std::map< string_exprt, symbol_exprt > hash
exprt add_axioms_for_equals_ignore_case(const function_application_exprt &f)
add axioms corresponding to the String.equalsIgnoreCase java function
string_exprt add_axioms_for_insert(const string_exprt &s1, const string_exprt &s2, const exprt &offset)
add axioms stating that the result correspond to the first string where we inserted the second one at...
exprt add_axioms_for_char_literal(const function_application_exprt &f)
add axioms stating that the returned value is equal to the argument
exprt is_high_surrogate(const exprt &chr) const
the output is true when the character is a high surrogate for UTF-16 encoding, see https://en...
string_exprt add_axioms_from_char(const function_application_exprt &f)
add axioms corresponding to the String.valueOf(C) java function
String expressions for the string solver.
exprt add_axioms_for_to_char_array(const function_application_exprt &f)
add axioms corresponding to the String.toCharArray java function
static constant_exprt constant_char(int i, const typet &char_type)
generate constant character expression with character type.
symbol_exprt fresh_exist_index(const irep_idt &prefix, const typet &type)
generate an index symbol which is existentially quantified
void set_string_symbol_equal_to_expr(const symbol_exprt &sym, const exprt &str)
add a correspondence to make sure the symbol points to the same string as the second argument ...
string_exprt add_axioms_for_constant(irep_idt sval, const refined_string_typet &ref_type)
add axioms saying the returned string expression should be equal to the string constant ...
string_exprt add_axioms_for_to_lower_case(const function_application_exprt &expr)
add axioms corresponding to the String.toLowerCase java function
exprt add_axioms_for_char_at(const function_application_exprt &f)
add axioms stating that the character of the string at the given position is equal to the returned va...
Base class for all expressions.
Definition: expr.h:46
exprt is_low_surrogate(const exprt &chr) const
the output is true when the character is a low surrogate for UTF-16 encoding, see https://en...
string_exprt add_axioms_for_to_upper_case(const function_application_exprt &expr)
add axioms corresponding to the String.toUpperCase java function
string_exprt add_axioms_for_set_length(const function_application_exprt &f)
add axioms to say that the returned string expression has length given by the second argument and who...
exprt int_of_hex_char(const exprt &chr) const
returns the value represented by the character
std::list< symbol_exprt > index_symbols
exprt add_axioms_for_code_point_before(const function_application_exprt &f)
add axioms corresponding to the String.codePointBefore java function
static exprt character_equals_ignore_case(exprt char1, exprt char2, exprt char_a, exprt char_A, exprt char_Z)
returns an expression which is true when the two given characters are equal when ignoring case for AS...
string_exprt add_axioms_from_int(const function_application_exprt &f)
add axioms corresponding to the String.valueOf(I) java function
string_exprt add_axioms_for_replace(const function_application_exprt &f)
add axioms corresponding to the String.replace java function
Expression to hold a symbol (variable)
Definition: std_expr.h:82
string_exprt add_axioms_from_int_hex(const exprt &i, const refined_string_typet &ref_type)
add axioms stating that the returned string corresponds to the integer argument written in hexadecima...
int8_t s1
Definition: bytecode_info.h:59
string_exprt add_axioms_for_concat_long(const function_application_exprt &f)
Add axioms corresponding to the StringBuilder.append(J) java function.
string_exprt add_axioms_for_concat_char(const function_application_exprt &f)
add axioms corresponding to the StringBuilder.append(C) java function
int16_t s2
Definition: bytecode_info.h:60
exprt add_axioms_for_index_of_string(const string_exprt &str, const string_exprt &substring, const exprt &from_index)
add axioms stating that the returned value is either -1 or greater than from_index and the string beg...
std::map< string_not_contains_constraintt, symbol_exprt > witness
string_exprt add_axioms_for_code_point(const exprt &code_point, const refined_string_typet &ref_type)
static const function_application_exprt::argumentst & args(const function_application_exprt &expr, size_t nb)
bitvector_typet char_type()
Definition: c_types.cpp:113
string_exprt add_axioms_for_insert_char_array(const function_application_exprt &f)
add axioms corresponding to the StringBuilder.insert:(I[CII) and StringBuilder.insert:(I[C) java func...
Definition: kdev_t.h:24
static irep_idt extract_java_string(const symbol_exprt &s)
extract java string from symbol expression when they are encoded inside the symbol name ...
string_exprt add_axioms_from_char_array(const function_application_exprt &f)
add axioms corresponding to the String.
array index operator
Definition: std_expr.h:1170
exprt axiom_for_is_positive_index(const exprt &x)
expression true exactly when the index is positive
string_exprt add_axioms_for_substring(const string_exprt &str, const exprt &start, const exprt &end)
add axioms stating that the returned string expression is equal to the input one starting at start an...