cprover
string_constraint_generator_comparison.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Generates string constraints for function comparing strings,
4  such as: equals, equalsIgnoreCase, compareTo, hashCode, intern
5 
6 Author: Romain Brenguier, romain.brenguier@diffblue.com
7 
8 \*******************************************************************/
9 
13 
15 #include <util/deprecate.h>
16 
31 {
32  PRECONDITION(f.type()==bool_typet() || f.type().id()==ID_c_bool);
33  PRECONDITION(f.arguments().size() == 2);
34 
37  symbol_exprt eq=fresh_boolean("equal");
38  typecast_exprt tc_eq(eq, f.type());
39 
40  typet index_type=s1.length().type();
41 
42 
43  implies_exprt a1(eq, equal_exprt(s1.length(), s2.length()));
44  lemmas.push_back(a1);
45 
46  symbol_exprt qvar=fresh_univ_index("QA_equal", index_type);
48  qvar, s1.length(), implies_exprt(eq, equal_exprt(s1[qvar], s2[qvar])));
49  constraints.push_back(a2);
50 
52  exprt zero=from_integer(0, index_type);
53  and_exprt bound_witness(
54  binary_relation_exprt(witness, ID_lt, s1.length()),
55  binary_relation_exprt(witness, ID_ge, zero));
56  and_exprt witnessing(bound_witness, notequal_exprt(s1[witness], s2[witness]));
57  and_exprt diff_length(
58  notequal_exprt(s1.length(), s2.length()),
60  implies_exprt a3(not_exprt(eq), or_exprt(diff_length, witnessing));
61  lemmas.push_back(a3);
62 
63  return tc_eq;
64 }
65 
76  exprt char1, exprt char2, exprt char_a, exprt char_A, exprt char_Z)
77 {
78  and_exprt is_upper_case_1(
79  binary_relation_exprt(char_A, ID_le, char1),
80  binary_relation_exprt(char1, ID_le, char_Z));
81  and_exprt is_upper_case_2(
82  binary_relation_exprt(char_A, ID_le, char2),
83  binary_relation_exprt(char2, ID_le, char_Z));
84 
85  // Three possibilities:
86  // p1 : char1=char2
87  // p2 : (is_up1&&'a'-'A'+char1=char2)
88  // p3 : (is_up2&&'a'-'A'+char2=char1)
89  equal_exprt p1(char1, char2);
90  minus_exprt diff(char_a, char_A);
91 
92  // Overflow is not a problem here because is_upper_case conditions
93  // ensure that we are within a safe range.
94  and_exprt p2(is_upper_case_1,
95  equal_exprt(plus_exprt(diff, char1), char2));
96  and_exprt p3(is_upper_case_2,
97  equal_exprt(plus_exprt(diff, char2), char1));
98  return or_exprt(or_exprt(p1, p2), p3);
99 }
100 
116 {
117  PRECONDITION(f.type()==bool_typet() || f.type().id()==ID_c_bool);
118  PRECONDITION(f.arguments().size() == 2);
119  const symbol_exprt eq = fresh_boolean("equal_ignore_case");
122  const typet char_type = s1.content().type().subtype();
123  const exprt char_a = constant_char('a', char_type);
124  const exprt char_A = constant_char('A', char_type);
125  const exprt char_Z = constant_char('Z', char_type);
126  const typet index_type = s1.length().type();
127 
128  const implies_exprt a1(eq, equal_exprt(s1.length(), s2.length()));
129  lemmas.push_back(a1);
130 
131  const symbol_exprt qvar =
132  fresh_univ_index("QA_equal_ignore_case", index_type);
133  const exprt constr2 =
134  character_equals_ignore_case(s1[qvar], s2[qvar], char_a, char_A, char_Z);
135  const string_constraintt a2(qvar, s1.length(), implies_exprt(eq, constr2));
136  constraints.push_back(a2);
137 
138  const symbol_exprt witness =
139  fresh_exist_index("witness_unequal_ignore_case", index_type);
140  const exprt zero = from_integer(0, witness.type());
141  const and_exprt bound_witness(
142  binary_relation_exprt(witness, ID_lt, s1.length()),
143  binary_relation_exprt(witness, ID_ge, zero));
144  const exprt witness_eq = character_equals_ignore_case(
145  s1[witness], s2[witness], char_a, char_A, char_Z);
146  const not_exprt witness_diff(witness_eq);
147  const implies_exprt a3(
148  not_exprt(eq),
149  or_exprt(
150  notequal_exprt(s1.length(), s2.length()),
151  and_exprt(bound_witness, witness_diff)));
152  lemmas.push_back(a3);
153 
154  return typecast_exprt(eq, f.type());
155 }
156 
168 {
169  PRECONDITION(f.arguments().size() == 1);
170  const array_string_exprt str = get_string_expr(f.arguments()[0]);
171  const typet &return_type = f.type();
172  const typet &index_type = str.length().type();
173 
174  auto pair=hash_code_of_string.insert(
175  std::make_pair(str, fresh_symbol("hash", return_type)));
176  const exprt hash = pair.first->second;
177 
178  for(auto it : hash_code_of_string)
179  {
180  const symbol_exprt i = fresh_exist_index("index_hash", index_type);
181  const equal_exprt c1(it.second, hash);
182  const notequal_exprt c2(it.first.length(), str.length());
183  const and_exprt c3(
184  equal_exprt(it.first.length(), str.length()),
185  and_exprt(
186  notequal_exprt(str[i], it.first[i]),
187  and_exprt(str.axiom_for_length_gt(i), axiom_for_is_positive_index(i))));
188  lemmas.push_back(or_exprt(c1, or_exprt(c2, c3)));
189  }
190  return hash;
191 }
192 
214 {
215  PRECONDITION(f.arguments().size() == 2);
216  const typet &return_type=f.type();
217  PRECONDITION(return_type.id() == ID_signedbv);
220  const symbol_exprt res = fresh_symbol("compare_to", return_type);
221  const typet &index_type = s1.length().type();
222 
223  const equal_exprt res_null(res, from_integer(0, return_type));
224  const implies_exprt a1(res_null, equal_exprt(s1.length(), s2.length()));
225  lemmas.push_back(a1);
226 
227  const symbol_exprt i = fresh_univ_index("QA_compare_to", index_type);
228  const string_constraintt a2(
229  i, s1.length(), implies_exprt(res_null, equal_exprt(s1[i], s2[i])));
230  constraints.push_back(a2);
231 
232  const symbol_exprt x = fresh_exist_index("index_compare_to", index_type);
233  const equal_exprt ret_char_diff(
234  res,
235  minus_exprt(
236  typecast_exprt(s1[x], return_type), typecast_exprt(s2[x], return_type)));
237  const equal_exprt ret_length_diff(
238  res,
239  minus_exprt(
240  typecast_exprt(s1.length(), return_type),
241  typecast_exprt(s2.length(), return_type)));
242  const or_exprt guard1(
243  and_exprt(s1.axiom_for_length_le(s2.length()), s1.axiom_for_length_gt(x)),
244  and_exprt(s1.axiom_for_length_ge(s2.length()), s2.axiom_for_length_gt(x)));
245  const and_exprt cond1(ret_char_diff, guard1);
246  const or_exprt guard2(
247  and_exprt(s2.axiom_for_length_gt(s1.length()), s1.axiom_for_has_length(x)),
248  and_exprt(s1.axiom_for_length_gt(s2.length()), s2.axiom_for_has_length(x)));
249  const and_exprt cond2(ret_length_diff, guard2);
250 
251  const implies_exprt a3(
252  not_exprt(res_null),
253  and_exprt(
254  binary_relation_exprt(x, ID_ge, from_integer(0, return_type)),
255  or_exprt(cond1, cond2)));
256  lemmas.push_back(a3);
257 
258  const symbol_exprt i2 = fresh_univ_index("QA_compare_to", index_type);
259  const string_constraintt a4(
260  i2, x, implies_exprt(not_exprt(res_null), equal_exprt(s1[i2], s2[i2])));
261  constraints.push_back(a4);
262 
263  return res;
264 }
265 
271 DEPRECATED("never tested")
274 {
275  PRECONDITION(f.arguments().size() == 1);
276  const array_string_exprt str = get_string_expr(f.arguments()[0]);
277  // For now we only enforce content equality and not pointer equality
278  const typet &return_type=f.type();
279  const typet index_type = str.length().type();
280 
281  auto pair=intern_of_string.insert(
282  std::make_pair(str, fresh_symbol("pool", return_type)));
283  const symbol_exprt intern = pair.first->second;
284 
285  // intern(str)=s_0 || s_1 || ...
286  // for each string s.
287  // intern(str)=intern(s) || |str|!=|s|
288  // || (|str|==|s| &&exists i<|s|. s[i]!=str[i])
289 
290  exprt::operandst disj;
291  for(auto it : intern_of_string)
292  disj.push_back(equal_exprt(intern, it.second));
293  lemmas.push_back(disjunction(disj));
294 
295  // WARNING: the specification may be incomplete or incorrect
296  for(auto it : intern_of_string)
297  if(it.second!=str)
298  {
299  symbol_exprt i=fresh_exist_index("index_intern", index_type);
300  lemmas.push_back(
301  or_exprt(
302  equal_exprt(it.second, intern),
303  or_exprt(
304  notequal_exprt(str.length(), it.first.length()),
305  and_exprt(
306  equal_exprt(str.length(), it.first.length()),
307  and_exprt(
308  notequal_exprt(str[i], it.first[i]),
309  and_exprt(
310  str.axiom_for_length_gt(i),
311  axiom_for_is_positive_index(i)))))));
312  }
313 
314  return intern;
315 }
exprt add_axioms_for_compare_to(const function_application_exprt &f)
Lexicographic comparison of two strings.
The type of an expression.
Definition: type.h:22
Boolean negation.
Definition: std_expr.h:3230
semantic type conversion
Definition: std_expr.h:2111
Generates string constraints to link results from string functions with their arguments.
A generic base class for relations, i.e., binary predicates.
Definition: std_expr.h:752
application of (mathematical) function
Definition: std_expr.h:4531
boolean OR
Definition: std_expr.h:2391
exprt add_axioms_for_equals(const function_application_exprt &f)
Equality of the content of two strings.
std::map< array_string_exprt, exprt > hash_code_of_string
argumentst & arguments()
Definition: std_expr.h:4564
symbol_exprt fresh_univ_index(const irep_idt &prefix, const typet &type)
generate an index symbol to be used as an universaly quantified variable
typet & type()
Definition: expr.h:56
The proper Booleans.
Definition: std_types.h:34
boolean implication
Definition: std_expr.h:2339
equality
Definition: std_expr.h:1354
symbol_exprt fresh_boolean(const irep_idt &prefix)
generate a Boolean symbol which is existentially quantified
const irep_idt & id() const
Definition: irep.h:189
DEPRECATED("use instrument_cover_goals(goto_programt &goto_program," "const cover_instrumenterst &instrumenters," "message_handlert &message_handler, const irep_idt mode) instead") void instrument_cover_goals(const symbol_tablet &symbol_table
Instruments goto program for a given coverage criterion.
exprt add_axioms_for_hash_code(const function_application_exprt &f)
Value that is identical for strings with the same content.
boolean AND
Definition: std_expr.h:2255
inequality
Definition: std_expr.h:1406
#define PRECONDITION(CONDITION)
Definition: invariant.h:230
exprt add_axioms_for_equals_ignore_case(const function_application_exprt &f)
Equality of the content ignoring case of characters.
The plus expression.
Definition: std_expr.h:893
bitvector_typet index_type()
Definition: c_types.cpp:16
exprt disjunction(const exprt::operandst &op)
Definition: std_expr.cpp:24
std::vector< exprt > operandst
Definition: expr.h:45
static constant_exprt constant_char(int i, const typet &char_type)
generate constant character expression with character type.
symbol_exprt fresh_exist_index(const irep_idt &prefix, const typet &type)
generate an index symbol which is existentially quantified
Base class for all expressions.
Definition: expr.h:42
Universally quantified string constraint
static exprt character_equals_ignore_case(exprt char1, exprt char2, exprt char_a, exprt char_A, exprt char_Z)
Returns an expression which is true when the two given characters are equal when ignoring case for AS...
binary minus
Definition: std_expr.h:959
Expression to hold a symbol (variable)
Definition: std_expr.h:90
int8_t s1
Definition: bytecode_info.h:59
int16_t s2
Definition: bytecode_info.h:60
const typet & subtype() const
Definition: type.h:33
std::vector< string_constraintt > constraints
std::map< string_not_contains_constraintt, symbol_exprt > witness
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
bitvector_typet char_type()
Definition: c_types.cpp:114
array_string_exprt get_string_expr(const exprt &expr)
casts an expression to a string expression, or fetches the actual string_exprt in the case of a symbo...
exprt axiom_for_is_positive_index(const exprt &x)
expression true exactly when the index is positive