cprover
string_constraint_generator_transformation.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Generates string constraints for string transformations,
4  that is, functions taking one string and returning another
5 
6 Author: Romain Brenguier, romain.brenguier@diffblue.com
7 
8 \*******************************************************************/
9 
13 
16 #include <util/arith_tools.h>
17 
37 {
38  PRECONDITION(f.arguments().size() == 4);
39  const array_string_exprt res =
42  const exprt &k = f.arguments()[3];
43  const typet &index_type = s1.length().type();
44  const typet &char_type = s1.content().type().subtype();
45 
46  // We add axioms:
47  // a1 : |res|=k
48  // a2 : forall i< min(|s1|, k) .res[i] = s1[i]
49  // a3 : forall |s1| <= i < |res|. res[i] = 0
50 
51  lemmas.push_back(res.axiom_for_has_length(k));
52 
53  const symbol_exprt idx = fresh_univ_index("QA_index_set_length", index_type);
54  const string_constraintt a2(
55  idx, minimum(s1.length(), k), equal_exprt(s1[idx], res[idx]));
56  constraints.push_back(a2);
57 
58  symbol_exprt idx2 = fresh_univ_index("QA_index_set_length2", index_type);
60  idx2,
61  s1.length(),
62  res.length(),
63  equal_exprt(res[idx2], constant_char(0, char_type)));
64  constraints.push_back(a3);
65 
66  return from_integer(0, signedbv_typet(32));
67 }
68 
76 // NOLINTNEXTLINE
90 {
92  PRECONDITION(args.size() == 4 || args.size() == 5);
93  const array_string_exprt str = get_string_expr(args[2]);
94  const array_string_exprt res = char_array_of_pointer(args[1], args[0]);
95  const exprt &i = args[3];
96  const exprt j = args.size() == 5 ? args[4] : str.length();
97  return add_axioms_for_substring(res, str, i, j);
98 }
99 
115  const array_string_exprt &res,
116  const array_string_exprt &str,
117  const exprt &start,
118  const exprt &end)
119 {
120  const typet &index_type = str.length().type();
121  PRECONDITION(start.type()==index_type);
122  PRECONDITION(end.type()==index_type);
123 
124  const exprt start1 = maximum(start, from_integer(0, start.type()));
125  const exprt end1 = maximum(minimum(end, str.length()), start1);
126 
127  // Axiom 1.
128  lemmas.push_back(equal_exprt(res.length(), minus_exprt(end1, start1)));
129 
130  // Axiom 2.
131  constraints.push_back([&] {
132  const symbol_exprt idx = fresh_univ_index("QA_index_substring", index_type);
133  return string_constraintt(
134  idx, res.length(), equal_exprt(res[idx], str[plus_exprt(start1, idx)]));
135  }());
136 
137  return from_integer(0, signedbv_typet(32));
138 }
139 
168 {
169  PRECONDITION(f.arguments().size() == 3);
170  const array_string_exprt &str = get_string_expr(f.arguments()[2]);
171  const array_string_exprt &res =
172  char_array_of_pointer(f.arguments()[1], f.arguments()[0]);
173  const typet &index_type = str.length().type();
174  const typet &char_type = str.content().type().subtype();
175  const symbol_exprt idx = fresh_exist_index("index_trim", index_type);
176  const exprt space_char = from_integer(' ', char_type);
177 
178  // Axiom 1.
179  lemmas.push_back(str.axiom_for_length_ge(plus_exprt(idx, res.length())));
180 
181  binary_relation_exprt a2(idx, ID_ge, from_integer(0, index_type));
182  lemmas.push_back(a2);
183 
184  exprt a3=str.axiom_for_length_ge(idx);
185  lemmas.push_back(a3);
186 
187  exprt a4=res.axiom_for_length_ge(
189  lemmas.push_back(a4);
190 
191  exprt a5 = res.axiom_for_length_le(str.length());
192  lemmas.push_back(a5);
193 
194  symbol_exprt n=fresh_univ_index("QA_index_trim", index_type);
195  binary_relation_exprt non_print(str[n], ID_le, space_char);
196  string_constraintt a6(n, idx, non_print);
197  constraints.push_back(a6);
198 
199  // Axiom 7.
200  constraints.push_back([&] {
201  const symbol_exprt n2 = fresh_univ_index("QA_index_trim2", index_type);
202  const minus_exprt bound(minus_exprt(str.length(), idx), res.length());
203  const binary_relation_exprt eqn2(
204  str[plus_exprt(idx, plus_exprt(res.length(), n2))], ID_le, space_char);
205  return string_constraintt(n2, bound, eqn2);
206  }());
207 
208  symbol_exprt n3=fresh_univ_index("QA_index_trim3", index_type);
209  equal_exprt eqn3(res[n3], str[plus_exprt(n3, idx)]);
210  string_constraintt a8(n3, res.length(), eqn3);
211  constraints.push_back(a8);
212 
213  // Axiom 9.
214  lemmas.push_back([&] {
215  const plus_exprt index_before(
216  idx, minus_exprt(res.length(), from_integer(1, index_type)));
217  const binary_relation_exprt no_space_before(
218  str[index_before], ID_gt, space_char);
219  return or_exprt(
220  equal_exprt(idx, str.length()),
221  and_exprt(
222  binary_relation_exprt(str[idx], ID_gt, space_char), no_space_before));
223  }());
224  return from_integer(0, f.type());
225 }
226 
245 {
246  PRECONDITION(f.arguments().size() == 3);
247  const array_string_exprt res =
248  char_array_of_pointer(f.arguments()[1], f.arguments()[0]);
249  const array_string_exprt str = get_string_expr(f.arguments()[2]);
250  const refined_string_typet &ref_type =
251  to_refined_string_type(f.arguments()[2].type());
252  const typet &char_type=ref_type.get_char_type();
253  const typet &index_type=ref_type.get_index_type();
254  const exprt char_A=constant_char('A', char_type);
255  const exprt char_Z=constant_char('Z', char_type);
256 
257  // \todo for now, only characters in Basic Latin and Latin-1 supplement
258  // are supported (up to 0x100), we should add others using case mapping
259  // information from the UnicodeData file.
260 
261  equal_exprt a1(res.length(), str.length());
262  lemmas.push_back(a1);
263 
264  symbol_exprt idx=fresh_univ_index("QA_lower_case", index_type);
265  exprt::operandst upper_case;
266  // Characters between 'A' and 'Z' are upper-case
267  upper_case.push_back(and_exprt(
268  binary_relation_exprt(char_A, ID_le, str[idx]),
269  binary_relation_exprt(str[idx], ID_le, char_Z)));
270 
271  // Characters between 0xc0 (latin capital A with grave)
272  // and 0xd6 (latin capital O with diaeresis) are upper-case
273  upper_case.push_back(and_exprt(
274  binary_relation_exprt(from_integer(0xc0, char_type), ID_le, str[idx]),
275  binary_relation_exprt(str[idx], ID_le, from_integer(0xd6, char_type))));
276 
277  // Characters between 0xd8 (latin capital O with stroke)
278  // and 0xde (latin capital thorn) are upper-case
279  upper_case.push_back(and_exprt(
280  binary_relation_exprt(from_integer(0xd8, char_type), ID_le, str[idx]),
281  binary_relation_exprt(str[idx], ID_le, from_integer(0xde, char_type))));
282 
283  exprt is_upper_case=disjunction(upper_case);
284 
285  // The difference between upper-case and lower-case for the basic latin and
286  // latin-1 supplement is 0x20.
287  exprt diff=from_integer(0x20, char_type);
288  equal_exprt converted(res[idx], plus_exprt(str[idx], diff));
289  and_exprt non_converted(
290  equal_exprt(res[idx], str[idx]),
291  binary_relation_exprt(str[idx], ID_lt, from_integer(0x100, char_type)));
292  if_exprt conditional_convert(is_upper_case, converted, non_converted);
293 
294  string_constraintt a2(idx, res.length(), conditional_convert);
295  constraints.push_back(a2);
296 
297  return from_integer(0, f.type());
298 }
299 
319  const array_string_exprt &res,
320  const array_string_exprt &str)
321 {
322  const typet &char_type = str.content().type().subtype();
323  const typet &index_type = str.length().type();
324  exprt char_a=constant_char('a', char_type);
325  exprt char_A=constant_char('A', char_type);
326  exprt char_z=constant_char('z', char_type);
327 
328  // \todo Add support for locales using case mapping information
329  // from the UnicodeData file.
330 
331  equal_exprt a1(res.length(), str.length());
332  lemmas.push_back(a1);
333 
334  symbol_exprt idx1=fresh_univ_index("QA_upper_case1", index_type);
335  const and_exprt is_lower_case(
336  binary_relation_exprt(char_a, ID_le, str[idx1]),
337  binary_relation_exprt(str[idx1], ID_le, char_z));
338  minus_exprt diff(char_A, char_a);
339  equal_exprt convert(res[idx1], plus_exprt(str[idx1], diff));
340  implies_exprt body1(is_lower_case, convert);
341  string_constraintt a2(idx1, res.length(), body1);
342  constraints.push_back(a2);
343 
344  symbol_exprt idx2=fresh_univ_index("QA_upper_case2", index_type);
345  const not_exprt is_not_lower_case(
346  and_exprt(
347  binary_relation_exprt(char_a, ID_le, str[idx2]),
348  binary_relation_exprt(str[idx2], ID_le, char_z)));
349  equal_exprt eq(res[idx2], str[idx2]);
350  implies_exprt body2(is_not_lower_case, eq);
351  string_constraintt a3(idx2, res.length(), body2);
352  constraints.push_back(a3);
353  return from_integer(0, signedbv_typet(32));
354 }
355 
358 // NOLINTNEXTLINE
360 // NOLINTNEXTLINE
369 {
370  PRECONDITION(f.arguments().size() == 3);
371  array_string_exprt res =
372  char_array_of_pointer(f.arguments()[1], f.arguments()[0]);
374  return add_axioms_for_to_upper_case(res, str);
375 }
376 
393 {
394  PRECONDITION(f.arguments().size() == 5);
395  const array_string_exprt str = get_string_expr(f.arguments()[2]);
396  const array_string_exprt res =
397  char_array_of_pointer(f.arguments()[1], f.arguments()[0]);
398  const exprt &position = f.arguments()[3];
399  const exprt &character = f.arguments()[4];
400 
401  const binary_relation_exprt out_of_bounds(position, ID_ge, str.length());
402  const equal_exprt a1(res.length(), str.length());
403  lemmas.push_back(a1);
404  const equal_exprt a2(res[position], character);
405  lemmas.push_back(a2);
406 
407  const symbol_exprt q = fresh_univ_index("QA_char_set", position.type());
408  const equal_exprt a3_body(res[q], str[q]);
409  const string_constraintt a3(q, minimum(res.length(), position), a3_body);
410  constraints.push_back(a3);
411 
412  const symbol_exprt q2 = fresh_univ_index("QA_char_set2", position.type());
413  const plus_exprt lower_bound(position, from_integer(1, position.type()));
414  const equal_exprt a4_body(res[q2], str[q2]);
415  const string_constraintt a4(q2, lower_bound, res.length(), a4_body);
416  constraints.push_back(a4);
417 
418  return if_exprt(
419  out_of_bounds, from_integer(1, f.type()), from_integer(0, f.type()));
420 }
421 
430  exprt expr1,
431  exprt expr2,
432  std::function<array_string_exprt(const exprt &)> get_string_expr)
433 {
434  if((expr1.type().id()==ID_unsignedbv
435  || expr1.type().id()==ID_char)
436  && (expr2.type().id()==ID_char
437  || expr2.type().id()==ID_unsignedbv))
438  return std::make_pair(expr1, expr2);
439  const auto expr1_str = get_string_expr(expr1);
440  const auto expr2_str = get_string_expr(expr2);
441  const auto expr1_length = numeric_cast<std::size_t>(expr1_str.length());
442  const auto expr2_length = numeric_cast<std::size_t>(expr2_str.length());
443  if(expr1_length && expr2_length && *expr1_length==1 && *expr2_length==1)
444  return std::make_pair(exprt(expr1_str[0]), exprt(expr2_str[0]));
445  return { };
446 }
447 
469 {
470  PRECONDITION(f.arguments().size() == 5);
472  array_string_exprt res =
473  char_array_of_pointer(f.arguments()[1], f.arguments()[0]);
474  if(
475  const auto maybe_chars =
476  to_char_pair(f.arguments()[3], f.arguments()[4], [this](const exprt &e) {
477  return get_string_expr(e);
478  }))
479  {
480  const auto old_char=maybe_chars->first;
481  const auto new_char=maybe_chars->second;
482 
483  lemmas.push_back(equal_exprt(res.length(), str.length()));
484 
485  symbol_exprt qvar = fresh_univ_index("QA_replace", str.length().type());
486  implies_exprt case1(
487  equal_exprt(str[qvar], old_char),
488  equal_exprt(res[qvar], new_char));
489  implies_exprt case2(
490  not_exprt(equal_exprt(str[qvar], old_char)),
491  equal_exprt(res[qvar], str[qvar]));
492  string_constraintt a2(qvar, res.length(), and_exprt(case1, case2));
493  constraints.push_back(a2);
494  return from_integer(0, f.type());
495  }
496  return from_integer(1, f.type());
497 }
498 
505 {
506  PRECONDITION(f.arguments().size() == 4);
507  const array_string_exprt res =
508  char_array_of_pointer(f.arguments()[1], f.arguments()[0]);
509  const array_string_exprt str = get_string_expr(f.arguments()[2]);
510  exprt index_one=from_integer(1, str.length().type());
511  return add_axioms_for_delete(
512  res, str, f.arguments()[3], plus_exprt(f.arguments()[3], index_one));
513 }
514 
530  const array_string_exprt &res,
531  const array_string_exprt &str,
532  const exprt &start,
533  const exprt &end)
534 {
535  PRECONDITION(start.type()==str.length().type());
536  PRECONDITION(end.type()==str.length().type());
537  const typet &index_type = str.length().type();
538  const typet &char_type = str.content().type().subtype();
541  const exprt return_code1 = add_axioms_for_substring(
542  sub1, str, from_integer(0, str.length().type()), start);
543  const exprt return_code2 =
544  add_axioms_for_substring(sub2, str, end, str.length());
545  const exprt return_code3 = add_axioms_for_concat(res, sub1, sub2);
546  return bitor_exprt(return_code1, bitor_exprt(return_code2, return_code3));
547 }
548 
551 // NOLINTNEXTLINE
553 // NOLINTNEXTLINE
563 {
564  PRECONDITION(f.arguments().size() == 5);
565  const array_string_exprt res =
566  char_array_of_pointer(f.arguments()[1], f.arguments()[0]);
567  const array_string_exprt arg = get_string_expr(f.arguments()[2]);
568  return add_axioms_for_delete(res, arg, f.arguments()[3], f.arguments()[4]);
569 }
The type of an expression.
Definition: type.h:22
Boolean negation.
Definition: std_expr.h:3230
Generates string constraints to link results from string functions with their arguments.
A generic base class for relations, i.e., binary predicates.
Definition: std_expr.h:752
application of (mathematical) function
Definition: std_expr.h:4531
boolean OR
Definition: std_expr.h:2391
argumentst & arguments()
Definition: std_expr.h:4564
symbol_exprt fresh_univ_index(const irep_idt &prefix, const typet &type)
generate an index symbol to be used as an universaly quantified variable
The trinary if-then-else operator.
Definition: std_expr.h:3361
typet & type()
Definition: expr.h:56
exprt add_axioms_for_trim(const function_application_exprt &f)
Remove leading and trailing whitespaces.
static optionalt< std::pair< exprt, exprt > > to_char_pair(exprt expr1, exprt expr2, std::function< array_string_exprt(const exprt &)> get_string_expr)
Convert two expressions to pair of chars If both expressions are characters, return pair of them If b...
exprt minimum(const exprt &a, const exprt &b)
boolean implication
Definition: std_expr.h:2339
equality
Definition: std_expr.h:1354
const irep_idt & id() const
Definition: irep.h:189
Bit-wise OR.
Definition: std_expr.h:2583
exprt add_axioms_for_delete_char_at(const function_application_exprt &expr)
add axioms corresponding to the StringBuilder.deleteCharAt java function
exprt::operandst argumentst
Definition: std_expr.h:4562
const refined_string_typet & to_refined_string_type(const typet &type)
Fixed-width bit-vector with two&#39;s complement interpretation.
Definition: std_types.h:1262
exprt add_axioms_for_replace(const function_application_exprt &f)
Replace a character by another in a string.
nonstd::optional< T > optionalt
Definition: optional.h:35
boolean AND
Definition: std_expr.h:2255
optionalt< Target > numeric_cast(const exprt &arg)
Converts an expression to any integral type.
Definition: arith_tools.h:122
#define PRECONDITION(CONDITION)
Definition: invariant.h:230
exprt add_axioms_for_set_length(const function_application_exprt &f)
Reduce or extend a string to have the given length.
static bool convert(const irep_idt &identifier, const std::ostringstream &s, symbol_tablet &symbol_table, message_handlert &message_handler)
The plus expression.
Definition: std_expr.h:893
bitvector_typet index_type()
Definition: c_types.cpp:16
exprt add_axioms_for_char_set(const function_application_exprt &f)
Set a character to a specific value at an index of the string.
exprt disjunction(const exprt::operandst &op)
Definition: std_expr.cpp:24
std::vector< exprt > operandst
Definition: expr.h:45
exprt maximum(const exprt &a, const exprt &b)
static constant_exprt constant_char(int i, const typet &char_type)
generate constant character expression with character type.
symbol_exprt fresh_exist_index(const irep_idt &prefix, const typet &type)
generate an index symbol which is existentially quantified
exprt add_axioms_for_delete(const array_string_exprt &res, const array_string_exprt &str, const exprt &start, const exprt &end)
Add axioms stating that res corresponds to the input str where we removed characters between the posi...
exprt add_axioms_for_substring(const array_string_exprt &res, const array_string_exprt &str, const exprt &start, const exprt &end)
Add axioms ensuring that res corresponds to the substring of str between indexes ‘start’ = max(st...
Base class for all expressions.
Definition: expr.h:42
Universally quantified string constraint
exprt add_axioms_for_to_lower_case(const function_application_exprt &f)
Conversion of a string to lower case.
binary minus
Definition: std_expr.h:959
Expression to hold a symbol (variable)
Definition: std_expr.h:90
int8_t s1
Definition: bytecode_info.h:59
const array_string_exprt & char_array_of_pointer(const exprt &pointer, const exprt &length)
Adds creates a new array if it does not already exists.
const typet & subtype() const
Definition: type.h:33
std::vector< string_constraintt > constraints
array_string_exprt fresh_string(const typet &index_type, const typet &char_type)
construct a string expression whose length and content are new variables
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
bitvector_typet char_type()
Definition: c_types.cpp:114
exprt add_axioms_for_concat(const array_string_exprt &res, const array_string_exprt &s1, const array_string_exprt &s2)
Add axioms enforcing that res is equal to the concatenation of s1 and s2.
array_string_exprt get_string_expr(const exprt &expr)
casts an expression to a string expression, or fetches the actual string_exprt in the case of a symbo...
exprt add_axioms_for_to_upper_case(const function_application_exprt &f)
Conversion of a string to upper case.