cprover
string_constraint_generator_transformation.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Generates string constraints for string transformations,
4  that is, functions taking one string and returning another
5 
6 Author: Romain Brenguier, romain.brenguier@diffblue.com
7 
8 \*******************************************************************/
9 
13 
15 
26 {
28  exprt k=args(f, 2)[1];
29  const refined_string_typet &ref_type=to_refined_string_type(s1.type());
30  string_exprt res=fresh_string(ref_type);
31 
32  // We add axioms:
33  // a1 : |res|=k
34  // a2 : forall i<k. (i<k ==> s[i]=s1[i]) &&(i >= k ==> s[i]=0)
35 
36  axioms.push_back(res.axiom_for_has_length(k));
37 
39  "QA_index_set_length", ref_type.get_index_type());
41  idx, k, and_exprt(
44  equal_exprt(s1[idx], res[idx])),
47  equal_exprt(s1[idx], constant_char(0, ref_type.get_char_type())))));
48  axioms.push_back(a2);
49 
50  return res;
51 }
52 
53 
63 {
65  assert(args.size()>=2);
67  exprt i(args[1]);
68  exprt j;
69  if(args.size()==3)
70  {
71  j=args[2];
72  }
73  else
74  {
75  assert(args.size()==2);
76  j=str.length();
77  }
78  return add_axioms_for_substring(str, i, j);
79 }
80 
88  const string_exprt &str, const exprt &start, const exprt &end)
89 {
90  const refined_string_typet &ref_type=to_refined_string_type(str.type());
91  const typet &index_type=ref_type.get_index_type();
92  symbol_exprt idx=fresh_exist_index("index_substring", index_type);
93  assert(start.type()==index_type);
94  assert(end.type()==index_type);
95  string_exprt res=fresh_string(ref_type);
96 
97  // We add axioms:
98  // a1 : start < end => |res| = end - start
99  // a2 : start >= end => |res| = 0
100  // a3 : |str| > end
101  // a4 : forall idx<str.length, str[idx]=arg_str[idx+i]
102 
103  implies_exprt a1(
104  binary_relation_exprt(start, ID_lt, end),
105  res.axiom_for_has_length(minus_exprt(end, start)));
106  axioms.push_back(a1);
107 
108  exprt is_empty=res.axiom_for_has_length(from_integer(0, index_type));
109  implies_exprt a2(binary_relation_exprt(start, ID_ge, end), is_empty);
110  axioms.push_back(a2);
111 
112  // Warning: check what to do if the string is not long enough
113  axioms.push_back(str.axiom_for_is_longer_than(end));
114 
116  idx, res.length(), equal_exprt(res[idx], str[plus_exprt(start, idx)]));
117  axioms.push_back(a4);
118  return res;
119 }
120 
125  const function_application_exprt &expr)
126 {
128  const refined_string_typet &ref_type=to_refined_string_type(str.type());
129  const typet &index_type=ref_type.get_index_type();
130  string_exprt res=fresh_string(ref_type);
131  symbol_exprt idx=fresh_exist_index("index_trim", index_type);
132  exprt space_char=constant_char(' ', ref_type.get_char_type());
133 
134  // We add axioms:
135  // a1 : m + |s1| <= |str|
136  // a2 : idx >= 0
137  // a3 : str >= idx
138  // a4 : |res|>= 0
139  // a5 : |res|<=|str|
140  // (this is necessary to prevent exceeding the biggest integer)
141  // a6 : forall n<m, str[n]<=' '
142  // a7 : forall n<|str|-m-|s1|, str[m+|s1|+n]<=' '
143  // a8 : forall n<|s1|, s[idx+n]=s1[n]
144  // a9 : (s[m]>' ' &&s[m+|s1|-1]>' ') || m=|s|
145 
146  exprt a1=str.axiom_for_is_longer_than(plus_exprt(idx, res.length()));
147  axioms.push_back(a1);
148 
149  binary_relation_exprt a2(idx, ID_ge, from_integer(0, index_type));
150  axioms.push_back(a2);
151 
152  exprt a3=str.axiom_for_is_longer_than(idx);
153  axioms.push_back(a3);
154 
156  from_integer(0, index_type));
157  axioms.push_back(a4);
158 
159  exprt a5=res.axiom_for_is_shorter_than(str);
160  axioms.push_back(a5);
161 
162  symbol_exprt n=fresh_univ_index("QA_index_trim", index_type);
163  binary_relation_exprt non_print(str[n], ID_le, space_char);
164  string_constraintt a6(n, idx, non_print);
165  axioms.push_back(a6);
166 
167  symbol_exprt n2=fresh_univ_index("QA_index_trim2", index_type);
168  minus_exprt bound(str.length(), plus_exprt(idx, res.length()));
170  str[plus_exprt(idx, plus_exprt(res.length(), n2))],
171  ID_le,
172  space_char);
173 
174  string_constraintt a7(n2, bound, eqn2);
175  axioms.push_back(a7);
176 
177  symbol_exprt n3=fresh_univ_index("QA_index_trim3", index_type);
178  equal_exprt eqn3(res[n3], str[plus_exprt(n3, idx)]);
179  string_constraintt a8(n3, res.length(), eqn3);
180  axioms.push_back(a8);
181 
182  minus_exprt index_before(
183  plus_exprt(idx, res.length()), from_integer(1, index_type));
184  binary_relation_exprt no_space_before(str[index_before], ID_gt, space_char);
185  or_exprt a9(
186  equal_exprt(idx, str.length()),
187  and_exprt(
188  binary_relation_exprt(str[idx], ID_gt, space_char),
189  no_space_before));
190  axioms.push_back(a9);
191  return res;
192 }
193 
198  const function_application_exprt &expr)
199 {
201  const refined_string_typet &ref_type=to_refined_string_type(str.type());
202  const typet &char_type=ref_type.get_char_type();
203  const typet &index_type=ref_type.get_index_type();
204  string_exprt res=fresh_string(ref_type);
205  exprt char_a=constant_char('a', char_type);
206  exprt char_A=constant_char('A', char_type);
207  exprt char_z=constant_char('z', char_type);
208  exprt char_Z=constant_char('Z', char_type);
209 
210  // TODO: add support for locales using case mapping information
211  // from the UnicodeData file.
212 
213  // We add axioms:
214  // a1 : |res| = |str|
215  // a2 : forall idx<str.length, 'A'<=str[idx]<='Z' => res[idx]=str[idx]+'a'-'A'
216  // a3 : forall idx<str.length, !('a'<=str[idx]<='z') => res[idx]=str[idx]
217  // forall idx<str.length,
218  // this[idx]='A'<=str[idx]<='Z' ? str[idx]+'a'-'A' : str[idx]
219 
221  axioms.push_back(a1);
222 
223  symbol_exprt idx=fresh_univ_index("QA_lower_case", index_type);
224  exprt is_upper_case=and_exprt(
225  binary_relation_exprt(char_A, ID_le, str[idx]),
226  binary_relation_exprt(str[idx], ID_le, char_Z));
227  minus_exprt diff(char_a, char_A);
228  equal_exprt convert(res[idx], plus_exprt(str[idx], diff));
229  string_constraintt a2(idx, res.length(), is_upper_case, convert);
230  axioms.push_back(a2);
231 
232  equal_exprt eq(res[idx], str[idx]);
233  string_constraintt a3(idx, res.length(), not_exprt(is_upper_case), eq);
234  axioms.push_back(a3);
235  return res;
236 }
237 
242  const function_application_exprt &expr)
243 {
245  const refined_string_typet &ref_type=to_refined_string_type(str.type());
246  const typet &char_type=ref_type.get_char_type();
247  const typet &index_type=ref_type.get_index_type();
248  string_exprt res=fresh_string(ref_type);
249  exprt char_a=constant_char('a', char_type);
250  exprt char_A=constant_char('A', char_type);
251  exprt char_z=constant_char('z', char_type);
252  exprt char_Z=constant_char('Z', char_type);
253 
254  // TODO: add support for locales using case mapping information
255  // from the UnicodeData file.
256 
257  // We add axioms:
258  // a1 : |res| = |str|
259  // a2 : forall idx<str.length, 'a'<=str[idx]<='z' => res[idx]=str[idx]+'A'-'a'
260  // a3 : forall idx<str.length, !('a'<=str[idx]<='z') => res[idx]=str[idx]
261 
263  axioms.push_back(a1);
264 
265  symbol_exprt idx=fresh_univ_index("QA_upper_case", index_type);
266  exprt is_lower_case=and_exprt(
267  binary_relation_exprt(char_a, ID_le, str[idx]),
268  binary_relation_exprt(str[idx], ID_le, char_z));
269  minus_exprt diff(char_A, char_a);
270  equal_exprt convert(res[idx], plus_exprt(str[idx], diff));
271  string_constraintt a2(idx, res.length(), is_lower_case, convert);
272  axioms.push_back(a2);
273 
274  equal_exprt eq(res[idx], str[idx]);
275  string_constraintt a3(idx, res.length(), not_exprt(is_lower_case), eq);
276  axioms.push_back(a3);
277  return res;
278 }
279 
280 
290 {
292  const refined_string_typet &ref_type=to_refined_string_type(str.type());
293  string_exprt res=fresh_string(ref_type);
294  with_exprt sarrnew(str.content(), args(f, 3)[1], args(f, 3)[2]);
295 
296  // We add axiom:
297  // a1 : arg1 < |str| => res = str with [arg1]=arg2
298 
299  implies_exprt a1(
300  binary_relation_exprt(args(f, 3)[1], ID_lt, str.length()),
301  and_exprt(
302  equal_exprt(res.content(), sarrnew),
303  res.axiom_for_has_same_length_as(str)));
304  axioms.push_back(a1);
305  return res;
306 }
307 
315 {
317  const refined_string_typet &ref_type=to_refined_string_type(str.type());
318  const exprt &old_char=args(f, 3)[1];
319  const exprt &new_char=args(f, 3)[2];
320  string_exprt res=fresh_string(ref_type);
321 
322  // We add axioms:
323  // a1 : |res| = |str|
324  // a2 : forall qvar, 0<=qvar<|res|,
325  // str[qvar]=oldChar => res[qvar]=newChar
326  // !str[qvar]=oldChar => res[qvar]=str[qvar]
327 
328  axioms.push_back(res.axiom_for_has_same_length_as(str));
329 
330  symbol_exprt qvar=fresh_univ_index("QA_replace", ref_type.get_index_type());
331  implies_exprt case1(
332  equal_exprt(str[qvar], old_char),
333  equal_exprt(res[qvar], new_char));
334  implies_exprt case2(
335  not_exprt(equal_exprt(str[qvar], old_char)),
336  equal_exprt(res[qvar], str[qvar]));
337  string_constraintt a2(qvar, res.length(), and_exprt(case1, case2));
338  axioms.push_back(a2);
339  return res;
340 }
341 
349 {
351  exprt index_one=from_integer(1, str.length().type());
352  return add_axioms_for_delete(
353  str, args(f, 2)[1], plus_exprt(args(f, 2)[1], index_one));
354 }
355 
362  const string_exprt &str, const exprt &start, const exprt &end)
363 {
364  assert(start.type()==str.length().type());
365  assert(end.type()==str.length().type());
367  str, from_integer(0, str.length().type()), start);
368  string_exprt str2=add_axioms_for_substring(str, end, str.length());
369  return add_axioms_for_concat(str1, str2);
370 }
371 
378 {
380  return add_axioms_for_delete(str, args(f, 3)[1], args(f, 3)[2]);
381 }
The type of an expression.
Definition: type.h:20
Boolean negation.
Definition: std_expr.h:2648
Generates string constraints to link results from string functions with their arguments.
A generic base class for relations, i.e., binary predicates.
Definition: std_expr.h:568
string_exprt add_axioms_for_delete(const string_exprt &str, const exprt &start, const exprt &end)
add axioms stating that the returned string corresponds to the input one where we removed characters ...
string_exprt add_axioms_for_delete_char_at(const function_application_exprt &expr)
add axioms corresponding to the StringBuilder.deleteCharAt java function
application of (mathematical) function
Definition: std_expr.h:3785
boolean OR
Definition: std_expr.h:1968
const typet & get_char_type() const
argumentst & arguments()
Definition: std_expr.h:3805
symbol_exprt fresh_univ_index(const irep_idt &prefix, const typet &type)
generate an index symbol to be used as an universaly quantified variable
equal_exprt axiom_for_has_same_length_as(const string_exprt &rhs) const
Definition: string_expr.h:114
typet & type()
Definition: expr.h:60
string_exprt add_axioms_for_trim(const function_application_exprt &expr)
add axioms corresponding to the String.trim java function
binary_relation_exprt axiom_for_is_shorter_than(const string_exprt &rhs) const
Definition: string_expr.h:85
boolean implication
Definition: std_expr.h:1926
equality
Definition: std_expr.h:1082
string_exprt fresh_string(const refined_string_typet &type)
construct a string expression whose length and content are new variables
string_exprt add_axioms_for_char_set(const function_application_exprt &expr)
add axioms corresponding stating that the result is similar to that of the StringBuilder.setCharAt java function Warning: this may be underspecified in the case wher the index exceed the length of the string
const exprt & length() const
Definition: string_expr.h:35
exprt::operandst argumentst
Definition: std_expr.h:3803
string_exprt add_axioms_for_string_expr(const exprt &expr)
obtain a refined string expression corresponding to string variable of string function call ...
const refined_string_typet & to_refined_string_type(const typet &type)
string_exprt add_axioms_for_concat(const string_exprt &s1, const string_exprt &s2)
add axioms to say that the returned string expression is equal to the concatenation of the two string...
boolean AND
Definition: std_expr.h:1852
The plus expression.
Definition: std_expr.h:702
void convert(const goto_functionst::goto_functiont &function, xmlt &xml)
takes a goto_function and creates an according xml structure
const typet & get_index_type() const
bitvector_typet index_type()
Definition: c_types.cpp:15
binary_relation_exprt axiom_for_is_longer_than(const string_exprt &rhs) const
Definition: string_expr.h:56
static constant_exprt constant_char(int i, const typet &char_type)
generate constant character expression with character type.
symbol_exprt fresh_exist_index(const irep_idt &prefix, const typet &type)
generate an index symbol which is existentially quantified
string_exprt add_axioms_for_to_lower_case(const function_application_exprt &expr)
add axioms corresponding to the String.toLowerCase java function
Base class for all expressions.
Definition: expr.h:46
const exprt & content() const
Definition: string_expr.h:39
Operator to update elements in structs and arrays.
Definition: std_expr.h:2861
string_exprt add_axioms_for_to_upper_case(const function_application_exprt &expr)
add axioms corresponding to the String.toUpperCase java function
bool is_empty(const std::string &s)
binary_relation_exprt axiom_for_is_strictly_longer_than(const exprt &rhs) const
Definition: string_expr.h:68
string_exprt add_axioms_for_set_length(const function_application_exprt &f)
add axioms to say that the returned string expression has length given by the second argument and who...
binary minus
Definition: std_expr.h:758
string_exprt add_axioms_for_replace(const function_application_exprt &f)
add axioms corresponding to the String.replace java function
Expression to hold a symbol (variable)
Definition: std_expr.h:82
int8_t s1
Definition: bytecode_info.h:59
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
static const function_application_exprt::argumentst & args(const function_application_exprt &expr, size_t nb)
bitvector_typet char_type()
Definition: c_types.cpp:113
string_exprt add_axioms_for_substring(const string_exprt &str, const exprt &start, const exprt &end)
add axioms stating that the returned string expression is equal to the input one starting at start an...