cprover
string_constraint_generator_code_points.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Generates string constraints for Java functions dealing with
4  code points
5 
6 Author: Romain Brenguier, romain.brenguier@diffblue.com
7 
8 \*******************************************************************/
9 
12 
14 
20 std::pair<exprt, string_constraintst> add_axioms_for_code_point(
21  const array_string_exprt &res,
22  const exprt &code_point)
23 {
24  string_constraintst constraints;
25  const typet &char_type = res.content().type().subtype();
26  const typet &type=code_point.type();
27  PRECONDITION(type.id()==ID_signedbv);
28 
29  // We add axioms:
30  // a1 : code_point<0x010000 => |res|=1
31  // a2 : code_point>=0x010000 => |res|=2
32  // a3 : code_point<0x010000 => res[0]=code_point
33  // a4 : code_point>=0x010000 => res[0]=0xD800+(code_point-0x10000)/0x0400
34  // a5 : code_point>=0x010000 => res[1]=0xDC00+(code_point-0x10000)/0x0400
35  // For more explenations about this conversion, see:
36  // https://en.wikipedia.org/wiki/UTF-16
37 
38  exprt hex010000=from_integer(0x010000, type);
39  exprt hexD800=from_integer(0xD800, type);
40  exprt hexDC00=from_integer(0xDC00, type);
41  exprt hex0400=from_integer(0x0400, type);
42 
43  binary_relation_exprt small(code_point, ID_lt, hex010000);
44  implies_exprt a1(small, length_eq(res, 1));
45  constraints.existential.push_back(a1);
46 
47  implies_exprt a2(not_exprt(small), length_eq(res, 2));
48  constraints.existential.push_back(a2);
49 
50  typecast_exprt code_point_as_char(code_point, char_type);
51  implies_exprt a3(small, equal_exprt(res[0], code_point_as_char));
52  constraints.existential.push_back(a3);
53 
54  plus_exprt first_char(
55  hexD800, div_exprt(minus_exprt(code_point, hex010000), hex0400));
56  implies_exprt a4(
57  not_exprt(small),
58  equal_exprt(res[0], typecast_exprt(first_char, char_type)));
59  constraints.existential.push_back(a4);
60 
61  plus_exprt second_char(hexDC00, mod_exprt(code_point, hex0400));
62  implies_exprt a5(
63  not_exprt(small),
64  equal_exprt(res[1], typecast_exprt(second_char, char_type)));
65  constraints.existential.push_back(a5);
66 
67  return {from_integer(0, get_return_code_type()), constraints};
68 }
69 
76 static exprt is_high_surrogate(const exprt &chr)
77 {
78  return and_exprt(
79  binary_relation_exprt(chr, ID_ge, from_integer(0xD800, chr.type())),
80  binary_relation_exprt(chr, ID_le, from_integer(0xDBFF, chr.type())));
81 }
82 
89 static exprt is_low_surrogate(const exprt &chr)
90 {
91  return and_exprt(
92  binary_relation_exprt(chr, ID_ge, from_integer(0xDC00, chr.type())),
93  binary_relation_exprt(chr, ID_le, from_integer(0xDFFF, chr.type())));
94 }
95 
105 exprt pair_value(exprt char1, exprt char2, typet return_type)
106 {
107  exprt hex010000=from_integer(0x010000, return_type);
108  exprt hex0800=from_integer(0x0800, return_type);
109  exprt hex0400=from_integer(0x0400, return_type);
110  mult_exprt m1(mod_exprt(char1, hex0800), hex0400);
111  mod_exprt m2(char2, hex0400);
112  plus_exprt pair_value(hex010000, plus_exprt(m1, m2));
113  return std::move(pair_value);
114 }
115 
122 std::pair<exprt, string_constraintst> add_axioms_for_code_point_at(
123  symbol_generatort &fresh_symbol,
125  array_poolt &array_pool)
126 {
127  string_constraintst constraints;
128  const typet &return_type = f.type();
129  PRECONDITION(return_type.id()==ID_signedbv);
130  PRECONDITION(f.arguments().size() == 2);
131  const array_string_exprt str = get_string_expr(array_pool, f.arguments()[0]);
132  const exprt &pos = f.arguments()[1];
133 
134  const symbol_exprt result = fresh_symbol("char", return_type);
135  const exprt index1 = from_integer(1, str.length().type());
136  const exprt &char1=str[pos];
137  const exprt &char2 = str[plus_exprt(pos, index1)];
138  const typecast_exprt char1_as_int(char1, return_type);
139  const typecast_exprt char2_as_int(char2, return_type);
140  const exprt pair = pair_value(char1_as_int, char2_as_int, return_type);
141  const exprt is_low = is_low_surrogate(str[plus_exprt(pos, index1)]);
142  const and_exprt return_pair(is_high_surrogate(str[pos]), is_low);
143 
144  constraints.existential.push_back(
145  implies_exprt(return_pair, equal_exprt(result, pair)));
146  constraints.existential.push_back(
147  implies_exprt(not_exprt(return_pair), equal_exprt(result, char1_as_int)));
148  return {result, constraints};
149 }
150 
155 std::pair<exprt, string_constraintst> add_axioms_for_code_point_before(
156  symbol_generatort &fresh_symbol,
158  array_poolt &array_pool)
159 {
161  PRECONDITION(args.size()==2);
162  typet return_type=f.type();
163  PRECONDITION(return_type.id()==ID_signedbv);
164  symbol_exprt result=fresh_symbol("char", return_type);
165  array_string_exprt str = get_string_expr(array_pool, args[0]);
166  string_constraintst constraints;
167 
168  const exprt &char1=
169  str[minus_exprt(args[1], from_integer(2, str.length().type()))];
170  const exprt &char2=
171  str[minus_exprt(args[1], from_integer(1, str.length().type()))];
172  const typecast_exprt char1_as_int(char1, return_type);
173  const typecast_exprt char2_as_int(char2, return_type);
174 
175  const exprt pair = pair_value(char1_as_int, char2_as_int, return_type);
176  const and_exprt return_pair(
177  is_high_surrogate(char1), is_low_surrogate(char2));
178 
179  constraints.existential.push_back(
180  implies_exprt(return_pair, equal_exprt(result, pair)));
181  constraints.existential.push_back(
182  implies_exprt(not_exprt(return_pair), equal_exprt(result, char2_as_int)));
183  return {result, constraints};
184 }
185 
193 std::pair<exprt, string_constraintst> add_axioms_for_code_point_count(
194  symbol_generatort &fresh_symbol,
196  array_poolt &array_pool)
197 {
198  PRECONDITION(f.arguments().size() == 3);
199  string_constraintst constraints;
200  const array_string_exprt str = get_string_expr(array_pool, f.arguments()[0]);
201  const exprt &begin = f.arguments()[1];
202  const exprt &end = f.arguments()[2];
203  const typet &return_type=f.type();
204  const symbol_exprt result = fresh_symbol("code_point_count", return_type);
205  const minus_exprt length(end, begin);
206  const div_exprt minimum(length, from_integer(2, length.type()));
207  constraints.existential.push_back(
208  binary_relation_exprt(result, ID_le, length));
209  constraints.existential.push_back(
210  binary_relation_exprt(result, ID_ge, minimum));
211 
212  return {result, constraints};
213 }
214 
222 std::pair<exprt, string_constraintst> add_axioms_for_offset_by_code_point(
223  symbol_generatort &fresh_symbol,
225 {
226  PRECONDITION(f.arguments().size() == 3);
227  string_constraintst constraints;
228  const exprt &index = f.arguments()[1];
229  const exprt &offset = f.arguments()[2];
230  const typet &return_type=f.type();
231  const symbol_exprt result = fresh_symbol("offset_by_code_point", return_type);
232 
233  const exprt minimum = plus_exprt(index, offset);
234  const exprt maximum = plus_exprt(minimum, offset);
235  constraints.existential.push_back(
236  binary_relation_exprt(result, ID_le, maximum));
237  constraints.existential.push_back(
238  binary_relation_exprt(result, ID_ge, minimum));
239 
240  return {result, constraints};
241 }
242 
The type of an expression, extends irept.
Definition: type.h:27
Boolean negation.
Definition: std_expr.h:3308
Semantic type conversion.
Definition: std_expr.h:2277
Generates string constraints to link results from string functions with their arguments.
A base class for relations, i.e., binary predicates.
Definition: std_expr.h:878
Application of (mathematical) function.
Definition: std_expr.h:4481
std::pair< exprt, string_constraintst > add_axioms_for_code_point_before(symbol_generatort &fresh_symbol, const function_application_exprt &f, array_poolt &array_pool)
add axioms corresponding to the String.codePointBefore java function
std::pair< exprt, string_constraintst > add_axioms_for_offset_by_code_point(symbol_generatort &fresh_symbol, const function_application_exprt &f)
add axioms giving approximate bounds on the result of the String.offsetByCodePointCount java function...
Generation of fresh symbols of a given type.
literalt pos(literalt a)
Definition: literal.h:193
argumentst & arguments()
Definition: std_expr.h:4506
typet & type()
Return the type of the expression.
Definition: expr.h:68
signedbv_typet get_return_code_type()
Correspondance between arrays and pointers string representations.
Collection of constraints of different types: existential formulas, universal formulas, and "not contains" (universal with one alternation).
exprt minimum(const exprt &a, const exprt &b)
Boolean implication.
Definition: std_expr.h:2485
Equality.
Definition: std_expr.h:1484
const irep_idt & id() const
Definition: irep.h:259
array_string_exprt get_string_expr(array_poolt &pool, const exprt &expr)
casts an expression to a string expression, or fetches the actual string_exprt in the case of a symbo...
exprt pair_value(exprt char1, exprt char2, typet return_type)
the output corresponds to the unicode character given by the pair of characters of inputs assuming it...
exprt & content()
Definition: string_expr.h:80
static exprt is_high_surrogate(const exprt &chr)
the output is true when the character is a high surrogate for UTF-16 encoding, see https://en...
Division.
Definition: std_expr.h:1211
Boolean AND.
Definition: std_expr.h:2409
exprt::operandst argumentst
Definition: std_expr.h:4484
#define PRECONDITION(CONDITION)
Definition: invariant.h:438
The plus expression Associativity is not specified.
Definition: std_expr.h:1042
Binary multiplication Associativity is not specified.
Definition: std_expr.h:1159
exprt maximum(const exprt &a, const exprt &b)
std::vector< exprt > existential
static exprt is_low_surrogate(const exprt &chr)
the output is true when the character is a low surrogate for UTF-16 encoding, see https://en...
exprt & length()
Definition: string_expr.h:70
equal_exprt length_eq(const T &lhs, const exprt &rhs)
Definition: string_expr.h:54
Base class for all expressions.
Definition: expr.h:54
std::pair< exprt, string_constraintst > add_axioms_for_code_point(const array_string_exprt &res, const exprt &code_point)
add axioms for the conversion of an integer representing a java code point to a utf-16 string ...
Binary minus.
Definition: std_expr.h:1106
Expression to hold a symbol (variable)
Definition: std_expr.h:143
const typet & subtype() const
Definition: type.h:38
std::pair< exprt, string_constraintst > add_axioms_for_code_point_count(symbol_generatort &fresh_symbol, const function_application_exprt &f, array_poolt &array_pool)
add axioms giving approximate bounds on the result of the String.codePointCount java function ...
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
std::pair< exprt, string_constraintst > add_axioms_for_code_point_at(symbol_generatort &fresh_symbol, const function_application_exprt &f, array_poolt &array_pool)
add axioms corresponding to the String.codePointAt java function
bitvector_typet char_type()
Definition: c_types.cpp:114
Modulo.
Definition: std_expr.h:1287