cprover
string_constraint_generator_code_points.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Generates string constraints for Java functions dealing with
4  code points
5 
6 Author: Romain Brenguier, romain.brenguier@diffblue.com
7 
8 \*******************************************************************/
9 
12 
14 
15 /******************************************************************* \
16 
17 Function: string_constraint_generatort::add_axioms_for_code_point
18 
19  Inputs: an expression representing a java code point
20 
21  Outputs: a new string expression
22 
23  Purpose: add axioms for the conversion of an integer representing a java
24  code point to a utf-16 string
25 
26 \*******************************************************************/
27 
29  const exprt &code_point, const refined_string_typet &ref_type)
30 {
31  string_exprt res=fresh_string(ref_type);
32  const typet &type=code_point.type();
33  assert(type.id()==ID_signedbv);
34 
35  // We add axioms:
36  // a1 : code_point<0x010000 => |res|=1
37  // a2 : code_point>=0x010000 => |res|=2
38  // a3 : code_point<0x010000 => res[0]=code_point
39  // a4 : code_point>=0x010000 => res[0]=0xD800+(code_point-0x10000)/0x0400
40  // a5 : code_point>=0x010000 => res[1]=0xDC00+(code_point-0x10000)/0x0400
41  // For more explenations about this conversion, see:
42  // https://en.wikipedia.org/wiki/UTF-16
43 
44  exprt hex010000=from_integer(0x010000, type);
45  exprt hexD800=from_integer(0xD800, type);
46  exprt hexDC00=from_integer(0xDC00, type);
47  exprt hex0400=from_integer(0x0400, type);
48 
49  binary_relation_exprt small(code_point, ID_lt, hex010000);
50  implies_exprt a1(small, res.axiom_for_has_length(1));
51  axioms.push_back(a1);
52 
53  implies_exprt a2(not_exprt(small), res.axiom_for_has_length(2));
54  axioms.push_back(a2);
55 
56  typecast_exprt code_point_as_char(code_point, ref_type.get_char_type());
57  implies_exprt a3(small, equal_exprt(res[0], code_point_as_char));
58  axioms.push_back(a3);
59 
60  plus_exprt first_char(
61  hexD800, div_exprt(minus_exprt(code_point, hex010000), hex0400));
62  implies_exprt a4(
63  not_exprt(small),
64  equal_exprt(res[0], typecast_exprt(first_char, ref_type.get_char_type())));
65  axioms.push_back(a4);
66 
67  plus_exprt second_char(hexDC00, mod_exprt(code_point, hex0400));
68  implies_exprt a5(
69  not_exprt(small),
70  equal_exprt(res[1], typecast_exprt(second_char, ref_type.get_char_type())));
71  axioms.push_back(a5);
72 
73  return res;
74 }
75 
83 {
84  return and_exprt(
85  binary_relation_exprt(chr, ID_ge, constant_char(0xD800, chr.type())),
86  binary_relation_exprt(chr, ID_le, constant_char(0xDBFF, chr.type())));
87 }
88 
96 {
97  return and_exprt(
98  binary_relation_exprt(chr, ID_ge, constant_char(0xDC00, chr.type())),
99  binary_relation_exprt(chr, ID_le, constant_char(0xDFFF, chr.type())));
100 }
101 
110 exprt pair_value(exprt char1, exprt char2, typet return_type)
111 {
112  exprt hex010000=from_integer(0x010000, return_type);
113  exprt hex0800=from_integer(0x0800, return_type);
114  exprt hex0400=from_integer(0x0400, return_type);
115  mult_exprt m1(mod_exprt(char1, hex0800), hex0400);
116  mod_exprt m2(char2, hex0400);
117  plus_exprt pair_value(hex010000, plus_exprt(m1, m2));
118  return pair_value;
119 }
120 
127 {
128  typet return_type=f.type();
129  assert(return_type.id()==ID_signedbv);
131  const exprt &pos=args(f, 2)[1];
132 
133  symbol_exprt result=fresh_symbol("char", return_type);
134  exprt index1=from_integer(1, str.length().type());
135  const exprt &char1=str[pos];
136  const exprt &char2=str[plus_exprt(pos, index1)];
137  exprt char1_as_int=typecast_exprt(char1, return_type);
138  exprt char2_as_int=typecast_exprt(char2, return_type);
139  exprt pair=pair_value(char1_as_int, char2_as_int, return_type);
140  exprt is_low=is_low_surrogate(str[plus_exprt(pos, index1)]);
141  exprt return_pair=and_exprt(is_high_surrogate(str[pos]), is_low);
142 
143  axioms.push_back(implies_exprt(return_pair, equal_exprt(result, pair)));
144  axioms.push_back(
145  implies_exprt(not_exprt(return_pair), equal_exprt(result, char1_as_int)));
146  return result;
147 }
148 
155 {
157  assert(args.size()==2);
158  typet return_type=f.type();
159  assert(return_type.id()==ID_signedbv);
160  symbol_exprt result=fresh_symbol("char", return_type);
162 
163  const exprt &char1=
164  str[minus_exprt(args[1], from_integer(2, str.length().type()))];
165  const exprt &char2=
166  str[minus_exprt(args[1], from_integer(1, str.length().type()))];
167  exprt char1_as_int=typecast_exprt(char1, return_type);
168  exprt char2_as_int=typecast_exprt(char2, return_type);
169 
170  exprt pair=pair_value(char1_as_int, char2_as_int, return_type);
171  exprt return_pair=and_exprt(
172  is_high_surrogate(char1), is_low_surrogate(char2));
173 
174  axioms.push_back(implies_exprt(return_pair, equal_exprt(result, pair)));
175  axioms.push_back(
176  implies_exprt(not_exprt(return_pair), equal_exprt(result, char2_as_int)));
177  return result;
178 }
179 
187 {
189  const exprt &begin=args(f, 3)[1];
190  const exprt &end=args(f, 3)[2];
191  const typet &return_type=f.type();
192  symbol_exprt result=fresh_symbol("code_point_count", return_type);
193  minus_exprt length(end, begin);
194  div_exprt minimum(length, from_integer(2, length.type()));
195  axioms.push_back(binary_relation_exprt(result, ID_le, length));
196  axioms.push_back(binary_relation_exprt(result, ID_ge, minimum));
197 
198  return result;
199 }
200 
209 {
211  const exprt &index=args(f, 3)[1];
212  const exprt &offset=args(f, 3)[2];
213  const typet &return_type=f.type();
214  symbol_exprt result=fresh_symbol("offset_by_code_point", return_type);
215 
216  exprt minimum=plus_exprt(index, offset);
217  exprt maximum=plus_exprt(index, plus_exprt(offset, offset));
218  axioms.push_back(binary_relation_exprt(result, ID_le, maximum));
219  axioms.push_back(binary_relation_exprt(result, ID_ge, minimum));
220 
221  return result;
222 }
223 
The type of an expression.
Definition: type.h:20
Boolean negation.
Definition: std_expr.h:2648
semantic type conversion
Definition: std_expr.h:1725
Generates string constraints to link results from string functions with their arguments.
exprt add_axioms_for_code_point_at(const function_application_exprt &f)
add axioms corresponding to the String.codePointAt java function
A generic base class for relations, i.e., binary predicates.
Definition: std_expr.h:568
application of (mathematical) function
Definition: std_expr.h:3785
const typet & get_char_type() const
literalt pos(literalt a)
Definition: literal.h:193
argumentst & arguments()
Definition: std_expr.h:3805
static symbol_exprt fresh_symbol(const irep_idt &prefix, const typet &type=bool_typet())
generate a new symbol expression of the given type with some prefix
exprt add_axioms_for_code_point_count(const function_application_exprt &f)
add axioms giving approximate bounds on the result of the String.codePointCount java function ...
exprt add_axioms_for_offset_by_code_point(const function_application_exprt &f)
add axioms giving approximate bounds on the result of the String.offsetByCodePointCount java function...
equal_exprt axiom_for_has_length(const exprt &rhs) const
Definition: string_expr.h:120
typet & type()
Definition: expr.h:60
boolean implication
Definition: std_expr.h:1926
equality
Definition: std_expr.h:1082
string_exprt fresh_string(const refined_string_typet &type)
construct a string expression whose length and content are new variables
const irep_idt & id() const
Definition: irep.h:189
exprt pair_value(exprt char1, exprt char2, typet return_type)
the output corresponds to the unicode character given by the pair of characters of inputs assuming it...
division (integer and real)
Definition: std_expr.h:854
const exprt & length() const
Definition: string_expr.h:35
exprt::operandst argumentst
Definition: std_expr.h:3803
string_exprt add_axioms_for_string_expr(const exprt &expr)
obtain a refined string expression corresponding to string variable of string function call ...
boolean AND
Definition: std_expr.h:1852
The plus expression.
Definition: std_expr.h:702
binary multiplication
Definition: std_expr.h:806
exprt is_high_surrogate(const exprt &chr) const
the output is true when the character is a high surrogate for UTF-16 encoding, see https://en...
static constant_exprt constant_char(int i, const typet &char_type)
generate constant character expression with character type.
Base class for all expressions.
Definition: expr.h:46
exprt is_low_surrogate(const exprt &chr) const
the output is true when the character is a low surrogate for UTF-16 encoding, see https://en...
exprt add_axioms_for_code_point_before(const function_application_exprt &f)
add axioms corresponding to the String.codePointBefore java function
binary minus
Definition: std_expr.h:758
Expression to hold a symbol (variable)
Definition: std_expr.h:82
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
string_exprt add_axioms_for_code_point(const exprt &code_point, const refined_string_typet &ref_type)
static const function_application_exprt::argumentst & args(const function_application_exprt &expr, size_t nb)
binary modulo
Definition: std_expr.h:902