cprover
java_string_literals.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Java string literal processing
4 
5 Author: Chris Smowton, chris.smowton@diffblue.com
6 
7 \*******************************************************************/
8 
9 #include "java_string_literals.h"
10 #include "java_root_class.h"
11 #include "java_types.h"
12 #include "java_utils.h"
13 
14 #include <util/arith_tools.h>
15 #include <util/expr_initializer.h>
16 #include <util/namespace.h>
17 #include <util/unicode.h>
18 
19 #include <iomanip>
20 #include <sstream>
21 
26 static std::string escape_non_alnum(const std::string &to_escape)
27 {
28  std::ostringstream escaped;
29  for(auto &ch : to_escape)
30  {
31  if(ch=='_')
32  escaped << "__";
33  else if(isalnum(ch))
34  escaped << ch;
35  else
36  escaped << '_'
37  << std::hex
38  << std::setfill('0')
39  << std::setw(2)
40  << (unsigned int)ch;
41  }
42  return escaped.str();
43 }
44 
48 static array_exprt utf16_to_array(const std::wstring &in)
49 {
50  const auto jchar=java_char_type();
51  array_exprt ret(
52  array_typet(jchar, from_integer(in.length(), java_int_type())));
53  for(const auto c : in)
54  ret.copy_to_operands(from_integer(c, jchar));
55  return ret;
56 }
57 
67  const exprt &string_expr,
68  symbol_table_baset &symbol_table,
69  bool string_refinement_enabled)
70 {
71  PRECONDITION(string_expr.id() == ID_java_string_literal);
72  const irep_idt value = string_expr.get(ID_value);
73  const symbol_typet string_type("java::java.lang.String");
74 
75  const std::string escaped_symbol_name = escape_non_alnum(id2string(value));
76  const std::string escaped_symbol_name_with_prefix =
77  JAVA_STRING_LITERAL_PREFIX "." + escaped_symbol_name;
78 
79  auto findit = symbol_table.symbols.find(escaped_symbol_name_with_prefix);
80  if(findit != symbol_table.symbols.end())
81  return findit->second.symbol_expr();
82 
83  // Create a new symbol:
84  symbolt new_symbol;
85  new_symbol.name = escaped_symbol_name_with_prefix;
86  new_symbol.type = string_type;
87  new_symbol.base_name = escaped_symbol_name;
88  new_symbol.pretty_name = value;
89  new_symbol.mode = ID_java;
90  new_symbol.is_type = false;
91  new_symbol.is_lvalue = true;
92  new_symbol.is_static_lifetime = true;
93 
94  namespacet ns(symbol_table);
95 
96  // Regardless of string refinement setting, at least initialize
97  // the literal with @clsid = String and @lock = false:
98  symbol_typet jlo_symbol("java::java.lang.Object");
99  const auto &jlo_struct = to_struct_type(ns.follow(jlo_symbol));
100  struct_exprt jlo_init(jlo_symbol);
101  const auto &jls_struct = to_struct_type(ns.follow(string_type));
102  java_root_class_init(jlo_init, jlo_struct, false, "java::java.lang.String");
103 
104  // If string refinement *is* around, populate the actual
105  // contents as well:
106  if(string_refinement_enabled)
107  {
108  const array_exprt data =
110 
111  struct_exprt literal_init(new_symbol.type);
112  literal_init.operands().resize(jls_struct.components().size());
113  const std::size_t jlo_nb = jls_struct.component_number("@java.lang.Object");
114  literal_init.operands()[jlo_nb] = jlo_init;
115 
116  const std::size_t length_nb = jls_struct.component_number("length");
117  const typet &length_type = jls_struct.components()[length_nb].type();
118  const exprt length = from_integer(data.operands().size(), length_type);
119  literal_init.operands()[length_nb] = length;
120 
121  // Initialize the string with a constant utf-16 array:
122  symbolt array_symbol;
123  array_symbol.name = escaped_symbol_name_with_prefix + "_constarray";
124  array_symbol.base_name = escaped_symbol_name + "_constarray";
125  array_symbol.pretty_name = value;
126  array_symbol.mode = ID_java;
127  array_symbol.is_type = false;
128  array_symbol.is_lvalue = true;
129  // These are basically const global data:
130  array_symbol.is_static_lifetime = true;
131  array_symbol.is_state_var = true;
132  array_symbol.value = data;
133  array_symbol.type = array_symbol.value.type();
134 
135  if(symbol_table.add(array_symbol))
136  throw "failed to add constarray symbol to symbol table";
137 
138  const symbol_exprt array_expr = array_symbol.symbol_expr();
139  const address_of_exprt array_pointer(
140  index_exprt(array_expr, from_integer(0, java_int_type())));
141 
142  const std::size_t data_nb = jls_struct.component_number("data");
143  literal_init.operands()[data_nb] = array_pointer;
144 
145  // Associate array with pointer
146  symbolt return_symbol;
147  return_symbol.name = escaped_symbol_name_with_prefix + "_return_value";
148  return_symbol.base_name = escaped_symbol_name + "_return_value";
149  return_symbol.pretty_name =
150  escaped_symbol_name.length() > 10
151  ? escaped_symbol_name.substr(0, 10) + "..._return_value"
152  : escaped_symbol_name + "_return_value";
153  return_symbol.mode = ID_java;
154  return_symbol.is_type = false;
155  return_symbol.is_lvalue = true;
156  return_symbol.is_static_lifetime = true;
157  return_symbol.is_state_var = true;
158  return_symbol.value = make_function_application(
159  ID_cprover_associate_array_to_pointer_func,
160  {array_symbol.value, array_pointer},
161  java_int_type(),
162  symbol_table);
163  return_symbol.type = return_symbol.value.type();
164  if(symbol_table.add(return_symbol))
165  throw "failed to add return symbol to symbol table";
166 
167  // Initialise the literal structure with
168  // (ABC_return_value, { ..., .length = N, .data = &ABC_constarray }),
169  // using a C-style comma expression to indicate that we need the
170  // _return_value global for its side-effects.
171  exprt init_comma_expr(ID_comma);
172  init_comma_expr.type() = literal_init.type();
173  init_comma_expr.copy_to_operands(return_symbol.symbol_expr());
174  init_comma_expr.move_to_operands(literal_init);
175  new_symbol.value = init_comma_expr;
176  }
177  else if(jls_struct.components().size()>=1 &&
178  jls_struct.components()[0].get_name()=="@java.lang.Object")
179  {
180  // Case where something defined java.lang.String, so it has
181  // a proper base class (always java.lang.Object in practical
182  // JDKs seen so far)
183  struct_exprt literal_init(new_symbol.type);
184  literal_init.move_to_operands(jlo_init);
185  for(const auto &comp : jls_struct.components())
186  {
187  if(comp.get_name()=="@java.lang.Object")
188  continue;
189  // Other members of JDK's java.lang.String we don't understand
190  // without string-refinement. Just zero-init them; consider using
191  // test-gen-like nondet object trees instead.
192  literal_init.copy_to_operands(
193  zero_initializer(comp.type(), string_expr.source_location(), ns));
194  }
195  new_symbol.value = literal_init;
196  }
197  else if(jls_struct.get_bool(ID_incomplete_class))
198  {
199  // Case where java.lang.String was stubbed, and so directly defines
200  // @class_identifier and @lock:
201  new_symbol.value = jlo_init;
202  }
203 
204  bool add_failed = symbol_table.add(new_symbol);
205  INVARIANT(
206  !add_failed,
207  "string literal symbol was already checked not to be "
208  "in the symbol table, so adding it should succeed");
209 
210  return new_symbol.symbol_expr();
211 }
The type of an expression.
Definition: type.h:22
irep_idt name
The unique identifier.
Definition: symbol.h:43
#define JAVA_STRING_LITERAL_PREFIX
Definition: java_utils.h:51
const std::string & id2string(const irep_idt &d)
Definition: irep.h:43
irep_idt mode
Language mode.
Definition: symbol.h:52
typet java_int_type()
Definition: java_types.cpp:32
void copy_to_operands(const exprt &expr)
Definition: expr.cpp:55
void move_to_operands(exprt &expr)
Definition: expr.cpp:22
exprt value
Initial value of symbol.
Definition: symbol.h:37
irep_idt pretty_name
Language-specific display name.
Definition: symbol.h:55
typet & type()
Definition: expr.h:56
Symbol table entry.This is a symbol in the symbol table, stored in an object of type symbol_tablet...
Definition: symbol.h:30
bool is_static_lifetime
Definition: symbol.h:67
#define INVARIANT(CONDITION, REASON)
Definition: invariant.h:205
Expression Initialization.
const irep_idt & id() const
Definition: irep.h:189
class symbol_exprt symbol_expr() const
produces a symbol_exprt for a symbol
Definition: symbol.cpp:111
A reference into the symbol table.
Definition: std_types.h:110
exprt make_function_application(const irep_idt &function_name, const exprt::operandst &arguments, const typet &type, symbol_table_baset &symbol_table)
Create a function application expression.
Definition: java_utils.cpp:280
int size
Definition: kdev_t.h:25
const irep_idt & get(const irep_namet &name) const
Definition: irep.cpp:213
TO_BE_DOCUMENTED.
Definition: namespace.h:74
#define PRECONDITION(CONDITION)
Definition: invariant.h:230
const typet & follow(const typet &) const
Definition: namespace.cpp:55
const struct_typet & to_struct_type(const typet &type)
Cast a generic typet to a struct_typet.
Definition: std_types.h:318
Operator to return the address of an object.
Definition: std_expr.h:3170
const symbolst & symbols
static std::string escape_non_alnum(const std::string &to_escape)
Replace non-alphanumeric characters with _xx escapes, where xx are hex digits.
exprt zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns, message_handlert &message_handler)
void java_root_class_init(struct_exprt &jlo, const struct_typet &root_type, const bool lock, const irep_idt &class_identifier)
Adds members for an object of the root class (usually java.lang.Object).
std::wstring utf8_to_utf16_little_endian(const std::string &in)
Definition: unicode.cpp:281
typet type
Type of symbol.
Definition: symbol.h:34
Base class for all expressions.
Definition: expr.h:42
bool is_state_var
Definition: symbol.h:63
irep_idt base_name
Base (non-scoped) name.
Definition: symbol.h:49
The symbol table base class interface.
const source_locationt & source_location() const
Definition: expr.h:125
symbol_exprt get_or_create_string_literal_symbol(const exprt &string_expr, symbol_table_baset &symbol_table, bool string_refinement_enabled)
Creates or gets an existing constant global symbol for a given string literal.
arrays with given size
Definition: std_types.h:1004
Expression to hold a symbol (variable)
Definition: std_expr.h:90
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
bool is_type
Definition: symbol.h:63
typet java_char_type()
Definition: java_types.cpp:57
operandst & operands()
Definition: expr.h:66
static array_exprt utf16_to_array(const std::wstring &in)
Convert UCS-2 or UTF-16 to an array expression.
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
struct constructor from list of elements
Definition: std_expr.h:1815
array constructor from list of elements
Definition: std_expr.h:1617
Definition: kdev_t.h:24
bool is_lvalue
Definition: symbol.h:68
array index operator
Definition: std_expr.h:1462