cprover
Loading...
Searching...
No Matches
remove_java_new.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: Remove Java New Operators
4
5Author: Peter Schrammel
6
7\*******************************************************************/
8
11
12#include "remove_java_new.h"
13
17
18#include "java_types.h"
19#include "java_utils.h"
20
21#include <util/arith_tools.h>
22#include <util/c_types.h>
25#include <util/std_code.h>
26
28{
29public:
32 {
33 }
34
35 // Lower java_new for a single function
36 bool lower_java_new(
37 const irep_idt &function_identifier,
40
41 // Lower java_new for a single instruction
43 const irep_idt &function_identifier,
47
48protected:
51
53 const irep_idt &function_identifier,
54 const exprt &lhs,
55 const side_effect_exprt &rhs,
58
60 const irep_idt &function_identifier,
61 const exprt &lhs,
62 const side_effect_exprt &rhs,
66};
67
81 const irep_idt &function_identifier,
82 const exprt &lhs,
83 const side_effect_exprt &rhs,
84 goto_programt &dest,
86{
87 PRECONDITION(!lhs.is_nil());
88 PRECONDITION(rhs.operands().empty());
89 PRECONDITION(rhs.type().id() == ID_pointer);
90 source_locationt location = rhs.source_location();
91 typet object_type = rhs.type().subtype();
92
93 // build size expression
94 const auto object_size = size_of_expr(object_type, ns);
95 CHECK_RETURN(object_size.has_value());
96
97 // we produce a malloc side-effect, which stays
98 side_effect_exprt malloc_expr(ID_allocate, rhs.type(), location);
99 malloc_expr.copy_to_operands(*object_size);
100 // could use true and get rid of the code below
101 malloc_expr.copy_to_operands(false_exprt());
102 *target =
103 goto_programt::make_assignment(code_assignt(lhs, malloc_expr), location);
104
105 // zero-initialize the object
106 dereference_exprt deref(lhs, object_type);
107 auto zero_object = zero_initializer(object_type, location, ns);
108 CHECK_RETURN(zero_object.has_value());
110 to_struct_expr(*zero_object), ns, to_struct_tag_type(object_type));
111 return dest.insert_after(
112 target,
114 code_assignt(deref, *zero_object), location));
115}
116
131 const irep_idt &function_identifier,
132 const exprt &lhs,
133 const side_effect_exprt &rhs,
134 goto_programt &dest,
136 message_handlert &message_handler)
137{
138 // lower_java_new_array without lhs not implemented
139 PRECONDITION(!lhs.is_nil());
140 PRECONDITION(rhs.operands().size() >= 1); // one per dimension
141 PRECONDITION(rhs.type().id() == ID_pointer);
142
143 source_locationt location = rhs.source_location();
144 struct_tag_typet object_type = to_struct_tag_type(rhs.type().subtype());
145 PRECONDITION(ns.follow(object_type).id() == ID_struct);
146
147 // build size expression
148 const auto object_size = size_of_expr(object_type, ns);
149 CHECK_RETURN(object_size.has_value());
150
151 // we produce a malloc side-effect, which stays
152 side_effect_exprt malloc_expr(ID_allocate, rhs.type(), location);
153 malloc_expr.copy_to_operands(*object_size);
154 // code use true and get rid of the code below
155 malloc_expr.copy_to_operands(false_exprt());
156
157 *target =
158 goto_programt::make_assignment(code_assignt(lhs, malloc_expr), location);
159 goto_programt::targett next = std::next(target);
160
161 const struct_typet &struct_type = to_struct_type(ns.follow(object_type));
162
163 PRECONDITION(is_valid_java_array(struct_type));
164
165 // Init base class:
166 dereference_exprt deref(lhs, object_type);
167 auto zero_object = zero_initializer(object_type, location, ns);
168 CHECK_RETURN(zero_object.has_value());
169 set_class_identifier(to_struct_expr(*zero_object), ns, object_type);
170 dest.insert_before(
171 next,
173 code_assignt(deref, *zero_object), location));
174
175 // If it's a reference array we need to set the dimension and element type
176 // fields. Primitive array types don't have these fields; if the element type
177 // is a void pointer then the element type is statically unknown and the
178 // caller must set these up itself. This happens in array[reference].clone(),
179 // where the type info must be copied over from the input array)
180 const auto underlying_type_and_dimension =
182
183 bool target_type_is_reference_array =
184 underlying_type_and_dimension.second >= 1 &&
185 can_cast_type<java_reference_typet>(underlying_type_and_dimension.first);
186
187 if(target_type_is_reference_array)
188 {
189 exprt object_array_dimension = get_array_dimension_field(lhs);
190 dest.insert_before(
191 next,
193 object_array_dimension,
195 underlying_type_and_dimension.second, object_array_dimension.type()),
196 location)));
197
198 exprt object_array_element_type = get_array_element_type_field(lhs);
199 dest.insert_before(
200 next,
202 object_array_element_type,
204 to_struct_tag_type(underlying_type_and_dimension.first.subtype())
206 string_typet()),
207 location)));
208 }
209
210 // if it's an array, we need to set the length field
211 member_exprt length(
212 deref,
213 struct_type.components()[1].get_name(),
214 struct_type.components()[1].type());
215 dest.insert_before(
216 next,
218 code_assignt(length, to_multi_ary_expr(rhs).op0()), location));
219
220 // we also need to allocate space for the data
222 deref,
223 struct_type.components()[2].get_name(),
224 struct_type.components()[2].type());
225
226 // Allocate a (struct realtype**) instead of a (void**) if possible.
227 const irept &given_element_type = object_type.find(ID_element_type);
228 typet allocate_data_type;
229 if(given_element_type.is_not_nil())
230 {
231 allocate_data_type =
232 pointer_type(static_cast<const typet &>(given_element_type));
233 }
234 else
235 allocate_data_type = data.type();
236
237 side_effect_exprt data_java_new_expr(
238 ID_java_new_array_data, allocate_data_type, location);
239
240 // The instruction may specify a (hopefully small) upper bound on the
241 // array size, in which case we allocate a fixed-length array that may
242 // be larger than the `length` member rather than use a true variable-
243 // length array, which produces a more complex formula in the current
244 // backend.
245 const irept size_bound = rhs.find(ID_length_upper_bound);
246 if(size_bound.is_nil())
247 data_java_new_expr.set(ID_size, to_multi_ary_expr(rhs).op0());
248 else
249 data_java_new_expr.set(ID_size, size_bound);
250
251 // Must directly assign the new array to a temporary
252 // because goto-symex will notice `x=side_effect_exprt` but not
253 // `x=typecast_exprt(side_effect_exprt(...))`
254 symbol_exprt new_array_data_symbol = fresh_java_symbol(
255 data_java_new_expr.type(),
256 "tmp_new_data_array",
257 location,
258 function_identifier,
260 .symbol_expr();
261 code_declt array_decl(new_array_data_symbol);
262 array_decl.add_source_location() = location;
263 dest.insert_before(next, goto_programt::make_decl(array_decl, location));
264 dest.insert_before(
265 next,
267 code_assignt(new_array_data_symbol, data_java_new_expr), location));
268
269 exprt cast_java_new = new_array_data_symbol;
270 if(cast_java_new.type() != data.type())
271 cast_java_new = typecast_exprt(cast_java_new, data.type());
272 dest.insert_before(
273 next,
275 code_assignt(data, cast_java_new), location));
276
277 // zero-initialize the data
278 if(!rhs.get_bool(ID_skip_initialize))
279 {
280 const auto zero_element =
281 zero_initializer(data.type().subtype(), location, ns);
282 CHECK_RETURN(zero_element.has_value());
283 codet array_set(ID_array_set);
284 array_set.copy_to_operands(new_array_data_symbol, *zero_element);
285 dest.insert_before(next, goto_programt::make_other(array_set, location));
286 }
287
288 // multi-dimensional?
289
290 if(rhs.operands().size() >= 2)
291 {
292 // produce
293 // for(int i=0; i<size; i++) tmp[i]=java_new(dim-1);
294 // This will be converted recursively.
295
296 goto_programt tmp;
297
298 symbol_exprt tmp_i =
300 length.type(), "tmp_index", location, function_identifier, symbol_table)
301 .symbol_expr();
302 code_declt decl(tmp_i);
303 decl.add_source_location() = location;
304 tmp.insert_before(
305 tmp.instructions.begin(), goto_programt::make_decl(decl, location));
306
307 side_effect_exprt sub_java_new = rhs;
308 sub_java_new.operands().erase(sub_java_new.operands().begin());
309
310 // we already know that rhs has pointer type
311 typet sub_type =
312 static_cast<const typet &>(rhs.type().subtype().find(ID_element_type));
313 CHECK_RETURN(sub_type.id() == ID_pointer);
314 sub_java_new.type() = sub_type;
315
316 plus_exprt(tmp_i, from_integer(1, tmp_i.type()));
317 dereference_exprt deref_expr(
318 plus_exprt(data, tmp_i), data.type().subtype());
319
320 code_blockt for_body;
321 symbol_exprt init_sym =
323 sub_type, "subarray_init", location, function_identifier, symbol_table)
324 .symbol_expr();
325 code_declt init_decl(init_sym);
326 init_decl.add_source_location() = location;
327 for_body.add(std::move(init_decl));
328
329 code_assignt init_subarray(init_sym, sub_java_new);
330 for_body.add(std::move(init_subarray));
331 code_assignt assign_subarray(
332 deref_expr, typecast_exprt(init_sym, deref_expr.type()));
333 for_body.add(std::move(assign_subarray));
334
336 from_integer(0, tmp_i.type()),
337 to_multi_ary_expr(rhs).op0(),
338 tmp_i,
339 std::move(for_body),
340 location);
341
342 goto_convert(for_loop, symbol_table, tmp, message_handler, ID_java);
343
344 // lower new side effects recursively
345 lower_java_new(function_identifier, tmp, message_handler);
346
347 dest.destructive_insert(next, tmp);
348 }
349
350 return std::prev(next);
351}
352
361 const irep_idt &function_identifier,
362 goto_programt &goto_program,
364 message_handlert &message_handler)
365{
366 if(!target->is_assign())
367 return target;
368
369 if(as_const(*target).assign_rhs().id() == ID_side_effect)
370 {
371 // we make a copy, as we intend to destroy the assignment
372 // inside lower_java_new and lower_java_new_array
373 exprt lhs = target->assign_lhs();
374 exprt rhs = target->assign_rhs();
375
376 const auto &side_effect_expr = to_side_effect_expr(rhs);
377 const auto &statement = side_effect_expr.get_statement();
378
379 if(statement == ID_java_new)
380 {
381 return lower_java_new(
382 function_identifier, lhs, side_effect_expr, goto_program, target);
383 }
384 else if(statement == ID_java_new_array)
385 {
387 function_identifier,
388 lhs,
389 side_effect_expr,
390 goto_program,
391 target,
392 message_handler);
393 }
394 }
395
396 return target;
397}
398
407 const irep_idt &function_identifier,
408 goto_programt &goto_program,
409 message_handlert &message_handler)
410{
411 bool changed = false;
412 for(goto_programt::instructionst::iterator target =
413 goto_program.instructions.begin();
414 target != goto_program.instructions.end();
415 ++target)
416 {
418 function_identifier, goto_program, target, message_handler);
419 changed = changed || new_target == target;
420 target = new_target;
421 }
422 if(!changed)
423 return false;
424 goto_program.update();
425 return true;
426}
427
437 const irep_idt &function_identifier,
439 goto_programt &goto_program,
440 symbol_table_baset &symbol_table,
441 message_handlert &message_handler)
442{
443 remove_java_newt rem{symbol_table};
444 rem.lower_java_new(
445 function_identifier, goto_program, target, message_handler);
446}
447
456 const irep_idt &function_identifier,
458 symbol_table_baset &symbol_table,
459 message_handlert &message_handler)
460{
461 remove_java_newt rem{symbol_table};
462 rem.lower_java_new(function_identifier, function.body, message_handler);
463}
464
472 goto_functionst &goto_functions,
473 symbol_table_baset &symbol_table,
474 message_handlert &message_handler)
475{
476 remove_java_newt rem{symbol_table};
477 bool changed = false;
478 for(auto &f : goto_functions.function_map)
479 changed =
480 rem.lower_java_new(f.first, f.second.body, message_handler) || changed;
481 if(changed)
482 goto_functions.compute_location_numbers();
483}
484
491void remove_java_new(goto_modelt &goto_model, message_handlert &message_handler)
492{
494 goto_model.goto_functions, goto_model.symbol_table, message_handler);
495}
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
const T & as_const(T &value)
Return a reference to the same object but ensures the type is const.
Definition: as_const.h:14
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:253
void set_class_identifier(struct_exprt &expr, const namespacet &ns, const struct_tag_typet &class_type)
If expr has its components filled in then sets the @class_identifier member of the struct.
Extract class identifier.
A codet representing an assignment in the program.
A codet representing sequential composition of program statements.
Definition: std_code.h:130
void add(const codet &code)
Definition: std_code.h:168
A codet representing the declaration of a local variable.
codet representation of a for statement.
Definition: std_code.h:734
static code_fort from_index_bounds(exprt start_index, exprt end_index, symbol_exprt loop_index, codet body, source_locationt location)
Produce a code_fort representing:
Definition: std_code.cpp:158
Data structure for representing an arbitrary statement in a program.
Definition: std_code_base.h:29
A constant literal expression.
Definition: std_expr.h:2807
Operator to dereference a pointer.
Definition: pointer_expr.h:648
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:37
Base class for all expressions.
Definition: expr.h:54
void copy_to_operands(const exprt &expr)
Copy the given argument to the end of exprt's operands.
Definition: expr.h:129
typet & type()
Return the type of the expression.
Definition: expr.h:82
operandst & operands()
Definition: expr.h:92
const source_locationt & source_location() const
Definition: expr.h:230
source_locationt & add_source_location()
Definition: expr.h:235
The Boolean constant false.
Definition: std_expr.h:2865
A collection of goto functions.
function_mapt function_map
::goto_functiont goto_functiont
void compute_location_numbers()
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:30
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:33
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:73
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:598
void update()
Update all indices.
void destructive_insert(const_targett target, goto_programt &p)
Inserts the given program p before target.
Definition: goto_program.h:706
instructionst::iterator targett
Definition: goto_program.h:592
static instructiont make_assignment(const code_assignt &_code, const source_locationt &l=source_locationt::nil())
Create an assignment instruction.
targett insert_after(const_targett target)
Insertion after the instruction pointed-to by the given instruction iterator target.
Definition: goto_program.h:684
static instructiont make_decl(const symbol_exprt &symbol, const source_locationt &l=source_locationt::nil())
Definition: goto_program.h:949
targett insert_before(const_targett target)
Insertion before the instruction pointed-to by the given instruction iterator target.
Definition: goto_program.h:668
static instructiont make_other(const codet &_code, const source_locationt &l=source_locationt::nil())
Definition: goto_program.h:942
There are a large number of kinds of tree structured or tree-like data in CPROVER.
Definition: irep.h:372
bool get_bool(const irep_idt &name) const
Definition: irep.cpp:58
const irept & find(const irep_idt &name) const
Definition: irep.cpp:106
void set(const irep_idt &name, const irep_idt &value)
Definition: irep.h:420
bool is_not_nil() const
Definition: irep.h:380
const irep_idt & id() const
Definition: irep.h:396
bool is_nil() const
Definition: irep.h:376
Extract member of struct or union.
Definition: std_expr.h:2667
const typet & follow(const typet &) const
Resolve type symbol to the type it points to.
Definition: namespace.cpp:49
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:91
The plus expression Associativity is not specified.
Definition: std_expr.h:914
remove_java_newt(symbol_table_baset &symbol_table)
symbol_table_baset & symbol_table
bool lower_java_new(const irep_idt &function_identifier, goto_programt &, message_handlert &)
Replace every java_new or java_new_array by a malloc side-effect and zero initialization.
goto_programt::targett lower_java_new_array(const irep_idt &function_identifier, const exprt &lhs, const side_effect_exprt &rhs, goto_programt &, goto_programt::targett, message_handlert &)
Replaces the instruction lhs = new java_array_type by the following code: lhs = ALLOCATE(java_type) l...
An expression containing a side effect.
Definition: std_code.h:1450
String type.
Definition: std_types.h:901
A struct tag type, i.e., struct_typet with an identifier.
Definition: std_types.h:449
Structure type, corresponds to C style structs.
Definition: std_types.h:231
const componentst & components() const
Definition: std_types.h:147
Expression to hold a symbol (variable)
Definition: std_expr.h:80
The symbol table base class interface.
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:121
const irep_idt & get_identifier() const
Definition: std_types.h:410
Semantic type conversion.
Definition: std_expr.h:1920
The type of an expression, extends irept.
Definition: type.h:29
const typet & subtype() const
Definition: type.h:48
optionalt< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
Expression Initialization.
void goto_convert(const codet &code, symbol_table_baset &symbol_table, goto_programt &dest, message_handlert &message_handler, const irep_idt &mode)
Program Transformation.
Symbol Table + CFG.
bool is_valid_java_array(const struct_typet &type)
Programmatic documentation of the structure of a Java array (of either primitives or references) type...
Definition: java_types.cpp:833
std::pair< typet, std::size_t > java_array_dimension_and_element_type(const struct_tag_typet &type)
Returns the underlying element type and array dimensionality of Java struct type.
Definition: java_types.cpp:187
exprt get_array_element_type_field(const exprt &pointer)
Definition: java_types.cpp:216
exprt get_array_dimension_field(const exprt &pointer)
Definition: java_types.cpp:204
bool can_cast_type< java_reference_typet >(const typet &type)
Definition: java_types.h:624
symbolt & fresh_java_symbol(const typet &type, const std::string &basename_prefix, const source_locationt &source_location, const irep_idt &function_name, symbol_table_baset &symbol_table)
Definition: java_utils.cpp:558
optionalt< exprt > size_of_expr(const typet &type, const namespacet &ns)
Pointer Logic.
exprt object_size(const exprt &pointer)
void remove_java_new(const irep_idt &function_identifier, goto_programt::targett target, goto_programt &goto_program, symbol_table_baset &symbol_table, message_handlert &message_handler)
Replace every java_new or java_new_array by a malloc side-effect and zero initialization.
Remove Java New Operators.
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
side_effect_exprt & to_side_effect_expr(exprt &expr)
Definition: std_code.h:1506
const struct_exprt & to_struct_expr(const exprt &expr)
Cast an exprt to a struct_exprt.
Definition: std_expr.h:1745
const multi_ary_exprt & to_multi_ary_expr(const exprt &expr)
Cast an exprt to a multi_ary_exprt.
Definition: std_expr.h:899
const struct_typet & to_struct_type(const typet &type)
Cast a typet to a struct_typet.
Definition: std_types.h:308
const struct_tag_typet & to_struct_tag_type(const typet &type)
Cast a typet to a struct_tag_typet.
Definition: std_types.h:474
Definition: kdev_t.h:24