cprover
Loading...
Searching...
No Matches
java_bytecode_convert_method.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: JAVA Bytecode Language Conversion
4
5Author: Daniel Kroening, kroening@kroening.com
6
7\*******************************************************************/
8
11
12#ifdef DEBUG
13#include <iostream>
14#endif
15
17
18#include <util/arith_tools.h>
19#include <util/bitvector_expr.h>
20#include <util/c_types.h>
22#include <util/floatbv_expr.h>
23#include <util/ieee_float.h>
24#include <util/invariant.h>
25#include <util/namespace.h>
26#include <util/prefix.h>
27#include <util/prefix_filter.h> // IWYU pragma: keep
28#include <util/std_expr.h>
30#include <util/threeval.h>
31
33
35
36#include "bytecode_info.h"
38#include "java_expr.h"
42#include "java_types.h"
43#include "java_utils.h"
44#include "lambda_synthesis.h"
45#include "pattern.h"
46
47#include <algorithm>
48#include <limits>
49
63 java_method_typet &ftype,
64 const irep_idt &name_prefix,
65 symbol_table_baset &symbol_table)
66{
67 java_method_typet::parameterst &parameters = ftype.parameters();
68
69 // Mostly borrowed from java_bytecode_convert.cpp; maybe factor this out.
70 // assign names to parameters
71 for(std::size_t i=0; i<parameters.size(); ++i)
72 {
73 irep_idt base_name, identifier;
74
75 if(i==0 && parameters[i].get_this())
76 base_name = ID_this;
77 else
78 base_name="stub_ignored_arg"+std::to_string(i);
79
80 identifier=id2string(name_prefix)+"::"+id2string(base_name);
81 parameters[i].set_base_name(base_name);
82 parameters[i].set_identifier(identifier);
83
84 // add to symbol table
85 parameter_symbolt parameter_symbol;
86 parameter_symbol.base_name=base_name;
87 parameter_symbol.mode=ID_java;
88 parameter_symbol.name=identifier;
89 parameter_symbol.type=parameters[i].type();
90 symbol_table.add(parameter_symbol);
91 }
92}
93
95 const irep_idt &identifier,
96 const irep_idt &base_name,
97 const irep_idt &pretty_name,
98 const typet &type,
100 symbol_table_baset &symbol_table,
101 message_handlert &message_handler)
102{
103 messaget log(message_handler);
104
105 symbolt symbol{identifier, type, ID_java};
106 symbol.base_name = base_name;
107 symbol.pretty_name = pretty_name;
108 symbol.type.set(ID_access, ID_private);
109 to_java_method_type(symbol.type).set_is_final(true);
111 to_java_method_type(symbol.type), symbol.name, symbol_table);
113
114 log.debug() << "Generating codet: new opaque symbol: method '" << symbol.name
115 << "'" << messaget::eom;
116 symbol_table.add(symbol);
117}
118
119static bool is_constructor(const irep_idt &method_name)
120{
121 return id2string(method_name).find("<init>") != std::string::npos;
122}
123
125{
126 if(stack.size()<n)
127 {
128 log.error() << "malformed bytecode (pop too high)" << messaget::eom;
129 throw 0;
130 }
131
132 exprt::operandst operands;
133 operands.resize(n);
134 for(std::size_t i=0; i<n; i++)
135 operands[i]=stack[stack.size()-n+i];
136
137 stack.resize(stack.size()-n);
138 return operands;
139}
140
143{
144 std::size_t residue_size=std::min(n, stack.size());
145
146 stack.resize(stack.size()-residue_size);
147}
148
150{
151 stack.resize(stack.size()+o.size());
152
153 for(std::size_t i=0; i<o.size(); i++)
154 stack[stack.size()-o.size()+i]=o[i];
155}
156
157// JVM program locations
159{
160 return "pc"+id2string(address);
161}
162
164 const std::string &prefix,
165 const typet &type)
166{
167 irep_idt base_name=prefix+"_tmp"+std::to_string(tmp_vars.size());
168 irep_idt identifier=id2string(current_method)+"::"+id2string(base_name);
169
170 auxiliary_symbolt tmp_symbol;
171 tmp_symbol.base_name=base_name;
172 tmp_symbol.is_static_lifetime=false;
173 tmp_symbol.mode=ID_java;
174 tmp_symbol.name=identifier;
175 tmp_symbol.type=type;
176 symbol_table.add(tmp_symbol);
177
178 symbol_exprt result(identifier, type);
179 result.set(ID_C_base_name, base_name);
180 tmp_vars.push_back(result);
181
182 return result;
183}
184
197 const exprt &arg,
198 char type_char,
199 size_t address)
200{
201 const std::size_t number_int =
203 variablest &var_list=variables[number_int];
204
205 // search variable in list for correct frame / address if necessary
206 const variablet &var=
207 find_variable_for_slot(address, var_list);
208
209 if(!var.symbol_expr.get_identifier().empty())
210 return var.symbol_expr;
211
212 // an unnamed local variable
213 irep_idt base_name = "anonlocal::" + std::to_string(number_int) + type_char;
214 irep_idt identifier = id2string(current_method) + "::" + id2string(base_name);
215
216 symbol_exprt result(identifier, java_type_from_char(type_char));
217 result.set(ID_C_base_name, base_name);
218 used_local_names.insert(result);
219 return std::move(result);
220}
221
230 const std::string &descriptor,
231 const optionalt<std::string> &signature,
232 const std::string &class_name,
233 const std::string &method_name,
234 message_handlert &message_handler)
235{
236 // In order to construct the method type, we can either use signature or
237 // descriptor. Since only signature contains the generics info, we want to
238 // use signature whenever possible. We revert to using descriptor if (1) the
239 // signature does not exist, (2) an unsupported generics are present or
240 // (3) in the special case when the number of parameters in the descriptor
241 // does not match the number of parameters in the signature - e.g. for
242 // certain types of inner classes and enums (see unit tests for examples).
243
244 messaget message(message_handler);
245
246 auto member_type_from_descriptor = java_type_from_string(descriptor);
247 INVARIANT(
248 member_type_from_descriptor.has_value() &&
249 member_type_from_descriptor->id() == ID_code,
250 "Must be code type");
251 if(signature.has_value())
252 {
253 try
254 {
255 auto member_type_from_signature =
256 java_type_from_string(signature.value(), class_name);
257 INVARIANT(
258 member_type_from_signature.has_value() &&
259 member_type_from_signature->id() == ID_code,
260 "Must be code type");
261 if(
262 to_java_method_type(*member_type_from_signature).parameters().size() ==
263 to_java_method_type(*member_type_from_descriptor).parameters().size())
264 {
265 return to_java_method_type(*member_type_from_signature);
266 }
267 else
268 {
269 message.debug() << "Method: " << class_name << "." << method_name
270 << "\n signature: " << signature.value()
271 << "\n descriptor: " << descriptor
272 << "\n different number of parameters, reverting to "
273 "descriptor"
274 << message.eom;
275 }
276 }
278 {
279 message.debug() << "Method: " << class_name << "." << method_name
280 << "\n could not parse signature: " << signature.value()
281 << "\n " << e.what() << "\n"
282 << " reverting to descriptor: " << descriptor
283 << message.eom;
284 }
285 }
286 return to_java_method_type(*member_type_from_descriptor);
287}
288
300 symbolt &class_symbol,
301 const irep_idt &method_identifier,
303 symbol_table_baset &symbol_table,
304 message_handlert &message_handler)
305{
307 m.descriptor,
308 m.signature,
309 id2string(class_symbol.name),
311 message_handler);
312
313 symbolt method_symbol{method_identifier, typet{}, ID_java};
314 method_symbol.base_name=m.base_name;
315 method_symbol.location=m.source_location;
316 method_symbol.location.set_function(method_identifier);
317
318 if(m.is_public)
319 member_type.set_access(ID_public);
320 else if(m.is_protected)
321 member_type.set_access(ID_protected);
322 else if(m.is_private)
323 member_type.set_access(ID_private);
324 else
325 member_type.set_access(ID_default);
326
327 if(m.is_synchronized)
328 member_type.set(ID_is_synchronized, true);
329 if(m.is_static)
330 member_type.set(ID_is_static, true);
331 member_type.set_native(m.is_native);
332 member_type.set_is_varargs(m.is_varargs);
333 member_type.set_is_synthetic(m.is_synthetic);
334
335 if(m.is_bridge)
336 member_type.set(ID_is_bridge_method, m.is_bridge);
337
338 // do we need to add 'this' as a parameter?
339 if(!m.is_static)
340 {
341 java_method_typet::parameterst &parameters = member_type.parameters();
342 const reference_typet object_ref_type =
344 java_method_typet::parametert this_p(object_ref_type);
345 this_p.set_this();
346 parameters.insert(parameters.begin(), this_p);
347 }
348
349 const std::string signature_string = pretty_signature(member_type);
350
351 if(is_constructor(method_symbol.base_name))
352 {
353 // we use full.class_name(...) as pretty name
354 // for constructors -- the idea is that they have
355 // an empty declarator.
356 method_symbol.pretty_name=
357 id2string(class_symbol.pretty_name) + signature_string;
358
359 member_type.set_is_constructor();
360 }
361 else
362 {
363 method_symbol.pretty_name = id2string(class_symbol.pretty_name) + "." +
364 id2string(m.base_name) + signature_string;
365 }
366
367 // Load annotations
368 if(!m.annotations.empty())
369 {
371 m.annotations,
372 type_checked_cast<annotated_typet>(static_cast<typet &>(member_type))
373 .get_annotations());
374 }
375
376 method_symbol.type=member_type;
377 // Not used in jbmc at present, but other codebases that use jbmc as a library
378 // use this information.
379 method_symbol.type.set(ID_C_abstract, m.is_abstract);
380 set_declaring_class(method_symbol, class_symbol.name);
381
383 m.annotations, "java::org.cprover.MustNotThrow"))
384 {
385 method_symbol.type.set(ID_C_must_not_throw, true);
386 }
387
388 // Assign names to parameters in the method symbol
389 java_method_typet &method_type = to_java_method_type(method_symbol.type);
390 method_type.set_is_final(m.is_final);
391 java_method_typet::parameterst &parameters = method_type.parameters();
393 java_method_parameter_slots(method_type);
395 m, method_identifier, parameters, slots_for_parameters);
396
397 symbol_table.add(method_symbol);
398
399 if(!m.is_static)
400 {
401 java_class_typet::methodt new_method{method_symbol.name, method_type};
402 new_method.set_base_name(method_symbol.base_name);
403 new_method.set_pretty_name(method_symbol.pretty_name);
404 new_method.set_access(member_type.get_access());
405 new_method.set_descriptor(m.descriptor);
406
407 to_java_class_type(class_symbol.type)
408 .methods()
409 .emplace_back(std::move(new_method));
410 }
411}
412
414 const irep_idt &class_identifier,
416{
417 return
418 id2string(class_identifier) + "." + id2string(method.name) + ":" +
419 method.descriptor;
420}
421
424 const irep_idt &method_identifier,
426 const java_bytecode_convert_methodt::method_offsett &slots_for_parameters)
427{
428 auto variables = variablest{};
429 // Find parameter names in the local variable table
430 // and store the result in the external variables vector
431 for(const auto &v : m.local_variable_table)
432 {
433 // Skip this variable if it is not a method parameter
434 if(v.index >= slots_for_parameters)
435 continue;
436
437 std::ostringstream id_oss;
438 id_oss << method_identifier << "::" << v.name;
439 irep_idt identifier(id_oss.str());
440 symbol_exprt result = symbol_exprt::typeless(identifier);
441 result.set(ID_C_base_name, v.name);
442
443 // Create a new variablet in the variables vector; in fact this entry will
444 // be rewritten below in the loop that iterates through the method
445 // parameters; the only field that seem to be useful to write here is the
446 // symbol_expr, others will be rewritten
447 variables[v.index].emplace_back(result, v.start_pc, v.length);
448 }
449
450 // The variables is a expanding_vectort, and the loop above may have expanded
451 // the vector introducing gaps where the entries are empty vectors. We now
452 // make sure that the vector of each LV slot contains exactly one variablet,
453 // which we will add below
454 std::size_t param_index = 0;
455 for(const auto &param : parameters)
456 {
457 INVARIANT(
458 variables[param_index].size() <= 1,
459 "should have at most one entry per index");
460 param_index += java_local_variable_slots(param.type());
461 }
462 INVARIANT(
463 param_index == slots_for_parameters,
464 "java_parameter_count and local computation must agree");
465 param_index = 0;
466 for(auto &param : parameters)
467 {
468 irep_idt base_name, identifier;
469
470 // Construct a sensible base name (base_name) and a fully qualified name
471 // (identifier) for every parameter of the method under translation,
472 // regardless of whether we have an LVT or not; and assign it to the
473 // parameter object (which is stored in the type of the symbol, not in the
474 // symbol table)
475 if(param_index == 0 && param.get_this())
476 {
477 // my.package.ClassName.myMethodName:(II)I::this
478 base_name = ID_this;
479 identifier = id2string(method_identifier) + "::" + id2string(base_name);
480 }
481 else if(!variables[param_index].empty())
482 {
483 // if already present in the LVT ...
484 base_name = variables[param_index][0].symbol_expr.get(ID_C_base_name);
485 identifier = variables[param_index][0].symbol_expr.get(ID_identifier);
486 }
487 else
488 {
489 // my.package.ClassName.myMethodName:(II)I::argNT, where N is the local
490 // variable slot where the parameter is stored and T is a character
491 // indicating the type
492 char suffix = java_char_from_type(param.type());
493 base_name = "arg" + std::to_string(param_index) + suffix;
494 identifier = id2string(method_identifier) + "::" + id2string(base_name);
495 }
496 param.set_base_name(base_name);
497 param.set_identifier(identifier);
498 param_index += java_local_variable_slots(param.type());
499 }
500 // The parameter slots detected in this function should agree with what
501 // java_parameter_count() thinks about this method
502 INVARIANT(
503 param_index == slots_for_parameters,
504 "java_parameter_count and local computation must agree");
505}
506
508 const java_method_typet::parameterst &parameters,
509 expanding_vectort<std::vector<java_bytecode_convert_methodt::variablet>>
510 &variables,
511 symbol_table_baset &symbol_table)
512{
513 std::size_t param_index = 0;
514 for(const auto &param : parameters)
515 {
516 parameter_symbolt parameter_symbol;
517 parameter_symbol.base_name = param.get_base_name();
518 parameter_symbol.mode = ID_java;
519 parameter_symbol.name = param.get_identifier();
520 parameter_symbol.type = param.type();
521 symbol_table.add(parameter_symbol);
522
523 // Add as a JVM local variable
524 variables[param_index].clear();
525 variables[param_index].emplace_back(
526 parameter_symbol.symbol_expr(),
527 0,
528 std::numeric_limits<size_t>::max(),
529 true);
530 param_index += java_local_variable_slots(param.type());
531 }
532}
533
535 const symbolt &class_symbol,
536 const methodt &m,
537 const optionalt<prefix_filtert> &method_context)
538{
539 // Construct the fully qualified method name
540 // (e.g. "my.package.ClassName.myMethodName:(II)I") and query the symbol table
541 // to retrieve the symbol (constructed by java_bytecode_convert_method_lazy)
542 // associated to the method
543 const irep_idt method_identifier =
544 get_method_identifier(class_symbol.name, m);
545
546 method_id = method_identifier;
548 symbol_table.get_writeable_ref(method_identifier), class_symbol.name);
549
550 // Obtain a std::vector of java_method_typet::parametert objects from the
551 // (function) type of the symbol
552 // Don't try to hang on to this reference into the symbol table, as we're
553 // about to create symbols for the method's parameters, which would invalidate
554 // the reference. Instead, copy the type here, set it up, then assign it back
555 // to the symbol later.
556 java_method_typet method_type =
558 method_type.set_is_final(m.is_final);
559 method_return_type = method_type.return_type();
560 java_method_typet::parameterst &parameters = method_type.parameters();
561
562 // Determine the number of local variable slots used by the JVM to maintain
563 // the formal parameters
565
566 log.debug() << "Generating codet: class " << class_symbol.name << ", method "
567 << m.name << messaget::eom;
568
569 // Add parameter symbols to the symbol table
571
572 symbolt &method_symbol = symbol_table.get_writeable_ref(method_identifier);
573
574 // Check the fields that can't change are valid
575 INVARIANT(
576 method_symbol.name == method_identifier,
577 "Name of method symbol shouldn't change");
578 INVARIANT(
579 method_symbol.base_name == m.base_name,
580 "Base name of method symbol shouldn't change");
581 INVARIANT(
582 method_symbol.module.empty(),
583 "Method symbol shouldn't have module");
584 // Update the symbol for the method
585 method_symbol.mode=ID_java;
586 method_symbol.location=m.source_location;
587 method_symbol.location.set_function(method_identifier);
588
589 for(const auto &exception_name : m.throws_exception_table)
590 method_type.add_throws_exception(exception_name);
591
592 const std::string signature_string = pretty_signature(method_type);
593
594 // Set up the pretty name for the method entry in the symbol table.
595 // The pretty name of a constructor includes the base name of the class
596 // instead of the internal method name "<init>". For regular methods, it's
597 // just the base name of the method.
598 if(is_constructor(method_symbol.base_name))
599 {
600 // we use full.class_name(...) as pretty name
601 // for constructors -- the idea is that they have
602 // an empty declarator.
603 method_symbol.pretty_name =
604 id2string(class_symbol.pretty_name) + signature_string;
605 INVARIANT(
606 method_type.get_is_constructor(),
607 "Member type should have already been marked as a constructor");
608 }
609 else
610 {
611 method_symbol.pretty_name = id2string(class_symbol.pretty_name) + "." +
612 id2string(m.base_name) + signature_string;
613 }
614
615 method_symbol.type = method_type;
616 current_method = method_symbol.name;
617 method_has_this = method_type.has_this();
618 if((!m.is_abstract) && (!m.is_native))
619 {
620 // Do not convert if method is not in context
621 if(!method_context || (*method_context)(id2string(method_identifier)))
622 {
623 code_blockt code{convert_parameter_annotations(m, method_type)};
625 method_symbol.value = std::move(code);
626 }
627 }
628}
629
630static irep_idt get_if_cmp_operator(const u1 bytecode)
631{
632 if(bytecode == patternt("if_?cmplt"))
633 return ID_lt;
634 if(bytecode == patternt("if_?cmple"))
635 return ID_le;
636 if(bytecode == patternt("if_?cmpgt"))
637 return ID_gt;
638 if(bytecode == patternt("if_?cmpge"))
639 return ID_ge;
640 if(bytecode == patternt("if_?cmpeq"))
641 return ID_equal;
642 if(bytecode == patternt("if_?cmpne"))
643 return ID_notequal;
644
645 throw "unhandled java comparison instruction";
646}
647
657 const exprt &pointer,
658 const fieldref_exprt &field_reference,
659 const namespacet &ns)
660{
661 struct_tag_typet class_type(field_reference.class_name());
662
663 const exprt typed_pointer =
665
666 const irep_idt &component_name = field_reference.component_name();
667
668 exprt accessed_object = checked_dereference(typed_pointer);
669 const auto type_of = [&ns](const exprt &object) {
670 return to_struct_type(ns.follow(object.type()));
671 };
672
673 // The field access is described as being against a particular type, but it
674 // may in fact belong to any ancestor type.
675 while(type_of(accessed_object).get_component(component_name).is_nil())
676 {
677 const auto components = type_of(accessed_object).components();
678 INVARIANT(
679 components.size() != 0,
680 "infer_opaque_type_fields should guarantee that a member access has a "
681 "corresponding field");
682
683 // Base class access is always done through the first component
684 accessed_object = member_exprt(accessed_object, components.front());
685 }
686 return member_exprt(
687 accessed_object, type_of(accessed_object).get_component(component_name));
688}
689
696 codet &repl,
697 const irep_idt &old_label,
698 const irep_idt &new_label)
699{
700 const auto &stmt=repl.get_statement();
701 if(stmt==ID_goto)
702 {
703 auto &g=to_code_goto(repl);
704 if(g.get_destination()==old_label)
705 g.set_destination(new_label);
706 }
707 else
708 {
709 for(auto &op : repl.operands())
710 if(op.id()==ID_code)
711 replace_goto_target(to_code(op), old_label, new_label);
712 }
713}
714
730 block_tree_nodet &tree,
731 code_blockt &this_block,
732 method_offsett address_start,
733 method_offsett address_limit,
734 method_offsett next_block_start_address)
735{
736 address_mapt dummy;
738 tree,
739 this_block,
740 address_start,
741 address_limit,
742 next_block_start_address,
743 dummy,
744 false);
745}
746
767 block_tree_nodet &tree,
768 code_blockt &this_block,
769 method_offsett address_start,
770 method_offsett address_limit,
771 method_offsett next_block_start_address,
772 const address_mapt &amap,
773 bool allow_merge)
774{
775 // Check the tree shape invariant:
776 PRECONDITION(tree.branch.size() == tree.branch_addresses.size());
777
778 // If there are no child blocks, return this.
779 if(tree.leaf)
780 return this_block;
781 PRECONDITION(!tree.branch.empty());
782
783 // Find child block starting > address_start:
784 const auto afterstart=
785 std::upper_bound(
786 tree.branch_addresses.begin(),
787 tree.branch_addresses.end(),
788 address_start);
789 CHECK_RETURN(afterstart != tree.branch_addresses.begin());
790 auto findstart=afterstart;
791 --findstart;
792 auto child_offset=
793 std::distance(tree.branch_addresses.begin(), findstart);
794
795 // Find child block starting >= address_limit:
796 auto findlim=
797 std::lower_bound(
798 tree.branch_addresses.begin(),
799 tree.branch_addresses.end(),
800 address_limit);
801 const auto findlim_block_start_address =
802 findlim == tree.branch_addresses.end() ? next_block_start_address
803 : (*findlim);
804
805 // If all children are in scope, return this.
806 if(findstart==tree.branch_addresses.begin() &&
807 findlim==tree.branch_addresses.end())
808 return this_block;
809
810 // Find the child code_blockt where the queried range begins:
811 auto child_iter = this_block.statements().begin();
812 // Skip any top-of-block declarations;
813 // all other children are labelled subblocks.
814 while(child_iter != this_block.statements().end() &&
815 child_iter->get_statement() == ID_decl)
816 ++child_iter;
817 CHECK_RETURN(child_iter != this_block.statements().end());
818 std::advance(child_iter, child_offset);
819 CHECK_RETURN(child_iter != this_block.statements().end());
820 auto &child_label=to_code_label(*child_iter);
821 auto &child_block=to_code_block(child_label.code());
822
823 bool single_child(afterstart==findlim);
824 if(single_child)
825 {
826 // Range wholly contained within a child block
828 tree.branch[child_offset],
829 child_block,
830 address_start,
831 address_limit,
832 findlim_block_start_address,
833 amap,
834 allow_merge);
835 }
836
837 // Otherwise we're being asked for a range of subblocks, but not all of them.
838 // If it's legal to draw a new lexical scope around the requested subset,
839 // do so; otherwise just return this block.
840
841 // This can be a new lexical scope if all incoming edges target the
842 // new block header, or come from within the suggested new block.
843
844 // If modifying the block tree is forbidden, give up and return this:
845 if(!allow_merge)
846 return this_block;
847
848 // Check for incoming control-flow edges targeting non-header
849 // blocks of the new proposed block range:
850 auto checkit=amap.find(*findstart);
851 CHECK_RETURN(checkit != amap.end());
852 ++checkit; // Skip the header, which can have incoming edges from outside.
853 for(;
854 checkit!=amap.end() && (checkit->first)<(findlim_block_start_address);
855 ++checkit)
856 {
857 for(auto p : checkit->second.predecessors)
858 {
859 if(p<(*findstart) || p>=findlim_block_start_address)
860 {
861 log.debug() << "Generating codet: "
862 << "warning: refusing to create lexical block spanning "
863 << (*findstart) << "-" << findlim_block_start_address
864 << " due to incoming edge " << p << " -> " << checkit->first
865 << messaget::eom;
866 return this_block;
867 }
868 }
869 }
870
871 // All incoming edges are acceptable! Create a new block wrapping
872 // the relevant children. Borrow the header block's label, and redirect
873 // any block-internal edges to target the inner header block.
874
875 const irep_idt child_label_name=child_label.get_label();
876 std::string new_label_str = id2string(child_label_name);
877 new_label_str+='$';
878 irep_idt new_label_irep(new_label_str);
879
880 code_labelt newlabel(child_label_name, code_blockt());
881 code_blockt &newblock=to_code_block(newlabel.code());
882 auto nblocks=std::distance(findstart, findlim);
883 CHECK_RETURN(nblocks >= 2);
884 log.debug() << "Generating codet: combining "
885 << std::distance(findstart, findlim) << " blocks for addresses "
886 << (*findstart) << "-" << findlim_block_start_address
887 << messaget::eom;
888
889 // Make a new block containing every child of interest:
890 auto &this_block_children = this_block.statements();
891 CHECK_RETURN(tree.branch.size() == this_block_children.size());
892 for(auto blockidx=child_offset, blocklim=child_offset+nblocks;
893 blockidx!=blocklim;
894 ++blockidx)
895 newblock.add(this_block_children[blockidx]);
896
897 // Relabel the inner header:
898 to_code_label(newblock.statements()[0]).set_label(new_label_irep);
899 // Relabel internal gotos:
900 replace_goto_target(newblock, child_label_name, new_label_irep);
901
902 // Remove the now-empty sibling blocks:
903 auto delfirst=this_block_children.begin();
904 std::advance(delfirst, child_offset+1);
905 auto dellim=delfirst;
906 std::advance(dellim, nblocks-1);
907 this_block_children.erase(delfirst, dellim);
908 this_block_children[child_offset].swap(newlabel);
909
910 // Perform the same transformation on the index tree:
911 block_tree_nodet newnode;
912 auto branchstart=tree.branch.begin();
913 std::advance(branchstart, child_offset);
914 auto branchlim=branchstart;
915 std::advance(branchlim, nblocks);
916 for(auto branchiter=branchstart; branchiter!=branchlim; ++branchiter)
917 newnode.branch.push_back(std::move(*branchiter));
918 ++branchstart;
919 tree.branch.erase(branchstart, branchlim);
920
921 CHECK_RETURN(tree.branch.size() == this_block_children.size());
922
923 auto branchaddriter=tree.branch_addresses.begin();
924 std::advance(branchaddriter, child_offset);
925 auto branchaddrlim=branchaddriter;
926 std::advance(branchaddrlim, nblocks);
927 newnode.branch_addresses.insert(
928 newnode.branch_addresses.begin(),
929 branchaddriter,
930 branchaddrlim);
931
932 ++branchaddriter;
933 tree.branch_addresses.erase(branchaddriter, branchaddrlim);
934
935 tree.branch[child_offset]=std::move(newnode);
936
937 CHECK_RETURN(tree.branch.size() == tree.branch_addresses.size());
938
939 return
942 this_block_children[child_offset]).code());
943}
944
947 const exprt &e,
948 std::map<irep_idt, java_bytecode_convert_methodt::variablet> &result)
949{
950 if(e.id()==ID_symbol)
951 {
952 const auto &symexpr=to_symbol_expr(e);
953 auto findit = result.emplace(
954 std::piecewise_construct,
955 std::forward_as_tuple(symexpr.get_identifier()),
956 std::forward_as_tuple(symexpr, pc, 1));
957 if(!findit.second)
958 {
959 auto &var = findit.first->second;
960
961 if(pc<var.start_pc)
962 {
963 var.length+=(var.start_pc-pc);
964 var.start_pc=pc;
965 }
966 else
967 {
968 var.length=std::max(var.length, (pc-var.start_pc)+1);
969 }
970 }
971 }
972 else
973 {
974 for(const auto &op : e.operands())
975 gather_symbol_live_ranges(pc, op, result);
976 }
977}
978
985 const irep_idt &classname)
986{
987 auto findit = symbol_table.symbols.find(clinit_wrapper_name(classname));
988 if(findit == symbol_table.symbols.end())
989 return code_skipt();
990 else
991 {
992 const code_function_callt ret(findit->second.symbol_expr());
994 needed_lazy_methods->add_needed_method(findit->second.name);
995 return ret;
996 }
997}
998
999static std::size_t get_bytecode_type_width(const typet &ty)
1000{
1001 if(ty.id()==ID_pointer)
1002 return 32;
1003 return to_bitvector_type(ty).get_width();
1004}
1005
1007 const methodt &method,
1008 const java_method_typet &method_type)
1009{
1010 code_blockt code;
1011
1012 // Consider parameter annotations
1013 const java_method_typet::parameterst &parameters(method_type.parameters());
1014 std::size_t param_index = method_type.has_this() ? 1 : 0;
1016 parameters.size() >= method.parameter_annotations.size() + param_index,
1017 "parameters and parameter annotations mismatch");
1018 for(const auto &param_annotations : method.parameter_annotations)
1019 {
1020 // NotNull annotations are not standardized. We support these:
1021 if(
1023 param_annotations, "java::javax.validation.constraints.NotNull") ||
1025 param_annotations, "java::org.jetbrains.annotations.NotNull") ||
1027 param_annotations, "org.eclipse.jdt.annotation.NonNull") ||
1029 param_annotations, "java::edu.umd.cs.findbugs.annotations.NonNull"))
1030 {
1031 const irep_idt &param_identifier =
1032 parameters[param_index].get_identifier();
1033 const symbolt &param_symbol = symbol_table.lookup_ref(param_identifier);
1034 const auto param_type =
1036 if(param_type)
1037 {
1038 code_assertt code_assert(notequal_exprt(
1039 param_symbol.symbol_expr(), null_pointer_exprt(*param_type)));
1040 source_locationt check_loc = method.source_location;
1041 check_loc.set_comment("Not null annotation check");
1042 check_loc.set_property_class("not-null-annotation-check");
1043 code_assert.add_source_location() = check_loc;
1044
1045 code.add(std::move(code_assert));
1046 }
1047 }
1048 ++param_index;
1049 }
1050
1051 return code;
1052}
1053
1056{
1057 const instructionst &instructions=method.instructions;
1058
1059 // Run a worklist algorithm, assuming that the bytecode has not
1060 // been tampered with. See "Leroy, X. (2003). Java bytecode
1061 // verification: algorithms and formalizations. Journal of Automated
1062 // Reasoning, 30(3-4), 235-269." for a more complete treatment.
1063
1064 // first pass: get targets and map addresses to instructions
1065
1066 address_mapt address_map;
1067 std::set<method_offsett> targets;
1068
1069 std::vector<method_offsett> jsr_ret_targets;
1070 std::vector<instructionst::const_iterator> ret_instructions;
1071
1072 for(auto i_it = instructions.begin(); i_it != instructions.end(); i_it++)
1073 {
1075 std::pair<address_mapt::iterator, bool> a_entry=
1076 address_map.insert(std::make_pair(i_it->address, ins));
1077 CHECK_RETURN(a_entry.second);
1078 // addresses are strictly increasing, hence we must have inserted
1079 // a new maximal key
1080 CHECK_RETURN(a_entry.first == --address_map.end());
1081
1082 const auto bytecode = i_it->bytecode;
1083 const std::string statement = bytecode_info[i_it->bytecode].mnemonic;
1084
1085 // clang-format off
1086 if(bytecode != BC_goto &&
1087 bytecode != BC_return &&
1088 bytecode != patternt("?return") &&
1089 bytecode != BC_athrow &&
1090 bytecode != BC_jsr &&
1091 bytecode != BC_jsr_w &&
1092 bytecode != BC_ret)
1093 {
1094 // clang-format on
1095 instructionst::const_iterator next=i_it;
1096 if(++next!=instructions.end())
1097 a_entry.first->second.successors.push_back(next->address);
1098 }
1099
1100 // clang-format off
1101 if(bytecode == BC_athrow ||
1102 bytecode == BC_putfield ||
1103 bytecode == BC_getfield ||
1104 bytecode == BC_checkcast ||
1105 bytecode == BC_newarray ||
1106 bytecode == BC_anewarray ||
1107 bytecode == BC_idiv ||
1108 bytecode == BC_ldiv ||
1109 bytecode == BC_irem ||
1110 bytecode == BC_lrem ||
1111 bytecode == patternt("?astore") ||
1112 bytecode == patternt("?aload") ||
1113 bytecode == BC_invokestatic ||
1114 bytecode == BC_invokevirtual ||
1115 bytecode == BC_invokespecial ||
1116 bytecode == BC_invokeinterface ||
1118 (bytecode == BC_monitorenter || bytecode == BC_monitorexit)))
1119 {
1120 // clang-format on
1121 const std::vector<method_offsett> handler =
1122 try_catch_handler(i_it->address, method.exception_table);
1123 std::list<method_offsett> &successors = a_entry.first->second.successors;
1124 successors.insert(successors.end(), handler.begin(), handler.end());
1125 targets.insert(handler.begin(), handler.end());
1126 }
1127
1128 // clang-format off
1129 if(bytecode == BC_goto ||
1130 bytecode == patternt("if_?cmp??") ||
1131 bytecode == patternt("if??") ||
1132 bytecode == BC_ifnonnull ||
1133 bytecode == BC_ifnull ||
1134 bytecode == BC_jsr ||
1135 bytecode == BC_jsr_w)
1136 {
1137 // clang-format on
1138 PRECONDITION(!i_it->args.empty());
1139
1140 auto target = numeric_cast_v<unsigned>(to_constant_expr(i_it->args[0]));
1141 targets.insert(target);
1142
1143 a_entry.first->second.successors.push_back(target);
1144
1145 if(bytecode == BC_jsr || bytecode == BC_jsr_w)
1146 {
1147 auto next = std::next(i_it);
1149 next != instructions.end(), "jsr should have valid return address");
1150 targets.insert(next->address);
1151 jsr_ret_targets.push_back(next->address);
1152 }
1153 }
1154 else if(bytecode == BC_tableswitch || bytecode == BC_lookupswitch)
1155 {
1156 bool is_label=true;
1157 for(const auto &arg : i_it->args)
1158 {
1159 if(is_label)
1160 {
1161 auto target = numeric_cast_v<unsigned>(to_constant_expr(arg));
1162 targets.insert(target);
1163 a_entry.first->second.successors.push_back(target);
1164 }
1165 is_label=!is_label;
1166 }
1167 }
1168 else if(bytecode == BC_ret)
1169 {
1170 // Finish these later, once we've seen all jsr instructions.
1171 ret_instructions.push_back(i_it);
1172 }
1173 }
1174 draw_edges_from_ret_to_jsr(address_map, jsr_ret_targets, ret_instructions);
1175
1176 for(const auto &address : address_map)
1177 {
1178 for(auto s : address.second.successors)
1179 {
1180 const auto a_it = address_map.find(s);
1181 CHECK_RETURN(a_it != address_map.end());
1182 a_it->second.predecessors.insert(address.first);
1183 }
1184 }
1185
1186 // Clean the list of temporary variables created by a call to `tmp_variable`.
1187 // These are local variables in the goto function used to represent temporary
1188 // values of the JVM operand stack, newly allocated objects before the
1189 // constructor is called, ...
1190 tmp_vars.clear();
1191
1192 // Now that the control flow graph is built, set up our local variables
1193 // (these require the graph to determine live ranges)
1194 setup_local_variables(method, address_map);
1195
1196 std::set<method_offsett> working_set;
1197
1198 if(!instructions.empty())
1199 working_set.insert(instructions.front().address);
1200
1201 while(!working_set.empty())
1202 {
1203 auto cur = working_set.begin();
1204 auto address_it = address_map.find(*cur);
1205 CHECK_RETURN(address_it != address_map.end());
1206 auto &instruction = address_it->second;
1207 const method_offsett cur_pc = *cur;
1208 working_set.erase(cur);
1209
1210 if(instruction.done)
1211 continue;
1212 working_set.insert(
1213 instruction.successors.begin(), instruction.successors.end());
1214
1215 instructionst::const_iterator i_it = instruction.source;
1216 stack.swap(instruction.stack);
1217 instruction.stack.clear();
1218 codet &c = instruction.code;
1219
1220 INVARIANT(
1221 stack.empty() || instruction.predecessors.size() <= 1 ||
1222 has_prefix(stack.front().get_string(ID_C_base_name), "$stack"),
1223 "inconsistent stack");
1224
1225 exprt arg0=i_it->args.size()>=1?i_it->args[0]:nil_exprt();
1226 exprt arg1=i_it->args.size()>=2?i_it->args[1]:nil_exprt();
1227
1228 const auto bytecode = i_it->bytecode;
1229 const bytecode_infot &stmt_bytecode_info = bytecode_info[i_it->bytecode];
1230 const std::string statement = stmt_bytecode_info.mnemonic;
1231
1232 // deal with _idx suffixes
1233 if(statement.size()>=2 &&
1234 statement[statement.size()-2]=='_' &&
1235 isdigit(statement[statement.size()-1]))
1236 {
1237 arg0=
1240 std::string(id2string(statement), statement.size()-1, 1)),
1241 java_int_type());
1242 }
1243
1244 typet catch_type;
1245
1246 // Find catch blocks that begin here. For now we assume if more than
1247 // one catch targets the same bytecode then we must be indifferent to
1248 // its type and just call it a Throwable.
1249 auto it=method.exception_table.begin();
1250 for(; it!=method.exception_table.end(); ++it)
1251 {
1252 if(cur_pc==it->handler_pc)
1253 {
1254 if(
1255 catch_type != typet() ||
1256 it->catch_type == struct_tag_typet(irep_idt()))
1257 {
1258 catch_type = struct_tag_typet("java::java.lang.Throwable");
1259 break;
1260 }
1261 else
1262 catch_type=it->catch_type;
1263 }
1264 }
1265
1266 optionalt<codet> catch_instruction;
1267
1268 if(catch_type!=typet())
1269 {
1270 // at the beginning of a handler, clear the stack and
1271 // push the corresponding exceptional return variable
1272 // We also create a catch exception instruction that
1273 // precedes the catch block, and which remove_exceptionst
1274 // will transform into something like:
1275 // catch_var = GLOBAL_THROWN_EXCEPTION;
1276 // GLOBAL_THROWN_EXCEPTION = null;
1277 stack.clear();
1278 symbol_exprt catch_var=
1280 "caught_exception",
1281 java_reference_type(catch_type));
1282 stack.push_back(catch_var);
1283 catch_instruction = code_landingpadt(catch_var);
1284 }
1285
1286 exprt::operandst op = pop(stmt_bytecode_info.pop);
1287 exprt::operandst results;
1288 results.resize(stmt_bytecode_info.push, nil_exprt());
1289
1290 if(bytecode == BC_aconst_null)
1291 {
1292 PRECONDITION(results.size() == 1);
1294 }
1295 else if(bytecode == BC_athrow)
1296 {
1297 PRECONDITION(op.size() == 1 && results.size() == 1);
1298 convert_athrow(i_it->source_location, op, c, results);
1299 }
1300 else if(bytecode == BC_checkcast)
1301 {
1302 // checkcast throws an exception in case a cast of object
1303 // on stack to given type fails.
1304 // The stack isn't modified.
1305 PRECONDITION(op.size() == 1 && results.size() == 1);
1306 convert_checkcast(arg0, op, c, results);
1307 }
1308 else if(bytecode == BC_invokedynamic)
1309 {
1310 if(
1311 const auto res =
1312 convert_invoke_dynamic(i_it->source_location, i_it->address, arg0, c))
1313 {
1314 results.resize(1);
1315 results[0] = *res;
1316 }
1317 }
1318 else if(
1319 bytecode == BC_invokestatic && id2string(arg0.get(ID_identifier)) ==
1320 "java::org.cprover.CProver.assume:(Z)V")
1321 {
1322 const java_method_typet &method_type = to_java_method_type(arg0.type());
1323 INVARIANT(
1324 method_type.parameters().size() == 1,
1325 "function expected to have exactly one parameter");
1326 c = replace_call_to_cprover_assume(i_it->source_location, c);
1327 }
1328 // replace calls to CProver.atomicBegin
1329 else if(
1330 bytecode == BC_invokestatic &&
1331 arg0.get(ID_identifier) == "java::org.cprover.CProver.atomicBegin:()V")
1332 {
1333 c = codet(ID_atomic_begin);
1334 }
1335 // replace calls to CProver.atomicEnd
1336 else if(
1337 bytecode == BC_invokestatic &&
1338 arg0.get(ID_identifier) == "java::org.cprover.CProver.atomicEnd:()V")
1339 {
1340 c = codet(ID_atomic_end);
1341 }
1342 else if(
1343 bytecode == BC_invokeinterface || bytecode == BC_invokespecial ||
1344 bytecode == BC_invokevirtual || bytecode == BC_invokestatic)
1345 {
1346 class_method_descriptor_exprt *class_method_descriptor =
1348
1349 INVARIANT(
1350 class_method_descriptor,
1351 "invokeinterface, invokespecial, invokevirtual and invokestatic should "
1352 "be called with a class method descriptor expression as arg0");
1353
1355 i_it->source_location, statement, *class_method_descriptor, c, results);
1356 }
1357 else if(bytecode == BC_return)
1358 {
1359 PRECONDITION(op.empty() && results.empty());
1361 }
1362 else if(bytecode == patternt("?return"))
1363 {
1364 // Return types are promoted in java, so this might need
1365 // conversion.
1366 PRECONDITION(op.size() == 1 && results.empty());
1367 const exprt r =
1370 }
1371 else if(bytecode == patternt("?astore"))
1372 {
1373 PRECONDITION(results.empty());
1374 c = convert_astore(statement, op, i_it->source_location);
1375 }
1376 else if(bytecode == patternt("?store") || bytecode == patternt("?store_?"))
1377 {
1378 // store value into some local variable
1379 PRECONDITION(op.size() == 1 && results.empty());
1380 c = convert_store(
1381 statement, arg0, op, i_it->address, i_it->source_location);
1382 }
1383 else if(bytecode == patternt("?aload"))
1384 {
1385 PRECONDITION(results.size() == 1);
1386 results[0] = convert_aload(statement, op);
1387 }
1388 else if(bytecode == patternt("?load") || bytecode == patternt("?load_?"))
1389 {
1390 // load a value from a local variable
1391 results[0] = convert_load(arg0, statement[0], i_it->address);
1392 }
1393 else if(bytecode == BC_ldc || bytecode == BC_ldc_w || bytecode == BC_ldc2_w)
1394 {
1395 PRECONDITION(op.empty() && results.size() == 1);
1396
1397 INVARIANT(
1398 !can_cast_expr<java_string_literal_exprt>(arg0) && arg0.id() != ID_type,
1399 "String and Class literals should have been lowered in "
1400 "generate_constant_global_variables");
1401
1402 results[0] = arg0;
1403 }
1404 else if(bytecode == BC_goto || bytecode == BC_goto_w)
1405 {
1406 PRECONDITION(op.empty() && results.empty());
1407 const mp_integer number =
1409 code_gotot code_goto(label(integer2string(number)));
1410 c=code_goto;
1411 }
1412 else if(bytecode == BC_jsr || bytecode == BC_jsr_w)
1413 {
1414 // As 'goto', except we must also push the subroutine return address:
1415 PRECONDITION(op.empty() && results.size() == 1);
1416 const mp_integer number =
1418 code_gotot code_goto(label(integer2string(number)));
1419 c=code_goto;
1420 results[0]=
1422 std::next(i_it)->address,
1423 unsignedbv_typet(64));
1424 results[0].type() = pointer_type(java_void_type());
1425 }
1426 else if(bytecode == BC_ret)
1427 {
1428 // Since we have a bounded target set, make life easier on our analyses
1429 // and write something like:
1430 // if(retaddr==5) goto 5; else if(retaddr==10) goto 10; ...
1431 PRECONDITION(op.empty() && results.empty());
1432 PRECONDITION(!jsr_ret_targets.empty());
1433 c = convert_ret(
1434 jsr_ret_targets, arg0, i_it->source_location, i_it->address);
1435 }
1436 else if(bytecode == BC_iconst_m1)
1437 {
1438 CHECK_RETURN(results.size() == 1);
1439 results[0]=from_integer(-1, java_int_type());
1440 }
1441 else if(bytecode == patternt("?const_?"))
1442 {
1443 CHECK_RETURN(results.size() == 1);
1444 results = convert_const(statement, to_constant_expr(arg0), results);
1445 }
1446 else if(bytecode == patternt("?ipush"))
1447 {
1448 CHECK_RETURN(results.size() == 1);
1450 arg0.is_constant(), "ipush argument expected to be constant");
1451 results[0] = typecast_exprt::conditional_cast(arg0, java_int_type());
1452 }
1453 else if(bytecode == patternt("if_?cmp??"))
1454 {
1455 PRECONDITION(op.size() == 2 && results.empty());
1456 const mp_integer number =
1458 c = convert_if_cmp(
1459 address_map, bytecode, op, number, i_it->source_location);
1460 }
1461 else if(bytecode == patternt("if??"))
1462 {
1463 // clang-format off
1464 const irep_idt id=
1465 bytecode == BC_ifeq ? ID_equal :
1466 bytecode == BC_ifne ? ID_notequal :
1467 bytecode == BC_iflt ? ID_lt :
1468 bytecode == BC_ifge ? ID_ge :
1469 bytecode == BC_ifgt ? ID_gt :
1470 bytecode == BC_ifle ? ID_le :
1471 irep_idt();
1472 // clang-format on
1473
1474 INVARIANT(!id.empty(), "unexpected bytecode-if");
1475 PRECONDITION(op.size() == 1 && results.empty());
1476 const mp_integer number =
1478 c = convert_if(address_map, op, id, number, i_it->source_location);
1479 }
1480 else if(bytecode == patternt("ifnonnull"))
1481 {
1482 PRECONDITION(op.size() == 1 && results.empty());
1483 const mp_integer number =
1485 c = convert_ifnonull(address_map, op, number, i_it->source_location);
1486 }
1487 else if(bytecode == patternt("ifnull"))
1488 {
1489 PRECONDITION(op.size() == 1 && results.empty());
1490 const mp_integer number =
1492 c = convert_ifnull(address_map, op, number, i_it->source_location);
1493 }
1494 else if(bytecode == BC_iinc)
1495 {
1496 c = convert_iinc(arg0, arg1, i_it->source_location, i_it->address);
1497 }
1498 else if(bytecode == patternt("?xor"))
1499 {
1500 PRECONDITION(op.size() == 2 && results.size() == 1);
1501 results[0]=bitxor_exprt(op[0], op[1]);
1502 }
1503 else if(bytecode == patternt("?or"))
1504 {
1505 PRECONDITION(op.size() == 2 && results.size() == 1);
1506 results[0]=bitor_exprt(op[0], op[1]);
1507 }
1508 else if(bytecode == patternt("?and"))
1509 {
1510 PRECONDITION(op.size() == 2 && results.size() == 1);
1511 results[0]=bitand_exprt(op[0], op[1]);
1512 }
1513 else if(bytecode == patternt("?shl"))
1514 {
1515 PRECONDITION(op.size() == 2 && results.size() == 1);
1516 results = convert_shl(statement, op, results);
1517 }
1518 else if(bytecode == patternt("?shr"))
1519 {
1520 PRECONDITION(op.size() == 2 && results.size() == 1);
1521 results[0]=ashr_exprt(op[0], op[1]);
1522 }
1523 else if(bytecode == patternt("?ushr"))
1524 {
1525 PRECONDITION(op.size() == 2 && results.size() == 1);
1526 results = convert_ushr(statement, op, results);
1527 }
1528 else if(bytecode == patternt("?add"))
1529 {
1530 PRECONDITION(op.size() == 2 && results.size() == 1);
1531 results[0]=plus_exprt(op[0], op[1]);
1532 }
1533 else if(bytecode == patternt("?sub"))
1534 {
1535 PRECONDITION(op.size() == 2 && results.size() == 1);
1536 results[0]=minus_exprt(op[0], op[1]);
1537 }
1538 else if(bytecode == patternt("?div"))
1539 {
1540 PRECONDITION(op.size() == 2 && results.size() == 1);
1541 results[0]=div_exprt(op[0], op[1]);
1542 }
1543 else if(bytecode == patternt("?mul"))
1544 {
1545 PRECONDITION(op.size() == 2 && results.size() == 1);
1546 results[0]=mult_exprt(op[0], op[1]);
1547 }
1548 else if(bytecode == patternt("?neg"))
1549 {
1550 PRECONDITION(op.size() == 1 && results.size() == 1);
1551 results[0]=unary_minus_exprt(op[0], op[0].type());
1552 }
1553 else if(bytecode == patternt("?rem"))
1554 {
1555 PRECONDITION(op.size() == 2 && results.size() == 1);
1556 // This is _not_ the remainder operation defined by IEEE 754,
1557 // but similar to the fmod C library function.
1558 if(bytecode == BC_frem || bytecode == BC_drem)
1559 results[0] = binary_exprt(op[0], ID_floatbv_mod, op[1]);
1560 else
1561 results[0]=mod_exprt(op[0], op[1]);
1562 }
1563 else if(bytecode == patternt("?cmp"))
1564 {
1565 PRECONDITION(op.size() == 2 && results.size() == 1);
1566 results = convert_cmp(op, results);
1567 }
1568 else if(bytecode == patternt("?cmp?"))
1569 {
1570 PRECONDITION(op.size() == 2 && results.size() == 1);
1571 results = convert_cmp2(statement, op, results);
1572 }
1573 else if(bytecode == patternt("?cmpl"))
1574 {
1575 PRECONDITION(op.size() == 2 && results.size() == 1);
1576 results[0]=binary_relation_exprt(op[0], ID_lt, op[1]);
1577 }
1578 else if(bytecode == BC_dup)
1579 {
1580 PRECONDITION(op.size() == 1 && results.size() == 2);
1581 results[0]=results[1]=op[0];
1582 }
1583 else if(bytecode == BC_dup_x1)
1584 {
1585 PRECONDITION(op.size() == 2 && results.size() == 3);
1586 results[0]=op[1];
1587 results[1]=op[0];
1588 results[2]=op[1];
1589 }
1590 else if(bytecode == BC_dup_x2)
1591 {
1592 PRECONDITION(op.size() == 3 && results.size() == 4);
1593 results[0]=op[2];
1594 results[1]=op[0];
1595 results[2]=op[1];
1596 results[3]=op[2];
1597 }
1598 // dup2* behaviour depends on the size of the operands on the
1599 // stack
1600 else if(bytecode == BC_dup2)
1601 {
1602 PRECONDITION(!stack.empty() && results.empty());
1603 convert_dup2(op, results);
1604 }
1605 else if(bytecode == BC_dup2_x1)
1606 {
1607 PRECONDITION(!stack.empty() && results.empty());
1608 convert_dup2_x1(op, results);
1609 }
1610 else if(bytecode == BC_dup2_x2)
1611 {
1612 PRECONDITION(!stack.empty() && results.empty());
1613 convert_dup2_x2(op, results);
1614 }
1615 else if(bytecode == BC_getfield)
1616 {
1617 PRECONDITION(op.size() == 1 && results.size() == 1);
1618 results[0] = java_bytecode_promotion(
1620 }
1621 else if(bytecode == BC_getstatic)
1622 {
1623 PRECONDITION(op.empty() && results.size() == 1);
1624 const auto &field_name=arg0.get_string(ID_component_name);
1625 const bool is_assertions_disabled_field=
1626 field_name.find("$assertionsDisabled")!=std::string::npos;
1627
1628 const irep_idt field_id(
1629 get_static_field(arg0.get_string(ID_class), field_name));
1630
1631 // Symbol should have been populated by java_bytecode_convert_class:
1632 const symbol_exprt symbol_expr(
1633 symbol_table.lookup_ref(field_id).symbol_expr());
1634
1636 i_it->source_location,
1637 arg0,
1638 symbol_expr,
1639 is_assertions_disabled_field,
1640 c,
1641 results);
1642 }
1643 else if(bytecode == BC_putfield)
1644 {
1645 PRECONDITION(op.size() == 2 && results.empty());
1647 }
1648 else if(bytecode == BC_putstatic)
1649 {
1650 PRECONDITION(op.size() == 1 && results.empty());
1651 const auto &field_name=arg0.get_string(ID_component_name);
1652
1653 const irep_idt field_id(
1654 get_static_field(arg0.get_string(ID_class), field_name));
1655
1656 // Symbol should have been populated by java_bytecode_convert_class:
1657 const symbol_exprt symbol_expr(
1658 symbol_table.lookup_ref(field_id).symbol_expr());
1659
1660 c = convert_putstatic(i_it->source_location, arg0, op, symbol_expr);
1661 }
1662 else if(
1663 bytecode == BC_f2i || bytecode == BC_f2l || bytecode == BC_d2i ||
1664 bytecode == BC_d2l)
1665 {
1666 PRECONDITION(op.size() == 1 && results.size() == 1);
1667 typet src_type = java_type_from_char(statement[0]);
1668 typet dest_type = java_type_from_char(statement[2]);
1669
1670 // See JLS 5.1.3. Narrowing Primitive Conversion
1671 // +-NaN is converted to 0
1672 // +-Inf resp. values beyond the int/long range
1673 // are mapped to max/min of int/long.
1674 // Other values are rounded towards zero
1675
1676 // for int: 2147483647, for long: 9223372036854775807L
1677 exprt largest_as_dest =
1679
1680 // 2147483647 is not exactly representable in float;
1681 // it will be rounded up to 2147483648, which is fine.
1682 // 9223372036854775807L is not exactly representable
1683 // neither in float nor in double; it is rounded up to
1684 // 9223372036854775808.0, which is fine.
1685 exprt largest_as_src =
1686 from_integer(to_integer_bitvector_type(dest_type).largest(), src_type);
1687
1688 // for int: -2147483648, for long: -9223372036854775808L
1689 exprt smallest_as_dest =
1691
1692 // -2147483648 and -9223372036854775808L are exactly
1693 // representable in float and double.
1694 exprt smallest_as_src =
1695 from_integer(to_integer_bitvector_type(dest_type).smallest(), src_type);
1696
1697 results[0] = if_exprt(
1698 binary_relation_exprt(op[0], ID_le, smallest_as_src),
1699 smallest_as_dest,
1700 if_exprt(
1701 binary_relation_exprt(op[0], ID_ge, largest_as_src),
1702 largest_as_dest,
1703 typecast_exprt::conditional_cast(op[0], dest_type)));
1704 }
1705 else if(bytecode == patternt("?2?")) // i2c etc.
1706 {
1707 PRECONDITION(op.size() == 1 && results.size() == 1);
1708 typet type=java_type_from_char(statement[2]);
1709 results[0] = typecast_exprt::conditional_cast(op[0], type);
1710
1711 // These types get converted/truncated then immediately turned back into
1712 // ints again, so we just double-cast here.
1713 if(
1714 type == java_char_type() || type == java_byte_type() ||
1715 type == java_short_type())
1716 {
1717 results[0] = typecast_exprt(results[0], java_int_type());
1718 }
1719 }
1720 else if(bytecode == BC_new)
1721 {
1722 // use temporary since the stack symbol might get duplicated
1723 PRECONDITION(op.empty() && results.size() == 1);
1724 convert_new(i_it->source_location, arg0, c, results);
1725 }
1726 else if(bytecode == BC_newarray || bytecode == BC_anewarray)
1727 {
1728 // the op is the array size
1729 PRECONDITION(op.size() == 1 && results.size() == 1);
1730 c = convert_newarray(i_it->source_location, statement, arg0, op, results);
1731 }
1732 else if(bytecode == BC_multianewarray)
1733 {
1734 // The first argument is the type, the second argument is the number of
1735 // dimensions. The size of each dimension is on the stack.
1736 const std::size_t dimension =
1738
1739 op=pop(dimension);
1740 CHECK_RETURN(results.size() == 1);
1741 c = convert_multianewarray(i_it->source_location, arg0, op, results);
1742 }
1743 else if(bytecode == BC_arraylength)
1744 {
1745 PRECONDITION(op.size() == 1 && results.size() == 1);
1746
1747 // any array type is fine here, so we go for a reference array
1749 PRECONDITION(array.type().id() == ID_struct_tag);
1750 array.set(ID_java_member_access, true);
1751
1752 results[0] = member_exprt{std::move(array), "length", java_int_type()};
1753 }
1754 else if(bytecode == BC_tableswitch || bytecode == BC_lookupswitch)
1755 {
1756 PRECONDITION(op.size() == 1 && results.empty());
1757 c = convert_switch(op, i_it->args, i_it->source_location);
1758 }
1759 else if(bytecode == BC_pop || bytecode == BC_pop2)
1760 {
1761 c = convert_pop(statement, op);
1762 }
1763 else if(bytecode == BC_instanceof)
1764 {
1765 PRECONDITION(op.size() == 1 && results.size() == 1);
1766
1767 results[0] =
1769 }
1770 else if(bytecode == BC_monitorenter || bytecode == BC_monitorexit)
1771 {
1772 c = convert_monitorenterexit(statement, op, i_it->source_location);
1773 }
1774 else if(bytecode == BC_swap)
1775 {
1776 PRECONDITION(op.size() == 2 && results.size() == 2);
1777 results[1]=op[0];
1778 results[0]=op[1];
1779 }
1780 else if(bytecode == BC_nop)
1781 {
1782 c=code_skipt();
1783 }
1784 else
1785 {
1786 c=codet(statement);
1787 c.operands()=op;
1788 }
1789
1790 c = do_exception_handling(method, working_set, cur_pc, c);
1791
1792 // Finally if this is the beginning of a catch block (already determined
1793 // before the big bytecode switch), insert the exception 'landing pad'
1794 // instruction before the actual instruction:
1795 if(catch_instruction.has_value())
1796 {
1797 if(c.get_statement() != ID_block)
1798 c = code_blockt{{c}};
1799 c.operands().insert(c.operands().begin(), *catch_instruction);
1800 }
1801
1802 if(!i_it->source_location.get_line().empty())
1803 merge_source_location_rec(c, i_it->source_location);
1804
1805 push(results);
1806
1807 instruction.done = true;
1808 for(const auto address : instruction.successors)
1809 {
1810 address_mapt::iterator a_it2=address_map.find(address);
1811 CHECK_RETURN(a_it2 != address_map.end());
1812
1813 // clear the stack if this is an exception handler
1814 for(const auto &exception_row : method.exception_table)
1815 {
1816 if(address==exception_row.handler_pc)
1817 {
1818 stack.clear();
1819 break;
1820 }
1821 }
1822
1823 if(!stack.empty() && a_it2->second.predecessors.size()>1)
1824 {
1825 // copy into temporaries
1826 code_blockt more_code;
1827
1828 // introduce temporaries when successor is seen for the first
1829 // time
1830 if(a_it2->second.stack.empty())
1831 {
1832 for(stackt::iterator s_it=stack.begin();
1833 s_it!=stack.end();
1834 ++s_it)
1835 {
1836 symbol_exprt lhs=tmp_variable("$stack", s_it->type());
1837 code_assignt a(lhs, *s_it);
1838 more_code.add(a);
1839
1840 s_it->swap(lhs);
1841 }
1842 }
1843 else
1844 {
1845 INVARIANT(
1846 a_it2->second.stack.size() == stack.size(),
1847 "Stack sizes should be the same.");
1848 stackt::const_iterator os_it=a_it2->second.stack.begin();
1849 for(auto &expr : stack)
1850 {
1851 INVARIANT(
1852 has_prefix(os_it->get_string(ID_C_base_name), "$stack"),
1853 "invalid base name");
1854 symbol_exprt lhs=to_symbol_expr(*os_it);
1855 code_assignt a(lhs, expr);
1856 more_code.add(a);
1857
1858 expr.swap(lhs);
1859 ++os_it;
1860 }
1861 }
1862
1863 if(results.empty())
1864 {
1865 more_code.add(c);
1866 c.swap(more_code);
1867 }
1868 else
1869 {
1870 if(c.get_statement() != ID_block)
1871 c = code_blockt{{c}};
1872
1873 auto &last_statement=to_code_block(c).find_last_statement();
1874 if(last_statement.get_statement()==ID_goto)
1875 {
1876 // Insert stack twiddling before branch:
1877 if(last_statement.get_statement() != ID_block)
1878 last_statement = code_blockt{{last_statement}};
1879
1880 last_statement.operands().insert(
1881 last_statement.operands().begin(),
1882 more_code.statements().begin(),
1883 more_code.statements().end());
1884 }
1885 else
1886 to_code_block(c).append(more_code);
1887 }
1888 }
1889 a_it2->second.stack=stack;
1890 }
1891 }
1892
1893 code_blockt code;
1894
1895 // Add anonymous locals to the symtab:
1896 for(const auto &var : used_local_names)
1897 {
1898 symbolt new_symbol{var.get_identifier(), var.type(), ID_java};
1899 new_symbol.base_name=var.get(ID_C_base_name);
1900 new_symbol.pretty_name=strip_java_namespace_prefix(var.get_identifier());
1901 new_symbol.is_file_local=true;
1902 new_symbol.is_thread_local=true;
1903 new_symbol.is_lvalue=true;
1904 symbol_table.add(new_symbol);
1905 }
1906
1907 // Try to recover block structure as indicated in the local variable table:
1908
1909 // The block tree node mirrors the block structure of root_block,
1910 // indexing the Java PCs where each subblock starts and ends.
1911 block_tree_nodet root;
1912 code_blockt root_block;
1913
1914 // First create a simple flat list of basic blocks. We'll add lexical nesting
1915 // constructs as variable live-ranges require next.
1916 bool start_new_block=true;
1917 bool has_seen_previous_address=false;
1918 method_offsett previous_address = 0;
1919 for(const auto &address_pair : address_map)
1920 {
1921 const method_offsett address = address_pair.first;
1922 CHECK_RETURN(address_pair.first == address_pair.second.source->address);
1923 const codet &c=address_pair.second.code;
1924
1925 // Start a new lexical block if this is a branch target:
1926 if(!start_new_block)
1927 start_new_block=targets.find(address)!=targets.end();
1928 // Start a new lexical block if this is a control flow join
1929 // (e.g. due to exceptional control flow)
1930 if(!start_new_block)
1931 start_new_block=address_pair.second.predecessors.size()>1;
1932 // Start a new lexical block if we've just entered a block in which
1933 // exceptions are handled. This is usually the start of a try block, but a
1934 // single try can be represented as multiple non-contiguous blocks in the
1935 // exception table.
1936 if(!start_new_block && has_seen_previous_address)
1937 {
1938 for(const auto &exception_row : method.exception_table)
1939 if(exception_row.start_pc==previous_address)
1940 {
1941 start_new_block=true;
1942 break;
1943 }
1944 }
1945
1946 if(start_new_block)
1947 {
1948 root_block.add(
1949 code_labelt{label(std::to_string(address)), code_blockt{}});
1950 root.branch.push_back(block_tree_nodet::get_leaf());
1951 INVARIANT(
1952 (root.branch_addresses.empty() ||
1953 root.branch_addresses.back() < address),
1954 "Block addresses should be unique and increasing");
1955 root.branch_addresses.push_back(address);
1956 }
1957
1958 if(c.get_statement()!=ID_skip)
1959 {
1960 auto &lastlabel = to_code_label(root_block.statements().back());
1961 auto &add_to_block=to_code_block(lastlabel.code());
1962 add_to_block.add(c);
1963 }
1964 start_new_block=address_pair.second.successors.size()>1;
1965
1966 previous_address=address;
1967 has_seen_previous_address=true;
1968 }
1969
1970 // Find out where temporaries are used:
1971 std::map<irep_idt, variablet> temporary_variable_live_ranges;
1972 for(const auto &aentry : address_map)
1974 aentry.first,
1975 aentry.second.code,
1976 temporary_variable_live_ranges);
1977
1978 std::vector<const variablet*> vars_to_process;
1979 for(const auto &vlist : variables)
1980 for(const auto &v : vlist)
1981 vars_to_process.push_back(&v);
1982
1983 for(const auto &v : tmp_vars)
1984 vars_to_process.push_back(
1985 &temporary_variable_live_ranges.at(v.get_identifier()));
1986
1987 for(const auto &v : used_local_names)
1988 vars_to_process.push_back(
1989 &temporary_variable_live_ranges.at(v.get_identifier()));
1990
1991 for(const auto vp : vars_to_process)
1992 {
1993 const auto &v=*vp;
1994 if(v.is_parameter)
1995 continue;
1996 // Merge lexical scopes as far as possible to allow us to
1997 // declare these variable scopes faithfully.
1998 // Don't insert yet, as for the time being the blocks' only
1999 // operands must be other blocks.
2000 // The declarations will be inserted in the next pass instead.
2002 root,
2003 root_block,
2004 v.start_pc,
2005 v.start_pc + v.length,
2006 std::numeric_limits<method_offsett>::max(),
2007 address_map);
2008 }
2009 for(const auto vp : vars_to_process)
2010 {
2011 const auto &v=*vp;
2012 if(v.is_parameter)
2013 continue;
2014 // Skip anonymous variables:
2015 if(v.symbol_expr.get_identifier().empty())
2016 continue;
2017 auto &block = get_block_for_pcrange(
2018 root,
2019 root_block,
2020 v.start_pc,
2021 v.start_pc + v.length,
2022 std::numeric_limits<method_offsett>::max());
2023 code_declt d(v.symbol_expr);
2024 block.statements().insert(block.statements().begin(), d);
2025 }
2026
2027 for(auto &block : root_block.statements())
2028 code.add(block);
2029
2030 return code;
2031}
2032
2034 const irep_idt &statement,
2035 const exprt::operandst &op)
2036{
2037 // these are skips
2038 codet c = code_skipt();
2039
2040 // pop2 removes two single-word items from the stack (e.g. two
2041 // integers, or an integer and an object reference) or one
2042 // two-word item (i.e. a double or a long).
2043 // http://cs.au.dk/~mis/dOvs/jvmspec/ref-pop2.html
2044 if(statement == "pop2" && get_bytecode_type_width(op[0].type()) == 32)
2045 pop(1);
2046 return c;
2047}
2048
2050 const exprt::operandst &op,
2052 const source_locationt &location)
2053{
2054 // we turn into switch-case
2055 code_blockt code_block;
2056 code_block.add_source_location() = location;
2057
2058 bool is_label = true;
2059 for(auto a_it = args.begin(); a_it != args.end();
2060 a_it++, is_label = !is_label)
2061 {
2062 if(is_label)
2063 {
2064 const mp_integer number =
2066 // The switch case does not contain any code, it just branches via a GOTO
2067 // to the jump target of the tableswitch/lookupswitch case at
2068 // hand. Therefore we consider this code to belong to the source bytecode
2069 // instruction and not the target instruction.
2070 const method_offsett label_number =
2072 code_gotot code(label(std::to_string(label_number)));
2073 code.add_source_location() = location;
2074
2075 if(a_it == args.begin())
2076 {
2077 code_switch_caset code_case(nil_exprt(), std::move(code));
2078 code_case.set_default();
2079
2080 code_block.add(std::move(code_case), location);
2081 }
2082 else
2083 {
2084 exprt case_op =
2085 typecast_exprt::conditional_cast(*std::prev(a_it), op[0].type());
2086 case_op.add_source_location() = location;
2087
2088 code_switch_caset code_case(std::move(case_op), std::move(code));
2089 code_block.add(std::move(code_case), location);
2090 }
2091 }
2092 }
2093
2094 code_switcht code_switch(op[0], std::move(code_block));
2095 code_switch.add_source_location() = location;
2096 return code_switch;
2097}
2098
2100 const irep_idt &statement,
2101 const exprt::operandst &op,
2102 const source_locationt &source_location)
2103{
2104 const irep_idt descriptor = (statement == "monitorenter") ?
2105 "java::java.lang.Object.monitorenter:(Ljava/lang/Object;)V" :
2106 "java::java.lang.Object.monitorexit:(Ljava/lang/Object;)V";
2107
2108 if(!threading_support || !symbol_table.has_symbol(descriptor))
2109 return code_skipt();
2110
2111 // becomes a function call
2112 java_method_typet type(
2114 java_void_type());
2115 code_function_callt call(symbol_exprt(descriptor, type), {op[0]});
2116 call.add_source_location() = source_location;
2118 needed_lazy_methods->add_needed_method(descriptor);
2119 return std::move(call);
2120}
2121
2123 exprt::operandst &op,
2124 exprt::operandst &results)
2125{
2126 if(get_bytecode_type_width(stack.back().type()) == 32)
2127 op = pop(2);
2128 else
2129 op = pop(1);
2130
2131 results.insert(results.end(), op.begin(), op.end());
2132 results.insert(results.end(), op.begin(), op.end());
2133}
2134
2136 exprt::operandst &op,
2137 exprt::operandst &results)
2138{
2139 if(get_bytecode_type_width(stack.back().type()) == 32)
2140 op = pop(3);
2141 else
2142 op = pop(2);
2143
2144 results.insert(results.end(), op.begin() + 1, op.end());
2145 results.insert(results.end(), op.begin(), op.end());
2146}
2147
2149 exprt::operandst &op,
2150 exprt::operandst &results)
2151{
2152 if(get_bytecode_type_width(stack.back().type()) == 32)
2153 op = pop(2);
2154 else
2155 op = pop(1);
2156
2157 exprt::operandst op2;
2158
2159 if(get_bytecode_type_width(stack.back().type()) == 32)
2160 op2 = pop(2);
2161 else
2162 op2 = pop(1);
2163
2164 results.insert(results.end(), op.begin(), op.end());
2165 results.insert(results.end(), op2.begin(), op2.end());
2166 results.insert(results.end(), op.begin(), op.end());
2167}
2168
2170 const irep_idt &statement,
2171 const constant_exprt &arg0,
2172 exprt::operandst &results) const
2173{
2174 const char type_char = statement[0];
2175 const bool is_double('d' == type_char);
2176 const bool is_float('f' == type_char);
2177
2178 if(is_double || is_float)
2179 {
2180 const ieee_float_spect spec(
2183
2184 ieee_floatt value(spec);
2185 if(arg0.type().id() != ID_floatbv)
2186 {
2187 const mp_integer number = numeric_cast_v<mp_integer>(arg0);
2188 value.from_integer(number);
2189 }
2190 else
2191 value.from_expr(arg0);
2192
2193 results[0] = value.to_expr();
2194 }
2195 else
2196 {
2197 const mp_integer value = numeric_cast_v<mp_integer>(arg0);
2198 const typet type = java_type_from_char(statement[0]);
2199 results[0] = from_integer(value, type);
2200 }
2201 return results;
2202}
2203
2205 const java_method_typet::parameterst &parameters,
2207{
2208 // do some type adjustment for the arguments,
2209 // as Java promotes arguments
2210 // Also cast pointers since intermediate locals
2211 // can be void*.
2212 INVARIANT(
2213 parameters.size() == arguments.size(),
2214 "for each parameter there must be exactly one argument");
2215 for(std::size_t i = 0; i < parameters.size(); i++)
2216 {
2217 const typet &type = parameters[i].type();
2218 if(
2219 type == java_boolean_type() || type == java_char_type() ||
2220 type == java_byte_type() || type == java_short_type() ||
2221 type.id() == ID_pointer)
2222 {
2223 arguments[i] = typecast_exprt::conditional_cast(arguments[i], type);
2224 }
2225 }
2226}
2227
2229 source_locationt location,
2230 const irep_idt &statement,
2231 class_method_descriptor_exprt &class_method_descriptor,
2232 codet &c,
2233 exprt::operandst &results)
2234{
2235 const bool use_this(statement != "invokestatic");
2236 const bool is_virtual(
2237 statement == "invokevirtual" || statement == "invokeinterface");
2238
2239 const irep_idt &invoked_method_id = class_method_descriptor.get_identifier();
2240 INVARIANT(
2241 !invoked_method_id.empty(),
2242 "invoke statement arg0 must have an identifier");
2243
2244 auto method_symbol = symbol_table.symbols.find(invoked_method_id);
2245
2246 // Use the most precise type available: the actual symbol has generic info,
2247 // whereas the type given by the invoke instruction doesn't and is therefore
2248 // less accurate.
2249 if(method_symbol != symbol_table.symbols.end())
2250 {
2251 // Note the number of parameters might change here due to constructors using
2252 // invokespecial will have zero arguments (the `this` is added below)
2253 // but the symbol for the <init> will have the this parameter.
2254 INVARIANT(
2255 to_java_method_type(class_method_descriptor.type()).return_type().id() ==
2256 to_code_type(method_symbol->second.type).return_type().id(),
2257 "Function return type must not change in kind");
2258 class_method_descriptor.type() = method_symbol->second.type;
2259 }
2260
2261 // Note arg0 and arg0.type() are subject to many side-effects in this method,
2262 // then finally used to populate the call instruction.
2263 java_method_typet &method_type =
2264 to_java_method_type(class_method_descriptor.type());
2265
2266 java_method_typet::parameterst &parameters(method_type.parameters());
2267
2268 if(use_this)
2269 {
2270 const irep_idt class_id = class_method_descriptor.class_id();
2271
2272 if(parameters.empty() || !parameters[0].get_this())
2273 {
2274 typet thistype = struct_tag_typet(class_id);
2275 reference_typet object_ref_type = java_reference_type(thistype);
2276 java_method_typet::parametert this_p(object_ref_type);
2277 this_p.set_this();
2278 this_p.set_base_name(ID_this);
2279 parameters.insert(parameters.begin(), this_p);
2280 }
2281
2282 // Note invokespecial is used for super-method calls as well as
2283 // constructors.
2284 if(statement == "invokespecial")
2285 {
2286 if(is_constructor(invoked_method_id))
2287 {
2289 needed_lazy_methods->add_needed_class(class_id);
2290 method_type.set_is_constructor();
2291 }
2292 else
2293 method_type.set(ID_java_super_method_call, true);
2294 }
2295 }
2296
2297 location.set_function(method_id);
2298
2299 code_function_callt::argumentst arguments = pop(parameters.size());
2300
2301 // double-check a bit
2302 INVARIANT(
2303 !use_this || arguments.front().type().id() == ID_pointer,
2304 "first argument must be a pointer");
2305
2306 adjust_invoke_argument_types(parameters, arguments);
2307
2308 // do some type adjustment for return values
2309 exprt lhs = nil_exprt();
2310 const typet &return_type = method_type.return_type();
2311
2312 if(return_type.id() != ID_empty)
2313 {
2314 // return types are promoted in Java
2315 lhs = tmp_variable("return", return_type);
2316 exprt promoted = java_bytecode_promotion(lhs);
2317 results.resize(1);
2318 results[0] = promoted;
2319 }
2320
2321 // If we don't have a definition for the called symbol, and we won't
2322 // inherit a definition from a super-class, we create a new symbol and
2323 // insert it in the symbol table. The name and type of the method are
2324 // derived from the information we have in the call.
2325 // We fix the access attribute to ID_private, because of the following
2326 // reasons:
2327 // - We don't know the original access attribute and since the .class file is
2328 // unavailable, we have no way to know.
2329 // - The translated method could be an inherited protected method, hence
2330 // accessible from the original caller, but not from the generated test.
2331 // Therefore we must assume that the method is not accessible.
2332 // We set opaque methods as final to avoid assuming they can be overridden.
2333 if(
2334 method_symbol == symbol_table.symbols.end() &&
2335 !(is_virtual && is_method_inherited(
2336 class_method_descriptor.class_id(),
2337 class_method_descriptor.mangled_method_name())))
2338 {
2340 invoked_method_id,
2341 class_method_descriptor.base_method_name(),
2342 id2string(class_method_descriptor.class_id()).substr(6) + "." +
2343 id2string(class_method_descriptor.base_method_name()) + "()",
2344 method_type,
2345 class_method_descriptor.class_id(),
2348 }
2349
2350 exprt function;
2351 if(is_virtual)
2352 {
2353 // dynamic binding
2354 PRECONDITION(use_this);
2355 PRECONDITION(!arguments.empty());
2356 function = class_method_descriptor;
2357 // Populate needed methods later,
2358 // once we know what object types can exist.
2359 }
2360 else
2361 {
2362 // static binding
2363 function = symbol_exprt(invoked_method_id, method_type);
2365 {
2366 needed_lazy_methods->add_needed_method(invoked_method_id);
2367 // Calling a static method causes static initialization:
2368 needed_lazy_methods->add_needed_class(class_method_descriptor.class_id());
2369 }
2370 }
2371
2373 std::move(lhs), std::move(function), std::move(arguments));
2374 call.add_source_location() = location;
2375 call.function().add_source_location() = location;
2376
2377 // Replacing call if it is a function of the Character library,
2378 // returning the same call otherwise
2380
2381 if(!use_this)
2382 {
2383 codet clinit_call = get_clinit_call(class_method_descriptor.class_id());
2384 if(clinit_call.get_statement() != ID_skip)
2385 c = code_blockt({clinit_call, c});
2386 }
2387}
2388
2390 source_locationt location,
2391 codet &c)
2392{
2393 exprt operand = pop(1)[0];
2394
2395 // we may need to adjust the type of the argument
2396 operand = typecast_exprt::conditional_cast(operand, bool_typet());
2397
2398 c = code_assumet(operand);
2399 location.set_function(method_id);
2400 c.add_source_location() = location;
2401 return c;
2402}
2403
2405 const exprt &arg0,
2406 const exprt::operandst &op,
2407 codet &c,
2408 exprt::operandst &results) const
2409{
2410 java_instanceof_exprt check(op[0], to_struct_tag_type(arg0.type()));
2411 code_assertt assert_class(check);
2412 assert_class.add_source_location().set_comment("Dynamic cast check");
2413 assert_class.add_source_location().set_property_class("bad-dynamic-cast");
2414 // we add this assert such that we can recognise it
2415 // during the instrumentation phase
2416 c = std::move(assert_class);
2417 results[0] = op[0];
2418}
2419
2421 const source_locationt &location,
2422 const exprt::operandst &op,
2423 codet &c,
2424 exprt::operandst &results) const
2425{
2426 if(
2429 to_reference_type(op[0].type())) == "java::java.lang.AssertionError") &&
2431 {
2432 // we translate athrow into
2433 // ASSERT false;
2434 // ASSUME false:
2435 code_assertt assert_code(false_exprt{});
2436 source_locationt assert_location = location; // copy
2437 assert_location.set_comment("assertion at " + location.as_string());
2438 assert_location.set("user-provided", true);
2439 assert_location.set_property_class(ID_assertion);
2440 assert_code.add_source_location() = assert_location;
2441
2442 code_assumet assume_code(false_exprt{});
2443 source_locationt assume_location = location; // copy
2444 assume_location.set("user-provided", true);
2445 assume_code.add_source_location() = assume_location;
2446
2447 c = code_blockt({assert_code, assume_code});
2448 }
2449 else
2450 {
2451 side_effect_expr_throwt throw_expr(irept(), typet(), location);
2452 throw_expr.copy_to_operands(op[0]);
2453 c = code_expressiont(throw_expr);
2454 }
2455 results[0] = op[0];
2456}
2457
2460 const std::set<method_offsett> &working_set,
2461 method_offsett cur_pc,
2462 codet &code)
2463{
2464 // For each exception handler range that starts here add a CATCH-PUSH marker
2465 // Each CATCH-PUSH records a list of all the exception id and handler label
2466 // pairs handled for its exact block
2467
2468 // Gather all try-catch blocks that have cur_pc as the starting pc
2469 typedef std::vector<std::reference_wrapper<
2471 std::map<std::size_t, exceptionst> exceptions_by_end;
2473 : method.exception_table)
2474 {
2475 if(exception.start_pc == cur_pc)
2476 exceptions_by_end[exception.end_pc].push_back(exception);
2477 }
2478 for(const auto &exceptions : exceptions_by_end)
2479 {
2480 // For each block with a distinct end position create one CATCH-PUSH
2481 code_push_catcht catch_push;
2482 // Fill in its exception_list
2483 code_push_catcht::exception_listt &exception_list =
2484 catch_push.exception_list();
2486 : exceptions.second)
2487 {
2488 exception_list.emplace_back(
2489 exception.catch_type.get_identifier(),
2490 // Record the exception handler in the CATCH-PUSH instruction by
2491 // generating a label corresponding to the handler's pc
2492 label(std::to_string(exception.handler_pc)));
2493 }
2494 // Prepend the CATCH-PUSH instruction
2495 code = code_blockt({ std::move(catch_push), code });
2496 }
2497
2498 // Next add the CATCH-POP instructions
2499 // exception_row.end_pc is exclusive so append a CATCH-POP instruction if
2500 // this is the instruction before it.
2501 // To do this, attempt to find all exception handlers that end at the
2502 // earliest known instruction after this one.
2503
2504 // Dangerously, we assume that the next opcode in the bytecode after the end
2505 // of the exception handler block (whose address matches the exclusive end
2506 // position of the block) will be a successor of some code investigated
2507 // before the instruction at the end of that handler and therefore in the
2508 // working set.
2509 // As an example of where this may fail, for non-obfuscated bytecode
2510 // generated by most compilers the next opcode after the block ending at the
2511 // end of the try block is the lexically next bit of code after the try
2512 // block, i.e. the catch block. When there aren't any throwing statements in
2513 // the try block this block will not be the successor of any instruction.
2514
2515 auto next_opcode_it = std::find_if(
2516 working_set.begin(),
2517 working_set.end(),
2518 [cur_pc](method_offsett offset) { return offset > cur_pc; });
2519 if(next_opcode_it != working_set.end())
2520 {
2521 // Count the distinct start positions of handlers that end at this location
2522 std::set<std::size_t> start_positions; // Use a set to deduplicate
2523 for(const auto &exception_row : method.exception_table)
2524 {
2525 // Check if the next instruction found is the (exclusive) end of a block
2526 if(*next_opcode_it == exception_row.end_pc)
2527 start_positions.insert(exception_row.start_pc);
2528 }
2529 for(std::size_t handler = 0; handler < start_positions.size(); ++handler)
2530 {
2531 // Append a CATCH-POP instruction before the end of the block
2532 code = code_blockt({ code, code_pop_catcht() });
2533 }
2534 }
2535
2536 return code;
2537}
2538
2540 const source_locationt &location,
2541 const exprt &arg0,
2542 const exprt::operandst &op,
2543 exprt::operandst &results)
2544{
2545 const reference_typet ref_type = java_reference_type(arg0.type());
2546 side_effect_exprt java_new_array(ID_java_new_array, ref_type, location);
2547 java_new_array.operands() = op;
2548
2549 code_blockt create;
2550
2551 if(max_array_length != 0)
2552 {
2554 binary_relation_exprt le_max_size(op[0], ID_le, size_limit);
2555 code_assumet assume_le_max_size(le_max_size);
2556 create.add(assume_le_max_size);
2557 }
2558
2559 const exprt tmp = tmp_variable("newarray", ref_type);
2560 create.add(code_assignt(tmp, java_new_array));
2561 results[0] = tmp;
2562
2563 return create;
2564}
2565
2567 const source_locationt &location,
2568 const irep_idt &statement,
2569 const exprt &arg0,
2570 const exprt::operandst &op,
2571 exprt::operandst &results)
2572{
2573 java_reference_typet ref_type = [&]() {
2574 if(statement == "newarray")
2575 {
2576 irep_idt id = arg0.type().id();
2577
2578 char element_type;
2579 if(id == ID_bool)
2580 element_type = 'z';
2581 else if(id == ID_char)
2582 element_type = 'c';
2583 else if(id == ID_float)
2584 element_type = 'f';
2585 else if(id == ID_double)
2586 element_type = 'd';
2587 else if(id == ID_byte)
2588 element_type = 'b';
2589 else if(id == ID_short)
2590 element_type = 's';
2591 else if(id == ID_int)
2592 element_type = 'i';
2593 else if(id == ID_long)
2594 element_type = 'j';
2595 else
2596 element_type = '?';
2597 return java_array_type(element_type);
2598 }
2599 else
2600 {
2602 }
2603 }();
2604
2605 side_effect_exprt java_new_array(ID_java_new_array, ref_type, location);
2606 java_new_array.copy_to_operands(op[0]);
2607
2608 code_blockt block;
2609
2610 if(max_array_length != 0)
2611 {
2613 binary_relation_exprt le_max_size(op[0], ID_le, size_limit);
2614 code_assumet assume_le_max_size(le_max_size);
2615 block.add(std::move(assume_le_max_size));
2616 }
2617 const exprt tmp = tmp_variable("newarray", ref_type);
2618 block.add(code_assignt(tmp, java_new_array));
2619 results[0] = tmp;
2620
2621 return block;
2622}
2623
2625 const source_locationt &location,
2626 const exprt &arg0,
2627 codet &c,
2628 exprt::operandst &results)
2629{
2630 const reference_typet ref_type = java_reference_type(arg0.type());
2631 side_effect_exprt java_new_expr(ID_java_new, ref_type, location);
2632
2633 if(!location.get_line().empty())
2634 java_new_expr.add_source_location() = location;
2635
2636 const exprt tmp = tmp_variable("new", ref_type);
2637 c = code_assignt(tmp, java_new_expr);
2638 c.add_source_location() = location;
2639 codet clinit_call =
2641 if(clinit_call.get_statement() != ID_skip)
2642 {
2643 c = code_blockt({clinit_call, c});
2644 }
2645 results[0] = tmp;
2646}
2647
2649 const source_locationt &location,
2650 const exprt &arg0,
2651 const exprt::operandst &op,
2652 const symbol_exprt &symbol_expr)
2653{
2654 if(needed_lazy_methods && arg0.type().id() == ID_struct_tag)
2655 {
2656 needed_lazy_methods->add_needed_class(
2658 }
2659
2660 code_blockt block;
2661 block.add_source_location() = location;
2662
2663 // Note this initializer call deliberately inits the class used to make
2664 // the reference, which may be a child of the class that actually defines
2665 // the field.
2666 codet clinit_call = get_clinit_call(arg0.get_string(ID_class));
2667 if(clinit_call.get_statement() != ID_skip)
2668 block.add(clinit_call);
2669
2671 "stack_static_field",
2672 block,
2674 symbol_expr.get_identifier());
2675 block.add(code_assignt(symbol_expr, op[0]));
2676 return block;
2677}
2678
2680 const fieldref_exprt &arg0,
2681 const exprt::operandst &op)
2682{
2683 code_blockt block;
2685 "stack_field", block, bytecode_write_typet::FIELD, arg0.component_name());
2686 block.add(code_assignt(to_member(op[0], arg0, ns), op[1]));
2687 return block;
2688}
2689
2691 const source_locationt &source_location,
2692 const exprt &arg0,
2693 const symbol_exprt &symbol_expr,
2694 const bool is_assertions_disabled_field,
2695 codet &c,
2696 exprt::operandst &results)
2697{
2699 {
2700 if(arg0.type().id() == ID_struct_tag)
2701 {
2702 needed_lazy_methods->add_needed_class(
2704 }
2705 else if(arg0.type().id() == ID_pointer)
2706 {
2707 const auto &pointer_type = to_pointer_type(arg0.type());
2708 if(pointer_type.base_type().id() == ID_struct_tag)
2709 {
2710 needed_lazy_methods->add_needed_class(
2712 }
2713 }
2714 else if(is_assertions_disabled_field)
2715 {
2716 needed_lazy_methods->add_needed_class(arg0.get_string(ID_class));
2717 }
2718 }
2719 symbol_exprt symbol_with_location = symbol_expr;
2720 symbol_with_location.add_source_location() = source_location;
2721 results[0] = java_bytecode_promotion(symbol_with_location);
2722
2723 // Note this initializer call deliberately inits the class used to make
2724 // the reference, which may be a child of the class that actually defines
2725 // the field.
2726 codet clinit_call = get_clinit_call(arg0.get_string(ID_class));
2727 if(clinit_call.get_statement() != ID_skip)
2728 c = clinit_call;
2729 else if(is_assertions_disabled_field)
2730 {
2731 // set $assertionDisabled to false
2732 c = code_assignt(symbol_expr, false_exprt());
2733 }
2734}
2735
2737 const irep_idt &statement,
2738 const exprt::operandst &op,
2739 exprt::operandst &results) const
2740{
2741 const int nan_value(statement[4] == 'l' ? -1 : 1);
2742 const typet result_type(java_int_type());
2743 const exprt nan_result(from_integer(nan_value, result_type));
2744
2745 // (value1 == NaN || value2 == NaN) ?
2746 // nan_value : value1 < value2 ? -1 : value2 < value1 1 ? 1 : 0;
2747 // (value1 == NaN || value2 == NaN) ?
2748 // nan_value : value1 == value2 ? 0 : value1 < value2 -1 ? 1 : 0;
2749
2750 isnan_exprt nan_op0(op[0]);
2751 isnan_exprt nan_op1(op[1]);
2752 exprt one = from_integer(1, result_type);
2753 exprt minus_one = from_integer(-1, result_type);
2754 results[0] = if_exprt(
2755 or_exprt(nan_op0, nan_op1),
2756 nan_result,
2757 if_exprt(
2758 ieee_float_equal_exprt(op[0], op[1]),
2759 from_integer(0, result_type),
2760 if_exprt(binary_relation_exprt(op[0], ID_lt, op[1]), minus_one, one)));
2761 return results;
2762}
2763
2765 const exprt::operandst &op,
2766 exprt::operandst &results) const
2767{ // The integer result on the stack is:
2768 // 0 if op[0] equals op[1]
2769 // -1 if op[0] is less than op[1]
2770 // 1 if op[0] is greater than op[1]
2771
2772 const typet t = java_int_type();
2773 exprt one = from_integer(1, t);
2774 exprt minus_one = from_integer(-1, t);
2775
2776 if_exprt greater =
2777 if_exprt(binary_relation_exprt(op[0], ID_gt, op[1]), one, minus_one);
2778
2779 results[0] = if_exprt(
2780 binary_relation_exprt(op[0], ID_equal, op[1]), from_integer(0, t), greater);
2781 return results;
2782}
2783
2785 const irep_idt &statement,
2786 const exprt::operandst &op,
2787 exprt::operandst &results) const
2788{
2789 const typet type = java_type_from_char(statement[0]);
2790
2791 const std::size_t width = get_bytecode_type_width(type);
2792
2793 // According to JLS 15.19 Shift Operators
2794 // a mask 0b11111 is applied for int and 0b111111 for long.
2795 exprt mask = from_integer(width - 1, op[1].type());
2796
2797 results[0] = shl_exprt(op[0], bitand_exprt(op[1], mask));
2798 return results;
2799}
2800
2802 const irep_idt &statement,
2803 const exprt::operandst &op,
2804 exprt::operandst &results) const
2805{
2806 const typet type = java_type_from_char(statement[0]);
2807
2808 const std::size_t width = get_bytecode_type_width(type);
2809 typet target = unsignedbv_typet(width);
2810
2811 exprt lhs = typecast_exprt::conditional_cast(op[0], target);
2812 exprt rhs = typecast_exprt::conditional_cast(op[1], target);
2813
2814 results[0] =
2815 typecast_exprt::conditional_cast(lshr_exprt(lhs, rhs), op[0].type());
2816
2817 return results;
2818}
2819
2821 const exprt &arg0,
2822 const exprt &arg1,
2823 const source_locationt &location,
2824 const method_offsett address)
2825{
2826 code_blockt block;
2827 block.add_source_location() = location;
2828 // search variable on stack
2829 const exprt &locvar = variable(arg0, 'i', address);
2831 "stack_iinc",
2832 block,
2834 to_symbol_expr(locvar).get_identifier());
2835
2836 const exprt arg1_int_type =
2838 code_assignt code_assign(
2839 variable(arg0, 'i', address),
2840 plus_exprt(
2842 variable(arg0, 'i', address), java_int_type()),
2843 arg1_int_type));
2844 block.add(std::move(code_assign));
2845
2846 return block;
2847}
2848
2851 const exprt::operandst &op,
2852 const mp_integer &number,
2853 const source_locationt &location) const
2854{
2856 const exprt rhs(null_pointer_exprt(to_pointer_type(lhs.type())));
2857
2858 const method_offsett label_number = numeric_cast_v<method_offsett>(number);
2859
2860 code_ifthenelset code_branch(
2861 binary_relation_exprt(lhs, ID_equal, rhs),
2862 code_gotot(label(std::to_string(label_number))));
2863
2864 code_branch.then_case().add_source_location() =
2865 address_map.at(label_number).source->source_location;
2866 code_branch.add_source_location() = location;
2867
2868 return code_branch;
2869}
2870
2873 const exprt::operandst &op,
2874 const mp_integer &number,
2875 const source_locationt &location) const
2876{
2878 const exprt rhs(null_pointer_exprt(to_pointer_type(lhs.type())));
2879
2880 const method_offsett label_number = numeric_cast_v<method_offsett>(number);
2881
2882 code_ifthenelset code_branch(
2883 binary_relation_exprt(lhs, ID_notequal, rhs),
2884 code_gotot(label(std::to_string(label_number))));
2885
2886 code_branch.then_case().add_source_location() =
2887 address_map.at(label_number).source->source_location;
2888 code_branch.add_source_location() = location;
2889
2890 return code_branch;
2891}
2892
2895 const exprt::operandst &op,
2896 const irep_idt &id,
2897 const mp_integer &number,
2898 const source_locationt &location) const
2899{
2900 const method_offsett label_number = numeric_cast_v<method_offsett>(number);
2901
2902 code_ifthenelset code_branch(
2903 binary_relation_exprt(op[0], id, from_integer(0, op[0].type())),
2904 code_gotot(label(std::to_string(label_number))));
2905
2906 code_branch.cond().add_source_location() = location;
2908 code_branch.then_case().add_source_location() =
2909 address_map.at(label_number).source->source_location;
2911 code_branch.add_source_location() = location;
2913
2914 return code_branch;
2915}
2916
2919 const u1 bytecode,
2920 const exprt::operandst &op,
2921 const mp_integer &number,
2922 const source_locationt &location) const
2923{
2924 const irep_idt cmp_op = get_if_cmp_operator(bytecode);
2925 binary_relation_exprt condition(
2926 op[0], cmp_op, typecast_exprt::conditional_cast(op[1], op[0].type()));
2927 condition.add_source_location() = location;
2928
2929 const method_offsett label_number = numeric_cast_v<method_offsett>(number);
2930
2931 code_ifthenelset code_branch(
2932 std::move(condition), code_gotot(label(std::to_string(label_number))));
2933
2934 code_branch.then_case().add_source_location() =
2935 address_map.at(label_number).source->source_location;
2936 code_branch.add_source_location() = location;
2937
2938 return code_branch;
2939}
2940
2942 const std::vector<method_offsett> &jsr_ret_targets,
2943 const exprt &arg0,
2944 const source_locationt &location,
2945 const method_offsett address)
2946{
2947 code_blockt c;
2948 auto retvar = variable(arg0, 'a', address);
2949 for(size_t idx = 0, idxlim = jsr_ret_targets.size(); idx != idxlim; ++idx)
2950 {
2951 irep_idt number = std::to_string(jsr_ret_targets[idx]);
2952 code_gotot g(label(number));
2953 g.add_source_location() = location;
2954 if(idx == idxlim - 1)
2955 c.add(g);
2956 else
2957 {
2958 auto address_ptr =
2959 from_integer(jsr_ret_targets[idx], unsignedbv_typet(64));
2960 address_ptr.type() = pointer_type(java_void_type());
2961
2962 code_ifthenelset branch(equal_exprt(retvar, address_ptr), std::move(g));
2963
2964 branch.cond().add_source_location() = location;
2965 branch.add_source_location() = location;
2966
2967 c.add(std::move(branch));
2968 }
2969 }
2970 return c;
2971}
2972
2978static exprt conditional_array_cast(const exprt &expr, char type_char)
2979{
2980 const auto ref_type =
2982 const bool typecast_not_needed =
2983 ref_type && ((type_char == 'b' && ref_type->subtype().get_identifier() ==
2984 "java::array[boolean]") ||
2985 *ref_type == java_array_type(type_char));
2986 return typecast_not_needed ? expr
2987 : typecast_exprt(expr, java_array_type(type_char));
2988}
2989
2991 const irep_idt &statement,
2992 const exprt::operandst &op)
2993{
2994 PRECONDITION(op.size() == 2);
2995 const char type_char = statement[0];
2996 const exprt op_with_right_type = conditional_array_cast(op[0], type_char);
2997 dereference_exprt deref{op_with_right_type};
2998 deref.set(ID_java_member_access, true);
2999
3001 INVARIANT(java_array_type, "Java array type should be a struct_tag_typet");
3002 member_exprt data_ptr{
3004 plus_exprt data_plus_offset{std::move(data_ptr), op[1]};
3005 // tag it so it's easy to identify during instrumentation
3006 data_plus_offset.set(ID_java_array_access, true);
3007 return java_bytecode_promotion(dereference_exprt{data_plus_offset});
3008}
3009
3011 const exprt &index,
3012 char type_char,
3013 size_t address)
3014{
3015 const exprt var = variable(index, type_char, address);
3016 if(type_char == 'i')
3017 {
3018 INVARIANT(
3020 type_try_dynamic_cast<bitvector_typet>(var.type())->get_width() <= 32,
3021 "iload can be used for boolean, byte, short, int and char");
3023 }
3024 INVARIANT(
3025 (type_char == 'a' && can_cast_type<reference_typet>(var.type())) ||
3026 var.type() == java_type_from_char(type_char),
3027 "Variable type must match [adflv]load return type");
3028 return var;
3029}
3030
3032 const irep_idt &statement,
3033 const exprt &arg0,
3034 const exprt::operandst &op,
3035 const method_offsett address,
3036 const source_locationt &location)
3037{
3038 const exprt var = variable(arg0, statement[0], address);
3039 const irep_idt &var_name = to_symbol_expr(var).get_identifier();
3040
3041 code_blockt block;
3042 block.add_source_location() = location;
3043
3045 "stack_store",
3046 block,
3048 var_name);
3049
3050 block.add(
3052 location);
3053 return block;
3054}
3055
3057 const irep_idt &statement,
3058 const exprt::operandst &op,
3059 const source_locationt &location)
3060{
3061 PRECONDITION(op.size() == 3);
3062 const char type_char = statement[0];
3063 const exprt op_with_right_type = conditional_array_cast(op[0], type_char);
3064 dereference_exprt deref{op_with_right_type};
3065 deref.set(ID_java_member_access, true);
3066
3068 INVARIANT(java_array_type, "Java array type should be a struct_tag_typet");
3069 member_exprt data_ptr{
3071 plus_exprt data_plus_offset{std::move(data_ptr), op[1]};
3072 // tag it so it's easy to identify during instrumentation
3073 data_plus_offset.set(ID_java_array_access, true);
3074
3075 code_blockt block;
3076 block.add_source_location() = location;
3077
3079 "stack_astore", block, bytecode_write_typet::ARRAY_REF, "");
3080
3081 code_assignt array_put{dereference_exprt{data_plus_offset}, op[2]};
3082 block.add(std::move(array_put), location);
3083 return block;
3084}
3085
3087 const source_locationt &location,
3088 std::size_t instruction_address,
3089 const exprt &arg0,
3090 codet &result_code)
3091{
3092 const java_method_typet &method_type = to_java_method_type(arg0.type());
3093 const java_method_typet::parameterst &parameters(method_type.parameters());
3094 const typet &return_type = method_type.return_type();
3095
3096 // Note these must be popped regardless of whether we understand the lambda
3097 // method or not
3098 code_function_callt::argumentst arguments = pop(parameters.size());
3099
3100 irep_idt synthetic_class_name =
3101 lambda_synthetic_class_name(method_id, instruction_address);
3102
3103 if(!symbol_table.has_symbol(synthetic_class_name))
3104 {
3105 // We failed to parse the invokedynamic handle as a Java 8+ lambda;
3106 // give up and return null.
3107 const auto value = zero_initializer(return_type, location, ns);
3108 if(!value.has_value())
3109 {
3110 log.error().source_location = location;
3111 log.error() << "failed to zero-initialize return type" << messaget::eom;
3112 throw 0;
3113 }
3114 return value;
3115 }
3116
3117 // Construct an instance of the synthetic class created for this invokedynamic
3118 // site:
3119
3120 irep_idt constructor_name = id2string(synthetic_class_name) + ".<init>";
3121
3122 const symbolt &constructor_symbol = ns.lookup(constructor_name);
3123
3124 code_blockt result;
3125
3126 // SyntheticType lambda_new = new SyntheticType;
3127 const reference_typet ref_type =
3128 java_reference_type(struct_tag_typet(synthetic_class_name));
3129 side_effect_exprt java_new_expr(ID_java_new, ref_type, location);
3130 const exprt new_instance = tmp_variable("lambda_new", ref_type);
3131 result.add(code_assignt(new_instance, java_new_expr, location));
3132
3133 // lambda_new.<init>(capture_1, capture_2, ...);
3134 // Add the implicit 'this' parameter:
3135 arguments.insert(arguments.begin(), new_instance);
3138
3139 code_function_callt constructor_call(
3140 constructor_symbol.symbol_expr(), arguments);
3141 constructor_call.add_source_location() = location;
3142 result.add(constructor_call);
3144 {
3145 needed_lazy_methods->add_needed_method(constructor_symbol.name);
3146 needed_lazy_methods->add_needed_class(synthetic_class_name);
3147 }
3148
3149 result_code = std::move(result);
3150
3151 if(return_type.id() == ID_empty)
3152 return {};
3153 else
3154 return new_instance;
3155}
3156
3159 const std::vector<method_offsett> &jsr_ret_targets,
3160 const std::vector<
3161 std::vector<java_bytecode_parse_treet::instructiont>::const_iterator>
3162 &ret_instructions) const
3163{ // Draw edges from every `ret` to every `jsr` successor. Could do better with
3164 // flow analysis to distinguish multiple subroutines within the same function.
3165 for(const auto &retinst : ret_instructions)
3166 {
3167 auto &a_entry = address_map.at(retinst->address);
3168 a_entry.successors.insert(
3169 a_entry.successors.end(), jsr_ret_targets.begin(), jsr_ret_targets.end());
3170 }
3171}
3172
3173std::vector<java_bytecode_convert_methodt::method_offsett>
3175 const method_offsett address,
3177 const
3178{
3179 std::vector<method_offsett> result;
3180 for(const auto &exception_row : exception_table)
3181 {
3182 if(address >= exception_row.start_pc && address < exception_row.end_pc)
3183 result.push_back(exception_row.handler_pc);
3184 }
3185 return result;
3186}
3187
3201 symbolt &method_symbol,
3203 &local_variable_table,
3204 symbol_table_baset &symbol_table)
3205{
3206 // Obtain a std::vector of java_method_typet::parametert objects from the
3207 // (function) type of the symbol
3208 java_method_typet &method_type = to_java_method_type(method_symbol.type);
3209 java_method_typet::parameterst &parameters = method_type.parameters();
3210
3211 // Find number of parameters
3212 unsigned slots_for_parameters = java_method_parameter_slots(method_type);
3213
3214 // Find parameter names in the local variable table:
3215 typedef std::pair<irep_idt, irep_idt> base_name_and_identifiert;
3216 std::map<std::size_t, base_name_and_identifiert> param_names;
3217 for(const auto &v : local_variable_table)
3218 {
3219 if(v.index < slots_for_parameters)
3220 param_names.emplace(
3221 v.index,
3222 make_pair(
3223 v.name, id2string(method_symbol.name) + "::" + id2string(v.name)));
3224 }
3225
3226 // Assign names to parameters
3227 std::size_t param_index = 0;
3228 for(auto &param : parameters)
3229 {
3230 irep_idt base_name, identifier;
3231
3232 // Construct a sensible base name (base_name) and a fully qualified name
3233 // (identifier) for every parameter of the method under translation,
3234 // regardless of whether we have an LVT or not; and assign it to the
3235 // parameter object (which is stored in the type of the symbol, not in the
3236 // symbol table)
3237 if(param_index == 0 && param.get_this())
3238 {
3239 // my.package.ClassName.myMethodName:(II)I::this
3240 base_name = ID_this;
3241 identifier = id2string(method_symbol.name) + "::" + id2string(base_name);
3242 }
3243 else
3244 {
3245 auto param_name = param_names.find(param_index);
3246 if(param_name != param_names.end())
3247 {
3248 base_name = param_name->second.first;
3249 identifier = param_name->second.second;
3250 }
3251 else
3252 {
3253 // my.package.ClassName.myMethodName:(II)I::argNT, where N is the local
3254 // variable slot where the parameter is stored and T is a character
3255 // indicating the type
3256 const typet &type = param.type();
3257 char suffix = java_char_from_type(type);
3258 base_name = "arg" + std::to_string(param_index) + suffix;
3259 identifier =
3260 id2string(method_symbol.name) + "::" + id2string(base_name);
3261 }
3262 }
3263 param.set_base_name(base_name);
3264 param.set_identifier(identifier);
3265
3266 // Build a new symbol for the parameter and add it to the symbol table
3267 parameter_symbolt parameter_symbol;
3268 parameter_symbol.base_name = base_name;
3269 parameter_symbol.mode = ID_java;
3270 parameter_symbol.name = identifier;
3271 parameter_symbol.type = param.type();
3272 symbol_table.insert(parameter_symbol);
3273
3274 param_index += java_local_variable_slots(param.type());
3275 }
3276}
3277
3279 const symbolt &class_symbol,
3281 symbol_table_baset &symbol_table,
3282 message_handlert &message_handler,
3283 size_t max_array_length,
3284 bool throw_assertion_error,
3285 optionalt<ci_lazy_methods_neededt> needed_lazy_methods,
3286 java_string_library_preprocesst &string_preprocess,
3287 const class_hierarchyt &class_hierarchy,
3288 bool threading_support,
3289 const optionalt<prefix_filtert> &method_context,
3290 bool assert_no_exceptions_thrown)
3291
3292{
3294 symbol_table,
3295 message_handler,
3296 max_array_length,
3297 throw_assertion_error,
3298 needed_lazy_methods,
3299 string_preprocess,
3300 class_hierarchy,
3301 threading_support,
3302 assert_no_exceptions_thrown);
3303
3304 java_bytecode_convert_method(class_symbol, method, method_context);
3305}
3306
3313 const irep_idt &classname,
3314 const irep_idt &mangled_method_name) const
3315{
3316 const auto inherited_method = get_inherited_method_implementation(
3317 mangled_method_name, classname, symbol_table);
3318
3319 return inherited_method.has_value();
3320}
3321
3328 const irep_idt &class_identifier,
3329 const irep_idt &component_name) const
3330{
3331 const auto inherited_method = get_inherited_component(
3332 class_identifier, component_name, symbol_table, true);
3333
3334 INVARIANT(
3335 inherited_method.has_value(), "static field should be in symbol table");
3336
3337 return inherited_method->get_full_component_identifier();
3338}
3339
3347 const std::string &tmp_var_prefix,
3348 code_blockt &block,
3349 const bytecode_write_typet write_type,
3350 const irep_idt &identifier)
3351{
3352 const std::function<bool(
3353 const std::function<tvt(const exprt &expr)>, const exprt &expr)>
3354 entry_matches = [&entry_matches](
3355 const std::function<tvt(const exprt &expr)> predicate,
3356 const exprt &expr) {
3357 const tvt &tvres = predicate(expr);
3358 if(tvres.is_unknown())
3359 {
3360 return std::any_of(
3361 expr.operands().begin(),
3362 expr.operands().end(),
3363 [&predicate, &entry_matches](const exprt &expr) {
3364 return entry_matches(predicate, expr);
3365 });
3366 }
3367 else
3368 {
3369 return tvres.is_true();
3370 }
3371 };
3372
3373 // Function that checks whether the expression accesses a member with the
3374 // given identifier name. These accesses are created in the case of `iinc`, or
3375 // non-array `?store` instructions.
3376 const std::function<tvt(const exprt &expr)> has_member_entry = [&identifier](
3377 const exprt &expr) {
3378 const auto member_expr = expr_try_dynamic_cast<member_exprt>(expr);
3379 return !member_expr ? tvt::unknown()
3380 : tvt(member_expr->get_component_name() == identifier);
3381 };
3382
3383 // Function that checks whether the expression is a symbol with the given
3384 // identifier name. These accesses are created in the case of `putstatic` or
3385 // `putfield` instructions.
3386 const std::function<tvt(const exprt &expr)> is_symbol_entry =
3387 [&identifier](const exprt &expr) {
3388 const auto symbol_expr = expr_try_dynamic_cast<symbol_exprt>(expr);
3389 return !symbol_expr ? tvt::unknown()
3390 : tvt(symbol_expr->get_identifier() == identifier);
3391 };
3392
3393 // Function that checks whether the expression is a dereference
3394 // expression. These accesses are created in `?astore` array write
3395 // instructions.
3396 const std::function<tvt(const exprt &expr)> is_dereference_entry =
3397 [](const exprt &expr) {
3398 const auto dereference_expr =
3400 return !dereference_expr ? tvt::unknown() : tvt(true);
3401 };
3402
3403 for(auto &stack_entry : stack)
3404 {
3405 bool replace = false;
3406 switch(write_type)
3407 {
3410 replace = entry_matches(is_symbol_entry, stack_entry);
3411 break;
3413 replace = entry_matches(is_dereference_entry, stack_entry);
3414 break;
3416 replace = entry_matches(has_member_entry, stack_entry);
3417 break;
3418 }
3419 if(replace)
3420 {
3422 tmp_var_prefix, stack_entry.type(), block, stack_entry);
3423 }
3424 }
3425}
3426
3429 const std::string &tmp_var_prefix,
3430 const typet &tmp_var_type,
3431 code_blockt &block,
3432 exprt &stack_entry)
3433{
3434 const exprt tmp_var=
3435 tmp_variable(tmp_var_prefix, tmp_var_type);
3436 block.add(code_assignt(tmp_var, stack_entry));
3437 stack_entry=tmp_var;
3438}
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Target numeric_cast_v(const mp_integer &arg)
Convert an mp_integer to integral type Target An invariant will fail if the conversion is not possibl...
API to expression classes for bitvectors.
const bitvector_typet & to_bitvector_type(const typet &type)
Cast a typet to a bitvector_typet.
const integer_bitvector_typet & to_integer_bitvector_type(const typet &type)
Cast a typet to an integer_bitvector_typet.
void branch(goto_modelt &goto_model, const irep_idt &id)
Definition branch.cpp:22
struct bytecode_infot const bytecode_info[]
#define BC_ifnull
#define BC_jsr
#define BC_invokestatic
#define BC_iinc
#define BC_tableswitch
#define BC_drem
#define BC_d2l
#define BC_dup_x1
#define BC_invokespecial
#define BC_getfield
#define BC_nop
#define BC_pop2
#define BC_instanceof
#define BC_goto
#define BC_arraylength
#define BC_dup_x2
#define BC_ldc
#define BC_ldc2_w
#define BC_ldiv
#define BC_iconst_m1
#define BC_monitorexit
#define BC_new
#define BC_lookupswitch
#define BC_ifge
#define BC_newarray
#define BC_getstatic
#define BC_jsr_w
#define BC_anewarray
#define BC_lrem
#define BC_dup2
#define BC_aconst_null
#define BC_pop
#define BC_return
#define BC_d2i
#define BC_dup
#define BC_athrow
#define BC_ifgt
#define BC_putfield
#define BC_f2i
#define BC_idiv
#define BC_ifeq
#define BC_monitorenter
#define BC_ret
#define BC_goto_w
#define BC_swap
#define BC_iflt
#define BC_checkcast
#define BC_putstatic
#define BC_ldc_w
#define BC_multianewarray
#define BC_dup2_x2
uint8_t u1
#define BC_dup2_x1
#define BC_invokedynamic
#define BC_irem
#define BC_invokeinterface
#define BC_frem
#define BC_ifnonnull
#define BC_invokevirtual
#define BC_ifle
#define BC_f2l
#define BC_ifne
pointer_typet pointer_type(const typet &subtype)
Definition c_types.cpp:240
Arithmetic right shift.
Internally generated symbol table entryThis is a symbol generated as part of translation to or modifi...
Definition symbol.h:153
A base class for binary expressions.
Definition std_expr.h:583
A base class for relations, i.e., binary predicates whose two operands have the same type.
Definition std_expr.h:707
Bit-wise AND.
Bit-wise OR.
std::size_t get_width() const
Definition std_types.h:876
Bit-wise XOR.
The Boolean type.
Definition std_types.h:36
Non-graph-based representation of the class hierarchy.
An expression describing a method on a class.
Definition std_expr.h:3448
const irep_idt & base_method_name() const
The name of the method to which this expression is applied as would be seen in the source code.
Definition std_expr.h:3500
const irep_idt & mangled_method_name() const
The method name after mangling it by combining it with the descriptor.
Definition std_expr.h:3485
const irep_idt & get_identifier() const
A unique identifier of the combination of class and method overload to which this expression refers.
Definition std_expr.h:3508
const irep_idt & class_id() const
Unique identifier in the symbol table, of the compile time type of the class which this expression is...
Definition std_expr.h:3493
A non-fatal assertion, which checks a condition then permits execution to continue.
Definition std_code.h:270
A goto_instruction_codet representing an assignment in the program.
An assumption, which must hold in subsequent code.
Definition std_code.h:217
A codet representing sequential composition of program statements.
Definition std_code.h:130
code_operandst & statements()
Definition std_code.h:138
void append(const code_blockt &extra_block)
Add all the codets from extra_block to the current code_blockt.
Definition std_code.cpp:86
void add(const codet &code)
Definition std_code.h:168
codet & find_last_statement()
Definition std_code.cpp:96
A goto_instruction_codet representing the declaration of a local variable.
codet representation of an expression statement.
Definition std_code.h:1394
codet representation of a "return from a function" statement.
Definition std_code.h:893
goto_instruction_codet representation of a function call statement.
codet representation of a goto statement.
Definition std_code.h:841
codet representation of an if-then-else statement.
Definition std_code.h:460
const exprt & cond() const
Definition std_code.h:478
const codet & then_case() const
Definition std_code.h:488
codet representation of a label for branch targets.
Definition std_code.h:959
codet & code()
Definition std_code.h:977
void set_label(const irep_idt &label)
Definition std_code.h:972
A statement that catches an exception, assigning the exception in flight to an expression (e....
Definition std_code.h:1936
Pops an exception handler from the stack of active handlers (i.e.
Definition std_code.h:1899
Pushes an exception handler, of the form: exception_tag1 -> label1 exception_tag2 -> label2 ....
Definition std_code.h:1805
exception_listt & exception_list()
Definition std_code.h:1860
std::vector< exception_list_entryt > exception_listt
Definition std_code.h:1849
A codet representing a skip statement.
Definition std_code.h:322
codet representation of a switch-case, i.e. a case statement within a switch.
Definition std_code.h:1023
codet representing a switch statement.
Definition std_code.h:548
void set_base_name(const irep_idt &name)
Definition std_types.h:585
const irep_idt & get_access() const
Definition std_types.h:675
void set_is_constructor()
Definition std_types.h:690
bool has_this() const
Definition std_types.h:616
const parameterst & parameters() const
Definition std_types.h:655
const typet & return_type() const
Definition std_types.h:645
void set_access(const irep_idt &access)
Definition std_types.h:680
bool get_is_constructor() const
Definition std_types.h:685
Data structure for representing an arbitrary statement in a program.
const irep_idt & get_statement() const
A constant literal expression.
Definition std_expr.h:2942
Operator to dereference a pointer.
Division.
Definition std_expr.h:1097
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition dstring.h:39
bool empty() const
Definition dstring.h:90
Equality.
Definition std_expr.h:1306
Base class for all expressions.
Definition expr.h:56
std::vector< exprt > operandst
Definition expr.h:58
void copy_to_operands(const exprt &expr)
Copy the given argument to the end of exprt's operands.
Definition expr.h:155
bool is_constant() const
Return whether the expression is a constant.
Definition expr.h:204
typet & type()
Return the type of the expression.
Definition expr.h:84
operandst & operands()
Definition expr.h:94
source_locationt & add_source_location()
Definition expr.h:228
The Boolean constant false.
Definition std_expr.h:3017
Represents the argument of an instruction that uses a CONSTANT_Fieldref This is used for example as a...
irep_idt class_name() const
irep_idt component_name() const
IEEE-floating-point equality.
static ieee_float_spect single_precision()
Definition ieee_float.h:70
static ieee_float_spect double_precision()
Definition ieee_float.h:76
The trinary if-then-else operator.
Definition std_expr.h:2323
constant_exprt largest_expr() const
Return an expression representing the largest value of this type.
constant_exprt smallest_expr() const
Return an expression representing the smallest value of this type.
There are a large number of kinds of tree structured or tree-like data in CPROVER.
Definition irep.h:372
const irep_idt & get(const irep_idt &name) const
Definition irep.cpp:44
void set(const irep_idt &name, const irep_idt &value)
Definition irep.h:420
void swap(irept &irep)
Definition irep.h:442
const irep_idt & id() const
Definition irep.h:396
const std::string & get_string(const irep_idt &name) const
Definition irep.h:409
Evaluates to true if the operand is NaN.
void convert_dup2_x2(exprt::operandst &op, exprt::operandst &results)
optionalt< ci_lazy_methods_neededt > needed_lazy_methods
method_offsett slots_for_parameters
Number of local variable slots used by the JVM to pass parameters upon invocation of the method under...
irep_idt get_static_field(const irep_idt &class_identifier, const irep_idt &component_name) const
Get static field identifier referred to by class_identifier.component_name Note this may be inherited...
void draw_edges_from_ret_to_jsr(address_mapt &address_map, const std::vector< method_offsett > &jsr_ret_targets, const std::vector< std::vector< java_bytecode_parse_treet::instructiont >::const_iterator > &ret_instructions) const
code_ifthenelset convert_if_cmp(const java_bytecode_convert_methodt::address_mapt &address_map, const u1 bytecode, const exprt::operandst &op, const mp_integer &number, const source_locationt &location) const
void create_stack_tmp_var(const std::string &, const typet &, code_blockt &, exprt &)
actually create a temporary variable to hold the value of a stack entry
codet & do_exception_handling(const methodt &method, const std::set< method_offsett > &working_set, method_offsett cur_pc, codet &c)
exprt::operandst & convert_ushr(const irep_idt &statement, const exprt::operandst &op, exprt::operandst &results) const
exprt convert_load(const exprt &index, char type_char, size_t address)
Load reference from local variable.
void setup_local_variables(const methodt &m, const address_mapt &amap)
See find_initializers_for_slot above for more detail.
const variablet & find_variable_for_slot(size_t address, variablest &var_list)
See above.
code_blockt convert_store(const irep_idt &statement, const exprt &arg0, const exprt::operandst &op, const method_offsett address, const source_locationt &location)
code_blockt convert_astore(const irep_idt &statement, const exprt::operandst &op, const source_locationt &location)
static irep_idt label(const irep_idt &address)
std::vector< method_offsett > try_catch_handler(method_offsett address, const java_bytecode_parse_treet::methodt::exception_tablet &exception_table) const
code_blockt convert_instructions(const methodt &)
codet & replace_call_to_cprover_assume(source_locationt location, codet &c)
java_string_library_preprocesst & string_preprocess
code_blockt convert_ret(const std::vector< method_offsett > &jsr_ret_targets, const exprt &arg0, const source_locationt &location, const method_offsett address)
codet get_clinit_call(const irep_idt &classname)
Each static access to classname should be prefixed with a check for necessary static init; this retur...
code_blockt convert_putstatic(const source_locationt &location, const exprt &arg0, const exprt::operandst &op, const symbol_exprt &symbol_expr)
code_ifthenelset convert_if(const java_bytecode_convert_methodt::address_mapt &address_map, const exprt::operandst &op, const irep_idt &id, const mp_integer &number, const source_locationt &location) const
code_ifthenelset convert_ifnull(const java_bytecode_convert_methodt::address_mapt &address_map, const exprt::operandst &op, const mp_integer &number, const source_locationt &location) const
code_blockt & get_block_for_pcrange(block_tree_nodet &tree, code_blockt &this_block, method_offsett address_start, method_offsett address_limit, method_offsett next_block_start_address)
'tree' describes a tree of code_blockt objects; this_block is the corresponding block (thus they are ...
codet convert_pop(const irep_idt &statement, const exprt::operandst &op)
exprt::operandst & convert_const(const irep_idt &statement, const constant_exprt &arg0, exprt::operandst &results) const
void convert_checkcast(const exprt &arg0, const exprt::operandst &op, codet &c, exprt::operandst &results) const
void convert_dup2_x1(exprt::operandst &op, exprt::operandst &results)
code_blockt convert_putfield(const fieldref_exprt &arg0, const exprt::operandst &op)
code_blockt convert_multianewarray(const source_locationt &location, const exprt &arg0, const exprt::operandst &op, exprt::operandst &results)
code_ifthenelset convert_ifnonull(const java_bytecode_convert_methodt::address_mapt &address_map, const exprt::operandst &op, const mp_integer &number, const source_locationt &location) const
void convert(const symbolt &class_symbol, const methodt &, const optionalt< prefix_filtert > &method_context)
code_blockt convert_parameter_annotations(const methodt &method, const java_method_typet &method_type)
code_switcht convert_switch(const exprt::operandst &op, const java_bytecode_parse_treet::instructiont::argst &args, const source_locationt &location)
void save_stack_entries(const std::string &, code_blockt &, const bytecode_write_typet, const irep_idt &)
Create temporary variables if a write instruction can have undesired side- effects.
void convert_new(const source_locationt &location, const exprt &arg0, codet &c, exprt::operandst &results)
exprt variable(const exprt &arg, char type_char, size_t address)
Returns an expression indicating a local variable suitable to load/store from a bytecode at address a...
std::map< method_offsett, converted_instructiont > address_mapt
static void replace_goto_target(codet &repl, const irep_idt &old_label, const irep_idt &new_label)
Find all goto statements in 'repl' that target 'old_label' and redirect them to 'new_label'.
code_blockt convert_iinc(const exprt &arg0, const exprt &arg1, const source_locationt &location, method_offsett address)
exprt::operandst & convert_cmp2(const irep_idt &statement, const exprt::operandst &op, exprt::operandst &results) const
void convert_dup2(exprt::operandst &op, exprt::operandst &results)
optionalt< exprt > convert_invoke_dynamic(const source_locationt &location, std::size_t instruction_address, const exprt &arg0, codet &result_code)
bool is_method_inherited(const irep_idt &classname, const irep_idt &mangled_method_name) const
Returns true iff method methodid from class classname is a method inherited from a class or interface...
void convert_athrow(const source_locationt &location, const exprt::operandst &op, codet &c, exprt::operandst &results) const
typet method_return_type
Return type of the method under conversion.
void convert_getstatic(const source_locationt &source_location, const exprt &arg0, const symbol_exprt &symbol_expr, bool is_assertions_disabled_field, codet &c, exprt::operandst &results)
irep_idt method_id
Fully qualified name of the method under translation.
code_blockt convert_newarray(const source_locationt &location, const irep_idt &statement, const exprt &arg0, const exprt::operandst &op, exprt::operandst &results)
exprt::operandst & convert_shl(const irep_idt &statement, const exprt::operandst &op, exprt::operandst &results) const
exprt::operandst & convert_cmp(const exprt::operandst &op, exprt::operandst &results) const
void pop_residue(std::size_t n)
removes minimum(n, stack.size()) elements from the stack
void convert_invoke(source_locationt location, const irep_idt &statement, class_method_descriptor_exprt &class_method_descriptor, codet &c, exprt::operandst &results)
code_blockt & get_or_create_block_for_pcrange(block_tree_nodet &tree, code_blockt &this_block, method_offsett address_start, method_offsett address_limit, method_offsett next_block_start_address, const address_mapt &amap, bool allow_merge=true)
As above, but this version can additionally create a new branch in the block_tree-node and code_block...
codet convert_monitorenterexit(const irep_idt &statement, const exprt::operandst &op, const source_locationt &source_location)
irep_idt current_method
A copy of method_id :/.
static exprt convert_aload(const irep_idt &statement, const exprt::operandst &op)
symbol_exprt tmp_variable(const std::string &prefix, const typet &type)
const methodst & methods() const
Definition java_types.h:300
void add_throws_exception(irep_idt exception)
Definition java_types.h:132
std::vector< parametert > parameterst
Definition std_types.h:542
void set_is_varargs(bool is_varargs)
Definition java_types.h:162
void set_is_synthetic(bool is_synthetic)
Definition java_types.h:172
void set_is_final(bool is_final)
Definition java_types.h:142
void set_native(bool is_native)
Definition java_types.h:152
This is a specialization of reference_typet.
Definition java_types.h:603
codet replace_character_call(code_function_callt call)
Logical right shift.
Extract member of struct or union.
Definition std_expr.h:2794
source_locationt source_location
Definition message.h:247
Class that provides messages with a built-in verbosity 'level'.
Definition message.h:155
mstreamt & error() const
Definition message.h:399
mstreamt & debug() const
Definition message.h:429
message_handlert & get_message_handler()
Definition message.h:184
static eomt eom
Definition message.h:297
Binary minus.
Definition std_expr.h:1006
Modulo defined as lhs-(rhs * truncate(lhs/rhs)).
Definition std_expr.h:1168
Binary multiplication Associativity is not specified.
Definition std_expr.h:1052
const typet & follow(const typet &) const
Resolve type symbol to the type it points to.
Definition namespace.cpp:49
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition namespace.h:91
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
The NIL expression.
Definition std_expr.h:3026
Disequality.
Definition std_expr.h:1365
The null pointer constant.
Boolean OR.
Definition std_expr.h:2179
Symbol table entry of function parameterThis is a symbol generated as part of type checking.
Definition symbol.h:179
Given a string of the format '?blah?', will return true when compared against a string that matches a...
Definition pattern.h:21
The plus expression Associativity is not specified.
Definition std_expr.h:947
const typet & base_type() const
The type of the data what we point to.
The reference type.
Left shift.
A side_effect_exprt representation of a side effect that throws an exception.
Definition std_code.h:1757
An expression containing a side effect.
Definition std_code.h:1450
void set_comment(const irep_idt &comment)
void set_property_class(const irep_idt &property_class)
const irep_idt & get_line() const
std::string as_string() const
void set_function(const irep_idt &function)
A struct tag type, i.e., struct_typet with an identifier.
Definition std_types.h:449
Expression to hold a symbol (variable)
Definition std_expr.h:113
static symbol_exprt typeless(const irep_idt &id)
Generate a symbol_exprt without a proper type.
Definition std_expr.h:132
const irep_idt & get_identifier() const
Definition std_expr.h:142
The symbol table base class interface.
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
virtual std::pair< symbolt &, bool > insert(symbolt symbol)=0
Move or copy a new symbol to the symbol table.
const symbolst & symbols
Read-only field, used to look up symbols given their names.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
Symbol table entry.
Definition symbol.h:28
irep_idt base_name
Base (non-scoped) name.
Definition symbol.h:46
irep_idt module
Name of module the symbol belongs to.
Definition symbol.h:43
bool is_static_lifetime
Definition symbol.h:70
source_locationt location
Source code location of definition of symbol.
Definition symbol.h:37
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition symbol.cpp:121
typet type
Type of symbol.
Definition symbol.h:31
irep_idt name
The unique identifier.
Definition symbol.h:40
irep_idt pretty_name
Language-specific display name.
Definition symbol.h:52
exprt value
Initial value of symbol.
Definition symbol.h:34
irep_idt mode
Language mode.
Definition symbol.h:49
const irep_idt & get_identifier() const
Definition std_types.h:410
Definition threeval.h:20
bool is_unknown() const
Definition threeval.h:27
bool is_true() const
Definition threeval.h:25
static tvt unknown()
Definition threeval.h:33
Semantic type conversion.
Definition std_expr.h:2017
static exprt conditional_cast(const exprt &expr, const typet &type)
Definition std_expr.h:2025
The type of an expression, extends irept.
Definition type.h:29
The unary minus expression.
Definition std_expr.h:423
static irep_idt get_exception_type(const pointer_typet &)
Returns the compile type of an exception.
Fixed-width bit-vector with unsigned binary interpretation.
An exception that is raised for unsupported class signature.
bool has_prefix(const std::string &s, const std::string &prefix)
Definition converter.cpp:13
auto type_checked_cast(TType &base) -> typename detail::expr_dynamic_cast_return_typet< T, TType >::type
Cast a reference to a generic typet to a specific derived class and checks that the type could be con...
Definition expr_cast.h:242
auto type_try_dynamic_cast(TType &base) -> typename detail::expr_try_dynamic_cast_return_typet< T, TType >::type
Try to cast a reference to a generic typet to a specific derived class.
Definition expr_cast.h:135
auto expr_try_dynamic_cast(TExpr &base) -> typename detail::expr_try_dynamic_cast_return_typet< T, TExpr >::type
Try to cast a reference to a generic exprt to a specific derived class.
Definition expr_cast.h:81
auto expr_dynamic_cast(TExpr &base) -> typename detail::expr_dynamic_cast_return_typet< T, TExpr >::type
Cast a reference to a generic exprt to a specific derived class.
Definition expr_cast.h:207
optionalt< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
Expression Initialization.
API to expression classes for floating-point arithmetic.
const std::string & id2string(const irep_idt &d)
Definition irep.h:47
static int8_t r
Definition irep_hash.h:60
void convert_annotations(const java_bytecode_parse_treet::annotationst &parsed_annotations, std::vector< java_annotationt > &java_annotations)
Convert parsed annotations into the symbol table.
void create_parameter_symbols(const java_method_typet::parameterst &parameters, expanding_vectort< std::vector< java_bytecode_convert_methodt::variablet > > &variables, symbol_table_baset &symbol_table)
static void gather_symbol_live_ranges(java_bytecode_convert_methodt::method_offsett pc, const exprt &e, std::map< irep_idt, java_bytecode_convert_methodt::variablet > &result)
static bool is_constructor(const irep_idt &method_name)
static irep_idt get_method_identifier(const irep_idt &class_identifier, const java_bytecode_parse_treet::methodt &method)
void java_bytecode_initialize_parameter_names(symbolt &method_symbol, const java_bytecode_parse_treet::methodt::local_variable_tablet &local_variable_table, symbol_table_baset &symbol_table)
This uses a cut-down version of the logic in java_bytecode_convert_methodt::convert to initialize sym...
static member_exprt to_member(const exprt &pointer, const fieldref_exprt &field_reference, const namespacet &ns)
Build a member exprt for accessing a specific field that may come from a base class.
static exprt conditional_array_cast(const exprt &expr, char type_char)
Add typecast if necessary to expr to make it compatible with array type corresponding to type_char (s...
void java_bytecode_convert_method(const symbolt &class_symbol, const java_bytecode_parse_treet::methodt &method, symbol_table_baset &symbol_table, message_handlert &message_handler, size_t max_array_length, bool throw_assertion_error, optionalt< ci_lazy_methods_neededt > needed_lazy_methods, java_string_library_preprocesst &string_preprocess, const class_hierarchyt &class_hierarchy, bool threading_support, const optionalt< prefix_filtert > &method_context, bool assert_no_exceptions_thrown)
java_method_typet member_type_lazy(const std::string &descriptor, const optionalt< std::string > &signature, const std::string &class_name, const std::string &method_name, message_handlert &message_handler)
Returns the member type for a method, based on signature or descriptor.
void create_parameter_names(const java_bytecode_parse_treet::methodt &m, const irep_idt &method_identifier, java_method_typet::parameterst &parameters, const java_bytecode_convert_methodt::method_offsett &slots_for_parameters)
Extracts the names of parameters from the local variable table in the method, and uses it to construc...
void create_method_stub_symbol(const irep_idt &identifier, const irep_idt &base_name, const irep_idt &pretty_name, const typet &type, const irep_idt &declaring_class, symbol_table_baset &symbol_table, message_handlert &message_handler)
static std::size_t get_bytecode_type_width(const typet &ty)
static void adjust_invoke_argument_types(const java_method_typet::parameterst &parameters, code_function_callt::argumentst &arguments)
static void assign_parameter_names(java_method_typet &ftype, const irep_idt &name_prefix, symbol_table_baset &symbol_table)
Iterates through the parameters of the function type ftype, finds a new new name for each parameter a...
static irep_idt get_if_cmp_operator(const u1 bytecode)
void java_bytecode_convert_method_lazy(symbolt &class_symbol, const irep_idt &method_identifier, const java_bytecode_parse_treet::methodt &m, symbol_table_baset &symbol_table, message_handlert &message_handler)
This creates a method symbol in the symtab, but doesn't actually perform method conversion just yet.
JAVA Bytecode Language Conversion.
JAVA Bytecode Language Conversion.
Java-specific exprt subclasses.
irep_idt clinit_wrapper_name(const irep_idt &class_name)
Get the Java static initializer wrapper name for a given class (the wrapper checks if static initiali...
Produce code for simple implementation of String Java libraries.
Representation of a constant Java string.
bool can_cast_expr< java_string_literal_exprt >(const exprt &base)
signedbv_typet java_int_type()
typet java_type_from_char(char t)
Constructs a type indicated by the given character:
java_reference_typet java_reference_array_type(const struct_tag_typet &subtype)
empty_typet java_void_type()
char java_char_from_type(const typet &type)
optionalt< typet > java_type_from_string(const std::string &src, const std::string &class_name_prefix)
Transforms a string representation of a Java type into an internal type representation thereof.
signedbv_typet java_byte_type()
std::string pretty_signature(const java_method_typet &method_type)
typet java_bytecode_promotion(const typet &type)
Java does not support byte/short return types. These are always promoted.
java_reference_typet java_array_type(const char subtype)
Construct an array pointer type.
signedbv_typet java_short_type()
reference_typet java_reference_type(const typet &subtype)
c_bool_typet java_boolean_type()
unsignedbv_typet java_char_type()
const typet & java_array_element_type(const struct_tag_typet &array_symbol)
Return a const reference to the element type of a given java array type.
const java_method_typet & to_java_method_type(const typet &type)
Definition java_types.h:184
const java_class_typet & to_java_class_type(const typet &type)
Definition java_types.h:582
void set_declaring_class(symbolt &symbol, const irep_idt &declaring_class)
Sets the identifier of the class which declared a given symbol to declaring_class.
unsigned java_method_parameter_slots(const java_method_typet &t)
Returns the the number of JVM local variables (slots) used by the JVM to pass, upon call,...
optionalt< irep_idt > declaring_class(const symbolt &symbol)
Gets the identifier of the class which declared a given symbol.
optionalt< resolve_inherited_componentt::inherited_componentt > get_inherited_component(const irep_idt &component_class_id, const irep_idt &component_name, const symbol_table_baset &symbol_table, bool include_interfaces)
Finds an inherited component (method or field), taking component visibility into account.
irep_idt strip_java_namespace_prefix(const irep_idt &to_strip)
Strip java:: prefix from given identifier.
dereference_exprt checked_dereference(const exprt &expr)
Dereference an expression and flag it for a null-pointer check.
unsigned java_local_variable_slots(const typet &t)
Returns the number of JVM local variables (slots) taken by a local variable that, when translated to ...
void merge_source_location_rec(exprt &expr, const source_locationt &source_location)
Attaches a source location to an expression and all of its subexpressions.
irep_idt lambda_synthetic_class_name(const irep_idt &method_identifier, std::size_t instruction_address)
static symbolt constructor_symbol(synthetic_methods_mapt &synthetic_methods, const irep_idt &synthetic_class_name, java_method_typet constructor_type)
Java lambda code synthesis.
const mp_integer string2integer(const std::string &n, unsigned base)
Definition mp_arith.cpp:54
const std::string integer2string(const mp_integer &n, unsigned base)
Definition mp_arith.cpp:103
nonstd::optional< T > optionalt
Definition optional.h:35
Pattern matching for bytecode instructions.
bool can_cast_type< reference_typet >(const typet &type)
Check whether a reference to a typet is a reference_typet.
const reference_typet & to_reference_type(const typet &type)
Cast a typet to a reference_typet.
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Prefix Filtering.
optionalt< resolve_inherited_componentt::inherited_componentt > get_inherited_method_implementation(const irep_idt &call_basename, const irep_idt &classname, const symbol_table_baset &symbol_table)
Given a class and a component, identify the concrete method it is resolved to.
Given a class and a component (either field or method), find the closest parent that defines that com...
BigInt mp_integer
Definition smt_terms.h:18
#define CHECK_RETURN(CONDITION)
Definition invariant.h:495
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition invariant.h:534
#define PRECONDITION(CONDITION)
Definition invariant.h:463
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition invariant.h:423
const code_gotot & to_code_goto(const codet &code)
Definition std_code.h:875
const code_labelt & to_code_label(const codet &code)
Definition std_code.h:1004
const code_blockt & to_code_block(const codet &code)
Definition std_code.h:203
const codet & to_code(const exprt &expr)
API to expression classes.
const constant_exprt & to_constant_expr(const exprt &expr)
Cast an exprt to a constant_exprt.
Definition std_expr.h:2992
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition std_expr.h:222
const code_typet & to_code_type(const typet &type)
Cast a typet to a code_typet.
Definition std_types.h:744
const struct_typet & to_struct_type(const typet &type)
Cast a typet to a struct_typet.
Definition std_types.h:308
const struct_tag_typet & to_struct_tag_type(const typet &type)
Cast a typet to a struct_tag_typet.
Definition std_types.h:474
bool can_cast_type< bitvector_typet >(const typet &type)
Check whether a reference to a typet is a bitvector_typet.
Definition std_types.h:899
void replace(const union_find_replacet &replace_map, string_not_contains_constraintt &constraint)
const char * mnemonic
std::vector< annotationst > parameter_annotations
Java annotations that were applied to parameters of this method.
std::vector< local_variablet > local_variable_tablet
static optionalt< annotationt > find_annotation(const annotationst &annotations, const irep_idt &annotation_type_name)
Find an annotation given its name.
Author: Diffblue Ltd.
Over-approximative uncaught exceptions analysis.
dstringt irep_idt