cprover
java_bytecode_language.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module:
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
10 
11 #include <string>
12 
13 #include <util/symbol_table.h>
14 #include <util/suffix.h>
15 #include <util/config.h>
16 #include <util/cmdline.h>
17 #include <util/expr_iterator.h>
19 #include <util/string2int.h>
20 #include <util/invariant.h>
21 #include <json/json_parser.h>
22 
24 
31 #include "java_entry_point.h"
32 #include "java_bytecode_parser.h"
33 #include "java_class_loader.h"
34 #include "java_string_literals.h"
36 #include "java_utils.h"
37 #include "ci_lazy_methods.h"
38 
39 #include "expr2java.h"
40 
45 {
46  assume_inputs_non_null=cmd.isset("java-assume-inputs-non-null");
47  string_refinement_enabled=cmd.isset("refine-strings");
48  throw_runtime_exceptions=cmd.isset("java-throw-runtime-exceptions");
49  assert_uncaught_exceptions = !cmd.isset("disable-uncaught-exception-check");
50  throw_assertion_error = cmd.isset("throw-assertion-error");
51  threading_support = cmd.isset("java-threading");
52 
53  if(cmd.isset("java-max-input-array-length"))
55  std::stoi(cmd.get_value("java-max-input-array-length"));
56  if(cmd.isset("java-max-input-tree-depth"))
58  std::stoi(cmd.get_value("java-max-input-tree-depth"));
59  if(cmd.isset("string-max-input-length"))
61  std::stoi(cmd.get_value("string-max-input-length"));
62  else if(cmd.isset("string-max-length"))
64  std::stoi(cmd.get_value("string-max-length"));
65 
66  object_factory_parameters.string_printable = cmd.isset("string-printable");
67  if(cmd.isset("java-max-vla-length"))
68  max_user_array_length=std::stoi(cmd.get_value("java-max-vla-length"));
69  if(cmd.isset("symex-driven-lazy-loading"))
71  else if(cmd.isset("lazy-methods"))
73  else
75 
77  {
78  java_load_classes.insert(
79  java_load_classes.end(),
82  }
83  if(cmd.isset("java-load-class"))
84  {
85  const auto &values = cmd.get_values("java-load-class");
86  java_load_classes.insert(
87  java_load_classes.end(), values.begin(), values.end());
88  }
89  if(cmd.isset("java-no-load-class"))
90  {
91  const auto &values = cmd.get_values("java-no-load-class");
92  no_load_classes = {values.begin(), values.end()};
93  }
94 
95  const std::list<std::string> &extra_entry_points=
96  cmd.get_values("lazy-methods-extra-entry-point");
99  extra_entry_points.begin(),
100  extra_entry_points.end());
101 
102  if(cmd.isset("java-cp-include-files"))
103  {
104  java_cp_include_files=cmd.get_value("java-cp-include-files");
105  // load file list from JSON file
106  if(java_cp_include_files[0]=='@')
107  {
108  jsont json_cp_config;
109  if(parse_json(
110  java_cp_include_files.substr(1),
112  json_cp_config))
113  throw "cannot read JSON input configuration for JAR loading";
114 
115  if(!json_cp_config.is_object())
116  throw "the JSON file has a wrong format";
117  jsont include_files=json_cp_config["jar"];
118  if(!include_files.is_array())
119  throw "the JSON file has a wrong format";
120 
121  // add jars from JSON config file to classpath
122  for(const jsont &file_entry : include_files.array)
123  {
125  file_entry.is_string() && has_suffix(file_entry.value, ".jar"),
126  "classpath entry must be jar filename, but '" + file_entry.value +
127  "' found");
128  config.java.classpath.push_back(file_entry.value);
129  }
130  }
131  }
132  else
134 
136 }
137 
138 std::set<std::string> java_bytecode_languaget::extensions() const
139 {
140  return { "class", "jar" };
141 }
142 
143 void java_bytecode_languaget::modules_provided(std::set<std::string> &modules)
144 {
145  // modules.insert(translation_unit(parse_path));
146 }
147 
150  std::istream &instream,
151  const std::string &path,
152  std::ostream &outstream)
153 {
154  // there is no preprocessing!
155  return true;
156 }
157 
159  std::istream &instream,
160  const std::string &path)
161 {
167  {
169 
170  auto get_string_base_classes = [this](const irep_idt &id) {
172  };
173 
174  java_class_loader.set_extra_class_refs_function(get_string_base_classes);
175  }
176 
177  // look at extension
178  if(has_suffix(path, ".class"))
179  {
180  // override main_class
182  }
183  else if(has_suffix(path, ".jar"))
184  {
185  // build an object to potentially limit which classes are loaded
186  java_class_loader_limitt class_loader_limit(
190  {
191  auto manifest=
192  java_class_loader.jar_pool(class_loader_limit, path).get_manifest();
193  std::string manifest_main_class=manifest["Main-Class"];
194 
195  // if the manifest declares a Main-Class line, we got a main class
196  if(manifest_main_class!="")
197  main_class=manifest_main_class;
198  }
199  else
201 
202  // do we have one now?
203  if(main_class.empty())
204  {
205  status() << "JAR file without entry point: loading class files" << eom;
206  java_class_loader.load_entire_jar(class_loader_limit, path);
207  for(const auto &kv : java_class_loader.get_jar_index(path))
208  main_jar_classes.push_back(kv.first);
209  }
210  else
212  }
213  else
214  UNREACHABLE;
215 
216  if(!main_class.empty())
217  {
218  status() << "Java main class: " << main_class << eom;
220  }
221 
222  return false;
223 }
224 
234  const java_bytecode_parse_treet &parse_tree,
235  symbol_tablet &symbol_table)
236 {
237  namespacet ns(symbol_table);
238  for(const auto &method : parse_tree.parsed_class.methods)
239  {
240  for(const java_bytecode_parse_treet::instructiont &instruction :
241  method.instructions)
242  {
243  if(instruction.statement == "getfield" ||
244  instruction.statement == "putfield")
245  {
246  const exprt &fieldref = instruction.args[0];
247  irep_idt class_symbol_id = fieldref.get(ID_class);
248  const symbolt *class_symbol = symbol_table.lookup(class_symbol_id);
249  INVARIANT(
250  class_symbol != nullptr,
251  "all types containing fields should have been loaded");
252 
253  const class_typet *class_type = &to_class_type(class_symbol->type);
254  const irep_idt &component_name = fieldref.get(ID_component_name);
255  while(!class_type->has_component(component_name))
256  {
257  if(class_type->get_bool(ID_incomplete_class))
258  {
259  // Accessing a field of an incomplete (opaque) type.
260  symbolt &writable_class_symbol =
261  symbol_table.get_writeable_ref(class_symbol_id);
262  auto &components =
263  to_struct_type(writable_class_symbol.type).components();
264  components.emplace_back(component_name, fieldref.type());
265  components.back().set_base_name(component_name);
266  components.back().set_pretty_name(component_name);
267  break;
268  }
269  else
270  {
271  // Not present here: check the superclass.
272  INVARIANT(
273  !class_type->bases().empty(),
274  "class '" + id2string(class_symbol->name)
275  + "' (which was missing a field '" + id2string(component_name)
276  + "' referenced from method '" + id2string(method.name)
277  + "') should have an opaque superclass");
278  const symbol_typet &superclass_type =
279  to_symbol_type(class_type->bases().front().type());
280  class_symbol_id = superclass_type.get_identifier();
281  class_type = &to_class_type(ns.follow(superclass_type));
282  }
283  }
284  }
285  }
286  }
287 }
288 
295  const irep_idt &class_id, symbol_tablet &symbol_table)
296 {
297  symbol_typet java_lang_Class("java::java.lang.Class");
298  symbol_exprt symbol_expr(
300  java_lang_Class);
301  if(!symbol_table.has_symbol(symbol_expr.get_identifier()))
302  {
303  symbolt new_class_symbol;
304  new_class_symbol.name = symbol_expr.get_identifier();
305  new_class_symbol.type = symbol_expr.type();
306  INVARIANT(
307  has_prefix(id2string(new_class_symbol.name), "java::"),
308  "class identifier should have 'java::' prefix");
309  new_class_symbol.base_name =
310  id2string(new_class_symbol.name).substr(6);
311  new_class_symbol.mode = ID_java;
312  new_class_symbol.is_lvalue = true;
313  new_class_symbol.is_state_var = true;
314  new_class_symbol.is_static_lifetime = true;
315  symbol_table.add(new_class_symbol);
316  }
317 
318  return symbol_expr;
319 }
320 
336  const exprt &ldc_arg0,
337  symbol_tablet &symbol_table,
338  bool string_refinement_enabled)
339 {
340  if(ldc_arg0.id() == ID_type)
341  {
342  const irep_idt &class_id = ldc_arg0.type().get(ID_identifier);
343  return
345  get_or_create_class_literal_symbol(class_id, symbol_table));
346  }
347  else if(ldc_arg0.id() == ID_java_string_literal)
348  {
349  return
352  ldc_arg0, symbol_table, string_refinement_enabled));
353  }
354  else
355  {
356  INVARIANT(
357  ldc_arg0.id() == ID_constant,
358  "ldc argument should be constant, string literal or class literal");
359  return ldc_arg0;
360  }
361 }
362 
373  java_bytecode_parse_treet &parse_tree,
374  symbol_tablet &symbol_table,
375  bool string_refinement_enabled)
376 {
377  for(auto &method : parse_tree.parsed_class.methods)
378  {
379  for(java_bytecode_parse_treet::instructiont &instruction :
380  method.instructions)
381  {
382  // ldc* instructions are Java bytecode "load constant" ops, which can
383  // retrieve a numeric constant, String literal, or Class literal.
384  if(instruction.statement == "ldc" ||
385  instruction.statement == "ldc2" ||
386  instruction.statement == "ldc_w" ||
387  instruction.statement == "ldc2_w")
388  {
389  INVARIANT(
390  instruction.args.size() != 0,
391  "ldc instructions should have an argument");
392  instruction.args[0] =
394  instruction.args[0],
395  symbol_table,
396  string_refinement_enabled);
397  }
398  }
399  }
400 }
401 
414  symbol_table_baset &symbol_table,
415  const irep_idt &symbol_id,
416  const irep_idt &symbol_basename,
417  const typet &symbol_type,
418  const irep_idt &class_id,
419  bool force_nondet_init)
420 {
421  symbolt new_symbol;
422  new_symbol.is_static_lifetime = true;
423  new_symbol.is_lvalue = true;
424  new_symbol.is_state_var = true;
425  new_symbol.name = symbol_id;
426  new_symbol.base_name = symbol_basename;
427  new_symbol.type = symbol_type;
428  new_symbol.type.set(ID_C_class, class_id);
429  // Public access is a guess; it encourages merging like-typed static fields,
430  // whereas a more restricted visbility would encourage separating them.
431  // Neither is correct, as without the class file we can't know the truth.
432  new_symbol.type.set(ID_C_access, ID_public);
433  new_symbol.pretty_name = new_symbol.name;
434  new_symbol.mode = ID_java;
435  new_symbol.is_type = false;
436  // If pointer-typed, initialise to null and a static initialiser will be
437  // created to initialise on first reference. If primitive-typed, specify
438  // nondeterministic initialisation by setting a nil value.
439  if(symbol_type.id() == ID_pointer && !force_nondet_init)
440  new_symbol.value = null_pointer_exprt(to_pointer_type(symbol_type));
441  else
442  new_symbol.value.make_nil();
443  bool add_failed = symbol_table.add(new_symbol);
444  INVARIANT(
445  !add_failed, "caller should have checked symbol not already in table");
446 }
447 
457  const irep_idt &start_class_id,
458  const symbol_tablet &symbol_table,
459  const class_hierarchyt &class_hierarchy)
460 {
461  // Depth-first search: return the first ancestor with ID_incomplete_class, or
462  // irep_idt() if none found.
463  std::vector<irep_idt> classes_to_check;
464  classes_to_check.push_back(start_class_id);
465 
466  while(!classes_to_check.empty())
467  {
468  irep_idt to_check = classes_to_check.back();
469  classes_to_check.pop_back();
470 
471  // Exclude java.lang.Object because it can
472  if(symbol_table.lookup_ref(to_check).type.get_bool(ID_incomplete_class) &&
473  to_check != "java::java.lang.Object")
474  {
475  return to_check;
476  }
477 
478  const class_hierarchyt::idst &parents =
479  class_hierarchy.class_map.at(to_check).parents;
480  classes_to_check.insert(
481  classes_to_check.end(), parents.begin(), parents.end());
482  }
483 
484  return irep_idt();
485 }
486 
497  const java_bytecode_parse_treet &parse_tree,
498  symbol_table_baset &symbol_table,
499  const class_hierarchyt &class_hierarchy,
500  messaget &log)
501 {
502  namespacet ns(symbol_table);
503  for(const auto &method : parse_tree.parsed_class.methods)
504  {
505  for(const java_bytecode_parse_treet::instructiont &instruction :
506  method.instructions)
507  {
508  if(instruction.statement == "getstatic" ||
509  instruction.statement == "putstatic")
510  {
511  INVARIANT(
512  instruction.args.size() > 0,
513  "get/putstatic should have at least one argument");
514  irep_idt component = instruction.args[0].get_string(ID_component_name);
515  INVARIANT(
516  !component.empty(), "get/putstatic should specify a component");
517  irep_idt class_id = instruction.args[0].get_string(ID_class);
518  INVARIANT(
519  !class_id.empty(), "get/putstatic should specify a class");
520 
521  // The final 'true' parameter here includes interfaces, as they can
522  // define static fields.
525  class_id,
526  component,
527  "java::" + id2string(parse_tree.parsed_class.name),
528  symbol_table,
529  class_hierarchy,
530  true);
531  if(!referred_component.is_valid())
532  {
533  // Create a new stub global on an arbitrary incomplete ancestor of the
534  // class that was referred to. This is just a guess, but we have no
535  // better information to go on.
536  irep_idt add_to_class_id =
538  class_id, symbol_table, class_hierarchy);
539 
540  // If there are no incomplete ancestors to ascribe the missing field
541  // to, we must have an incomplete model of a class or simply a
542  // version mismatch of some kind. Normally this would be an error, but
543  // our models library currently triggers this error in some cases
544  // (notably java.lang.System, which is missing System.in/out/err).
545  // Therefore for this case we ascribe the missing field to the class
546  // it was directly referenced from, and fall back to initialising the
547  // field in __CPROVER_initialize, rather than try to create or augment
548  // a clinit method for a non-stub class.
549 
550  bool no_incomplete_ancestors = add_to_class_id.empty();
551  if(no_incomplete_ancestors)
552  {
553  add_to_class_id = class_id;
554 
555  // TODO forbid this again once the models library has been checked
556  // for missing static fields.
557  log.warning() << "Stub static field " << component << " found for "
558  << "non-stub type " << class_id << ". In future this "
559  << "will be a fatal error." << messaget::eom;
560  }
561 
562  irep_idt identifier =
563  id2string(add_to_class_id) + "." + id2string(component);
564 
566  symbol_table,
567  identifier,
568  component,
569  instruction.args[0].type(),
570  add_to_class_id,
571  no_incomplete_ancestors);
572  }
573  }
574  }
575  }
576 }
577 
579  symbol_tablet &symbol_table,
580  const std::string &module)
581 {
583 
584  java_internal_additions(symbol_table);
585 
588 
589  // Must load java.lang.Object first to avoid stubbing
590  // This ordering could alternatively be enforced by
591  // moving the code below to the class loader.
592  java_class_loadert::parse_tree_with_overridest_mapt::const_iterator it =
593  java_class_loader.get_class_with_overlays_map().find("java.lang.Object");
595  {
596  if(
598  it->second,
599  symbol_table,
605  {
606  return true;
607  }
608  }
609 
610  // first generate a new struct symbol for each class and a new function symbol
611  // for every method
612  for(const auto &class_trees : java_class_loader.get_class_with_overlays_map())
613  {
614  if(class_trees.second.front().parsed_class.name.empty())
615  continue;
616 
617  if(
619  class_trees.second,
620  symbol_table,
626  {
627  return true;
628  }
629  }
630 
631  // Now that all classes have been created in the symbol table we can populate
632  // the class hierarchy:
633  class_hierarchy(symbol_table);
634 
635  // find and mark all implicitly generic class types
636  // this can only be done once all the class symbols have been created
637  for(const auto &c : java_class_loader.get_class_with_overlays_map())
638  {
639  if(c.second.front().parsed_class.name.empty())
640  continue;
641  try
642  {
644  c.second.front().parsed_class.name, symbol_table);
645  }
647  {
649  << "Not marking class " << c.first
650  << " implicitly generic due to missing outer class symbols"
651  << messaget::eom;
652  }
653  }
654 
655  // Infer fields on opaque types based on the method instructions just loaded.
656  // For example, if we don't have bytecode for field x of class A, but we can
657  // see an int-typed getfield instruction referring to it, add that field now.
658  for(auto &class_to_trees : java_class_loader.get_class_with_overlays_map())
659  {
660  for(const java_bytecode_parse_treet &parse_tree : class_to_trees.second)
661  infer_opaque_type_fields(parse_tree, symbol_table);
662  }
663 
664  // Create global variables for constants (String and Class literals) up front.
665  // This means that when running with lazy loading, we will be aware of these
666  // literal globals' existence when __CPROVER_initialize is generated in
667  // `generate_support_functions`.
668  const std::size_t before_constant_globals_size = symbol_table.symbols.size();
669  for(auto &class_to_trees : java_class_loader.get_class_with_overlays_map())
670  {
671  for(java_bytecode_parse_treet &parse_tree : class_to_trees.second)
672  {
674  parse_tree, symbol_table, string_refinement_enabled);
675  }
676  }
677  status() << "Java: added "
678  << (symbol_table.symbols.size() - before_constant_globals_size)
679  << " String or Class constant symbols"
680  << messaget::eom;
681 
682  // For each reference to a stub global (that is, a global variable declared on
683  // a class we don't have bytecode for, and therefore don't know the static
684  // initialiser for), create a synthetic static initialiser (clinit method)
685  // to nondet initialise it.
686  // Note this must be done before making static initialiser wrappers below, as
687  // this makes a Classname.clinit method, then the next pass makes a wrapper
688  // that ensures it is only run once, and that static initialisation happens
689  // in class-graph topological order.
690 
691  {
692  journalling_symbol_tablet symbol_table_journal =
693  journalling_symbol_tablet::wrap(symbol_table);
694  for(auto &class_to_trees : java_class_loader.get_class_with_overlays_map())
695  {
696  for(const java_bytecode_parse_treet &parse_tree : class_to_trees.second)
697  {
699  parse_tree, symbol_table_journal, class_hierarchy, *this);
700  }
701  }
702 
704  symbol_table, symbol_table_journal.get_inserted(), synthetic_methods);
705  }
706 
707  // For each class that will require a static initializer wrapper, create a
708  // function named package.classname::clinit_wrapper, and a corresponding
709  // global tracking whether it has run or not:
711  symbol_table, synthetic_methods, threading_support);
712 
713  // Now incrementally elaborate methods
714  // that are reachable from this entry point.
716  {
717  // ci: context-insensitive.
719  return true;
720  }
722  {
723  journalling_symbol_tablet journalling_symbol_table =
724  journalling_symbol_tablet::wrap(symbol_table);
725 
726  // Convert all synthetic methods:
727  for(const auto &function_id_and_type : synthetic_methods)
728  {
730  function_id_and_type.first, journalling_symbol_table);
731  }
732  // Convert all methods for which we have bytecode now
733  for(const auto &method_sig : method_bytecode)
734  {
735  convert_single_method(method_sig.first, journalling_symbol_table);
736  }
737  // Now convert all newly added string methods
738  for(const auto &fn_name : journalling_symbol_table.get_inserted())
739  {
741  convert_single_method(fn_name, symbol_table);
742  }
743  }
744  // Otherwise our caller is in charge of elaborating methods on demand.
745 
746  // now instrument runtime exceptions
748  symbol_table,
751 
752  // now typecheck all
753  bool res = java_bytecode_typecheck(
755 
756  // now instrument thread-blocks
758  convert_threadblock(symbol_table);
759 
760  return res;
761 }
762 
764  symbol_tablet &symbol_table)
765 {
767 
770  if(!res.is_success())
771  return res.is_error();
772 
773  // Load the main function into the symbol table to get access to its
774  // parameter names
775  convert_lazy_method(res.main_function.name, symbol_table);
776 
777  // generate the test harness in __CPROVER__start and a call the entry point
778  return java_entry_point(
779  symbol_table,
780  main_class,
787 }
788 
803  symbol_tablet &symbol_table,
804  method_bytecodet &method_bytecode)
805 {
806  const method_convertert method_converter =
807  [this, &symbol_table]
808  (const irep_idt &function_id, ci_lazy_methods_neededt lazy_methods_needed)
809  {
810  return convert_single_method(
811  function_id, symbol_table, std::move(lazy_methods_needed));
812  };
813 
814  ci_lazy_methodst method_gather(
815  symbol_table,
816  main_class,
824 
825  return method_gather(symbol_table, method_bytecode, method_converter);
826 }
827 
828 const select_pointer_typet &
830 {
831  PRECONDITION(pointer_type_selector.get()!=nullptr);
832  return *pointer_type_selector;
833 }
834 
841  std::unordered_set<irep_idt> &methods) const
842 {
843  static std::string cprover_class_prefix = "java::org.cprover.CProver.";
844 
845  // Add all string solver methods to map
847  // Add all concrete methods to map
848  for(const auto &kv : method_bytecode)
849  {
850  const std::string &method_id = id2string(kv.first);
851 
852  // Avoid advertising org.cprover.CProver methods that the Java frontend will
853  // never provide bodies for (java_bytecode_convert_method always leaves them
854  // bodyless with intent for the driver program to stub them):
855  if(has_prefix(method_id, cprover_class_prefix))
856  {
857  std::size_t method_name_end_offset =
858  method_id.find(':', cprover_class_prefix.length());
859  INVARIANT(
860  method_name_end_offset != std::string::npos,
861  "org.cprover.CProver method should have a postfix type descriptor");
862 
863  const std::string method_name =
864  method_id.substr(
865  cprover_class_prefix.length(),
866  method_name_end_offset - cprover_class_prefix.length());
867 
868  if(cprover_methods_to_ignore.count(method_name))
869  continue;
870  }
871  methods.insert(kv.first);
872  }
873  // Add all synthetic methods to map
874  for(const auto &kv : synthetic_methods)
875  methods.insert(kv.first);
876 }
877 
887  const irep_idt &function_id,
888  symbol_table_baset &symtab)
889 {
890  const symbolt &symbol = symtab.lookup_ref(function_id);
891  if(symbol.value.is_not_nil())
892  return;
893 
894  journalling_symbol_tablet symbol_table=
895  journalling_symbol_tablet::wrap(symtab);
896 
897  convert_single_method(function_id, symbol_table);
898 
899  // Instrument runtime exceptions (unless symbol is a stub)
900  if(symbol.value.is_not_nil())
901  {
903  symbol_table,
904  symbol_table.get_writeable_ref(function_id),
907  }
908 
909  // now typecheck this function
912 }
913 
921  const exprt &function_body,
922  optionalt<ci_lazy_methods_neededt> needed_lazy_methods)
923 {
924  if(needed_lazy_methods)
925  {
926  for(const_depth_iteratort it = function_body.depth_cbegin();
927  it != function_body.depth_cend();
928  ++it)
929  {
930  if(it->id() == ID_code)
931  {
932  const auto fn_call = expr_try_dynamic_cast<code_function_callt>(*it);
933  if(!fn_call)
934  continue;
935  // Only support non-virtual function calls for now, if string solver
936  // starts to introduce virtual function calls then we will need to
937  // duplicate the behavior of java_bytecode_convert_method where it
938  // handles the invokevirtual instruction
939  const symbol_exprt &fn_sym =
940  expr_dynamic_cast<symbol_exprt>(fn_call->function());
941  needed_lazy_methods->add_needed_method(fn_sym.get_identifier());
942  }
943  }
944  }
945 }
946 
957  const irep_idt &function_id,
958  symbol_table_baset &symbol_table,
959  optionalt<ci_lazy_methods_neededt> needed_lazy_methods)
960 {
961  const symbolt &symbol = symbol_table.lookup_ref(function_id);
962 
963  // Nothing to do if body is already loaded
964  if(symbol.value.is_not_nil())
965  return false;
966 
967  // Get bytecode for specified function if we have it
969 
970  synthetic_methods_mapt::iterator synthetic_method_it;
971 
972  // Check if have a string solver implementation
973  if(string_preprocess.implements_function(function_id))
974  {
975  symbolt &symbol = symbol_table.get_writeable_ref(function_id);
976  // Load parameter names from any extant bytecode before filling in body
977  if(cmb)
978  {
980  symbol, cmb->get().method.local_variable_table, symbol_table);
981  }
982  // Populate body of the function with code generated by string preprocess
983  exprt generated_code =
984  string_preprocess.code_for_function(symbol, symbol_table);
985  INVARIANT(
986  generated_code.is_not_nil(), "Couldn't retrieve code for string method");
987  // String solver can make calls to functions that haven't yet been seen.
988  // Add these to the needed_lazy_methods collection
989  notify_static_method_calls(generated_code, needed_lazy_methods);
990  symbol.value = generated_code;
991  return false;
992  }
993  else if(
994  (synthetic_method_it = synthetic_methods.find(function_id)) !=
995  synthetic_methods.end())
996  {
997  // Synthetic method (i.e. one generated by the Java frontend and which
998  // doesn't occur in the source bytecode):
999  symbolt &symbol = symbol_table.get_writeable_ref(function_id);
1000  switch(synthetic_method_it->second)
1001  {
1003  if(threading_support)
1005  function_id, symbol_table);
1006  else
1007  symbol.value = get_clinit_wrapper_body(function_id, symbol_table);
1008  break;
1010  symbol.value =
1012  function_id,
1013  symbol_table,
1016  break;
1017  }
1018  // Notify lazy methods of static calls made from the newly generated
1019  // function:
1020  notify_static_method_calls(symbol.value, needed_lazy_methods);
1021  return false;
1022  }
1023 
1024  // No string solver or static init wrapper implementation;
1025  // check if have bytecode for it
1026  if(cmb)
1027  {
1029  symbol_table.lookup_ref(cmb->get().class_id),
1030  cmb->get().method,
1031  symbol_table,
1035  std::move(needed_lazy_methods),
1037  class_hierarchy);
1038  return false;
1039  }
1040 
1041  // The return of an opaque function is a source of an otherwise invisible
1042  // instantiation, so here we ensure we've loaded the appropriate classes.
1043  const code_typet function_type = to_code_type(symbol.type);
1044  if(
1045  const pointer_typet *pointer_return_type =
1046  type_try_dynamic_cast<pointer_typet>(function_type.return_type()))
1047  {
1048  // If the return type is abstract, we won't forcibly instantiate it here
1049  // otherwise this can cause abstract methods to be explictly called
1050  // TODO(tkiley): Arguably no abstract class should ever be added to
1051  // TODO(tkiley): ci_lazy_methods_neededt, but this needs further
1052  // TODO(tkiley): investigation
1053  namespacet ns{symbol_table};
1054  const java_class_typet &underlying_type =
1055  to_java_class_type(ns.follow(pointer_return_type->subtype()));
1056 
1057  if(!underlying_type.is_abstract())
1058  needed_lazy_methods->add_all_needed_classes(*pointer_return_type);
1059  }
1060 
1061  return true;
1062 }
1063 
1065 {
1067  return false;
1068 }
1069 
1071 {
1074  parse_trees.front().output(out);
1075  if(parse_trees.size() > 1)
1076  {
1077  out << "\n\nClass has the following overlays:\n\n";
1078  for(auto parse_tree_it = std::next(parse_trees.begin());
1079  parse_tree_it != parse_trees.end();
1080  ++parse_tree_it)
1081  {
1082  parse_tree_it->output(out);
1083  }
1084  out << "End of class overlays.\n";
1085  }
1086 }
1087 
1088 std::unique_ptr<languaget> new_java_bytecode_language()
1089 {
1090  return util_make_unique<java_bytecode_languaget>();
1091 }
1092 
1094  const exprt &expr,
1095  std::string &code,
1096  const namespacet &ns)
1097 {
1098  code=expr2java(expr, ns);
1099  return false;
1100 }
1101 
1103  const typet &type,
1104  std::string &code,
1105  const namespacet &ns)
1106 {
1107  code=type2java(type, ns);
1108  return false;
1109 }
1110 
1112  const std::string &code,
1113  const std::string &module,
1114  exprt &expr,
1115  const namespacet &ns)
1116 {
1117  #if 0
1118  expr.make_nil();
1119 
1120  // no preprocessing yet...
1121 
1122  std::istringstream i_preprocessed(code);
1123 
1124  // parsing
1125 
1126  java_bytecode_parser.clear();
1127  java_bytecode_parser.filename="";
1128  java_bytecode_parser.in=&i_preprocessed;
1129  java_bytecode_parser.set_message_handler(message_handler);
1130  java_bytecode_parser.grammar=java_bytecode_parsert::EXPRESSION;
1131  java_bytecode_parser.mode=java_bytecode_parsert::GCC;
1132  java_bytecode_scanner_init();
1133 
1134  bool result=java_bytecode_parser.parse();
1135 
1136  if(java_bytecode_parser.parse_tree.items.empty())
1137  result=true;
1138  else
1139  {
1140  expr=java_bytecode_parser.parse_tree.items.front().value();
1141 
1142  result=java_bytecode_convert(expr, "", message_handler);
1143 
1144  // typecheck it
1145  if(!result)
1147  }
1148 
1149  // save some memory
1150  java_bytecode_parser.clear();
1151 
1152  return result;
1153  #endif
1154 
1155  return true; // fail for now
1156 }
1157 
1159 {
1160 }
std::vector< irep_idt > java_load_classes
The type of an expression.
Definition: type.h:22
irep_idt name
The unique identifier.
Definition: symbol.h:43
const std::list< std::string > & get_values(const std::string &option) const
Definition: cmdline.cpp:110
std::string type2java(const typet &type, const namespacet &ns)
Definition: expr2java.cpp:462
void create_static_initializer_wrappers(symbol_tablet &symbol_table, synthetic_methods_mapt &synthetic_methods, const bool thread_safe)
Create static initializer wrappers for all classes that need them.
static exprt get_ldc_result(const exprt &ldc_arg0, symbol_tablet &symbol_table, bool string_refinement_enabled)
Get result of a Java load-constant (ldc) instruction.
optionalt< std::reference_wrapper< const class_method_and_bytecodet > > opt_reft
void java_bytecode_typecheck_updated_symbols(journalling_symbol_tablet &symbol_table, message_handlert &message_handler, bool string_refinement_enabled)
A static initializer wrapper (code of the form if(!already_run) clinit(); already_run = true;) These ...
Base type of functions.
Definition: std_types.h:764
bool is_object() const
Definition: json.h:49
const std::string & id2string(const irep_idt &d)
Definition: irep.h:43
bool is_not_nil() const
Definition: irep.h:103
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
const jar_indext & get_jar_index(const std::string &jar_path)
static symbol_exprt get_or_create_class_literal_symbol(const irep_idt &class_id, symbol_tablet &symbol_table)
Create if necessary, then return the constant global java.lang.Class symbol for a given class id...
const_depth_iteratort depth_cbegin() const
Definition: expr.cpp:307
static void generate_constant_global_variables(java_bytecode_parse_treet &parse_tree, symbol_tablet &symbol_table, bool string_refinement_enabled)
Creates global variables for constants mentioned in a given method.
java_class_loadert java_class_loader
std::function< bool(const irep_idt &function_id, ci_lazy_methods_neededt)> method_convertert
irep_idt mode
Language mode.
Definition: symbol.h:52
const code_typet & to_code_type(const typet &type)
Cast a generic typet to a code_typet.
Definition: std_types.h:987
main_function_resultt get_main_symbol(const symbol_table_baset &symbol_table, const irep_idt &main_class, message_handlert &message_handler)
Figures out the entry point of the code to verify.
void mark_java_implicitly_generic_class_type(const irep_idt &class_name, symbol_tablet &symbol_table)
Checks if the class is implicitly generic, i.e., it is an inner class of any generic class...
void convert_threadblock(symbol_tablet &symbol_table)
Iterate through the symbol table to find and appropriately instrument thread-blocks.
void modules_provided(std::set< std::string > &modules) override
const irep_idt & get_identifier() const
Definition: std_expr.h:128
Definition: json.h:23
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
bool java_bytecode_typecheck(symbol_table_baset &symbol_table, message_handlert &message_handler, bool string_refinement_enabled)
const symbol_typet & to_symbol_type(const typet &type)
Cast a generic typet to a symbol_typet.
Definition: std_types.h:139
The null pointer constant.
Definition: std_expr.h:4520
exprt value
Initial value of symbol.
Definition: symbol.h:37
const componentst & components() const
Definition: std_types.h:245
std::string get_value(char option) const
Definition: cmdline.cpp:45
jar_filet & jar_pool(java_class_loader_limitt &limit, const std::string &filename)
Load jar archive or retrieve from cache if already loaded.
codet get_thread_safe_clinit_wrapper_body(const irep_idt &function_id, symbol_table_baset &symbol_table)
Thread safe version of the static initialiser.
irep_idt pretty_name
Language-specific display name.
Definition: symbol.h:55
const select_pointer_typet & get_pointer_type_selector() const
typet & type()
Definition: expr.h:56
std::vector< irep_idt > lazy_methods_extra_entry_points
const class_typet & to_class_type(const typet &type)
Cast a generic typet to a class_typet.
Definition: std_types.h:435
Symbol table entry.This is a symbol in the symbol table, stored in an object of type symbol_tablet...
Definition: symbol.h:30
java_string_library_preprocesst string_preprocess
void show_parse(std::ostream &out) override
auto expr_dynamic_cast(TExpr &base) -> typename detail::expr_dynamic_cast_return_typet< T, TExpr >::type
Cast a reference to a generic exprt to a specific derived class.
Definition: expr_cast.h:165
void initialize_conversion_table()
fill maps with correspondence from java method names to conversion functions
static mstreamt & eom(mstreamt &m)
Definition: message.h:272
bool get_bool(const irep_namet &name) const
Definition: irep.cpp:240
configt config
Definition: config.cpp:23
codet get_clinit_wrapper_body(const irep_idt &function_id, symbol_table_baset &symbol_table)
Produces the static initialiser wrapper body for the given function.
static irep_idt get_any_incomplete_ancestor_for_stub_static_field(const irep_idt &start_class_id, const symbol_tablet &symbol_table, const class_hierarchyt &class_hierarchy)
Find any incomplete ancestor of a given class that can have a stub static field attached to it...
bool parse_json(std::istream &in, const std::string &filename, message_handlert &message_handler, jsont &dest)
Definition: json_parser.cpp:16
virtual void convert_lazy_method(const irep_idt &function_id, symbol_table_baset &symbol_table) override
Promote a lazy-converted method (one whose type is known but whose body hasn&#39;t been converted) into a...
bool is_static_lifetime
Definition: symbol.h:67
std::list< java_bytecode_parse_treet > parse_tree_with_overlayst
#define INVARIANT(CONDITION, REASON)
Definition: invariant.h:205
void load_entire_jar(java_class_loader_limitt &, const std::string &jar_path)
object_factory_parameterst object_factory_parameters
mstreamt & warning() const
Definition: message.h:307
bool from_expr(const exprt &expr, std::string &code, const namespacet &ns) override
Formats the given expression in a language-specific way.
Class Hierarchy.
static void create_stub_global_symbols(const java_bytecode_parse_treet &parse_tree, symbol_table_baset &symbol_table, const class_hierarchyt &class_hierarchy, messaget &log)
Search for getstatic and putstatic instructions in a class&#39; bytecode and create stub symbols for any ...
JAVA Bytecode Language Type Checking.
const irep_idt & id() const
Definition: irep.h:189
bool java_bytecode_convert_class(const java_class_loadert::parse_tree_with_overlayst &parse_trees, symbol_tablet &symbol_table, message_handlert &message_handler, size_t max_array_length, method_bytecodet &method_bytecode, java_string_library_preprocesst &string_preprocess, const std::unordered_set< std::string > &no_load_classes)
See class java_bytecode_convert_classt.
static void create_stub_global_symbol(symbol_table_baset &symbol_table, const irep_idt &symbol_id, const irep_idt &symbol_basename, const typet &symbol_type, const irep_idt &class_id, bool force_nondet_init)
Add a stub global symbol to the symbol table, initialising pointer-typed symbols with null and primit...
classpatht classpath
Definition: config.h:147
std::set< std::string > extensions() const override
virtual bool isset(char option) const
Definition: cmdline.cpp:27
static std::string file_to_class_name(const std::string &)
A reference into the symbol table.
Definition: std_types.h:110
size_t max_nondet_tree_depth
Maximum depth for object hierarchy on input.
virtual void methods_provided(std::unordered_set< irep_idt > &methods) const override
Provide feedback to language_filest so that when asked for a lazy method, it can delegate to this ins...
void java_bytecode_convert_method(const symbolt &class_symbol, const java_bytecode_parse_treet::methodt &method, symbol_table_baset &symbol_table, message_handlert &message_handler, size_t max_array_length, bool throw_assertion_error, optionalt< ci_lazy_methods_neededt > needed_lazy_methods, java_string_library_preprocesst &string_preprocess, const class_hierarchyt &class_hierarchy)
The pointer type.
Definition: std_types.h:1426
void get_all_function_names(std::unordered_set< irep_idt > &methods) const
static void notify_static_method_calls(const exprt &function_body, optionalt< ci_lazy_methods_neededt > needed_lazy_methods)
Notify ci_lazy_methods, if present, of any static function calls made by the given function body...
JAVA Bytecode Language Conversion.
bool string_printable
Force string content to be ASCII printable characters when set to true.
virtual void get_language_options(const cmdlinet &) override
Consume options that are java bytecode specific.
fixed_keys_map_wrappert< parse_tree_with_overridest_mapt > get_class_with_overlays_map()
Map from class names to the bytecode parse trees.
void add_jar_file(const std::string &f)
bool typecheck(symbol_tablet &context, const std::string &module) override
void set_java_cp_include_files(const std::string &java_cp_include_files)
nonstd::optional< T > optionalt
Definition: optional.h:35
virtual bool preprocess(std::istream &instream, const std::string &path, std::ostream &outstream) override
ANSI-C preprocessing.
The symbol table.
Definition: symbol_table.h:19
const irep_idt & get(const irep_namet &name) const
Definition: irep.cpp:213
Collect methods needed to be loaded using the lazy method.
TO_BE_DOCUMENTED.
Definition: namespace.h:74
codet get_stub_initializer_body(const irep_idt &function_id, symbol_table_baset &symbol_table, const object_factory_parameterst &object_factory_parameters, const select_pointer_typet &pointer_type_selector)
Create the body of a synthetic static initializer (clinit method), which initialise stub globals in t...
std::string id() const override
#define PRECONDITION(CONDITION)
Definition: invariant.h:230
std::unique_ptr< languaget > new_java_bytecode_language()
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
class_mapt class_map
A function call.
Definition: std_code.h:828
const std::unordered_set< std::string > cprover_methods_to_ignore
Methods belonging to the class org.cprover.CProver that should be ignored (not converted), leaving the driver program to stub them if it wishes.
Definition: java_utils.cpp:433
virtual void set_message_handler(message_handlert &_message_handler)
Definition: message.h:148
struct configt::javat java
const typet & follow(const typet &) const
Definition: namespace.cpp:55
synthetic_methods_mapt synthetic_methods
Maps synthetic method names on to the particular type of synthetic method (static initializer...
const struct_typet & to_struct_type(const typet &type)
Cast a generic typet to a struct_typet.
Definition: std_types.h:318
Author: Diffblue Ltd.
Operator to return the address of an object.
Definition: std_expr.h:3170
bool language_options_initialized
Definition: language.h:182
const symbolst & symbols
bool to_expr(const std::string &code, const std::string &module, exprt &expr, const namespacet &ns) override
Parses the given string into an expression.
bool from_type(const typet &type, std::string &code, const namespacet &ns) override
Formats the given type in a language-specific way.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool implements_function(const irep_idt &function_id) const
void add_load_classes(const std::vector< irep_idt > &classes)
Adds the list of classes to the load queue, forcing them to be loaded even without explicit reference...
bool has_component(const irep_idt &component_name) const
Definition: std_types.h:255
An exception that is raised checking whether a class is implicitly generic if a symbol for an outer c...
std::vector< irep_idt > idst
const std::unique_ptr< const select_pointer_typet > pointer_type_selector
void create_stub_global_initializer_symbols(symbol_tablet &symbol_table, const std::unordered_set< irep_idt > &stub_globals_set, synthetic_methods_mapt &synthetic_methods)
Create static initializer symbols for each distinct class that has stub globals.
std::unordered_set< std::string > no_load_classes
void java_bytecode_instrument_symbol(symbol_table_baset &symbol_table, symbolt &symbol, const bool throw_runtime_exceptions, message_handlert &message_handler)
Instruments the code attached to symbol with runtime exceptions or corresponding assertions.
typet type
Type of symbol.
Definition: symbol.h:34
message_handlert & get_message_handler()
Definition: message.h:153
bool do_ci_lazy_method_conversion(symbol_tablet &, method_bytecodet &)
Uses a simple context-insensitive (&#39;ci&#39;) analysis to determine which methods may be reachable from th...
bool is_string() const
Definition: json.h:39
size_t max_nondet_array_length
Maximum value for the non-deterministically-chosen length of an array.
bool is_array() const
Definition: json.h:54
std::string value
Definition: json.h:137
mstreamt & result() const
Definition: message.h:312
mstreamt & status() const
Definition: message.h:317
void set_extra_class_refs_function(get_extra_class_refs_functiont func)
Sets a function that provides extra dependencies for a particular class.
exprt code_for_function(const symbolt &symbol, symbol_table_baset &symbol_table)
Should be called to provide code for string functions that are used in the code but for which no impl...
Base class for all expressions.
Definition: expr.h:42
bool is_state_var
Definition: symbol.h:63
static void infer_opaque_type_fields(const java_bytecode_parse_treet &parse_tree, symbol_tablet &symbol_table)
Infer fields that must exist on opaque types from field accesses against them.
size_t max_nondet_string_length
Maximum value for the non-deterministically-chosen length of a string.
bool parse(std::istream &instream, const std::string &path) override
irep_idt base_name
Base (non-scoped) name.
Definition: symbol.h:49
The symbol table base class interface.
const std::vector< std::string > exception_needed_classes
bool is_abstract() const
Definition: std_types.h:419
#define JAVA_CLASS_MODEL_SUFFIX
#define UNREACHABLE
Definition: invariant.h:250
const_depth_iteratort depth_cend() const
Definition: expr.cpp:309
symbol_exprt get_or_create_string_literal_symbol(const exprt &string_expr, symbol_table_baset &symbol_table, bool string_refinement_enabled)
Creates or gets an existing constant global symbol for a given string literal.
const basest & bases() const
Definition: std_types.h:386
void make_nil()
Definition: irep.h:243
void java_internal_additions(symbol_table_baset &dest)
Expression to hold a symbol (variable)
Definition: std_expr.h:90
A generated (synthetic) static initializer function for a stub type.
bool has_suffix(const std::string &s, const std::string &suffix)
Definition: suffix.h:15
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
const pointer_typet & to_pointer_type(const typet &type)
Cast a generic typet to a pointer_typet.
Definition: std_types.h:1450
Author: Diffblue Ltd.
lazy_methods_modet lazy_methods_mode
irep_idt main_class
Definition: config.h:148
dstringt irep_idt
Definition: irep.h:31
bool generate_support_functions(symbol_tablet &symbol_table) override
Create language-specific support functions, such as __CPROVER_start, __CPROVER_initialize and languag...
opt_reft get(const irep_idt &method_id)
void java_bytecode_instrument(symbol_tablet &symbol_table, const bool throw_runtime_exceptions, message_handlert &message_handler)
Instruments all the code in the symbol_table with runtime exceptions or corresponding assertions...
bool is_type
Definition: symbol.h:63
JAVA Bytecode Language Conversion.
#define DATA_INVARIANT(CONDITION, REASON)
Definition: invariant.h:257
C++ class type.
Definition: std_types.h:341
arrayt array
Definition: json.h:129
void java_bytecode_initialize_parameter_names(symbolt &method_symbol, const java_bytecode_parse_treet::methodt::local_variable_tablet &local_variable_table, symbol_table_baset &symbol_table)
This uses a cut-down version of the logic in java_bytecode_convert_methodt::convert to initialize sym...
resolve_inherited_componentt::inherited_componentt get_inherited_component(const irep_idt &component_class_id, const irep_idt &component_name, const irep_idt &user_class_id, const symbol_tablet &symbol_table, const class_hierarchyt &class_hierarchy, bool include_interfaces)
Finds an inherited component (method or field), taking component visibility into account.
Definition: java_utils.cpp:345
stub_global_initializer_factoryt stub_global_initializer_factory
const symbolt * lookup(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
message_handlert * message_handler
Definition: message.h:342
bool empty() const
Definition: dstring.h:61
std::unordered_map< std::string, std::string > get_manifest()
Get contents of the Manifest file in the jar archive.
Definition: jar_file.cpp:99
const typet & return_type() const
Definition: std_types.h:895
const java_class_typet & to_java_class_type(const typet &type)
Definition: java_types.h:163
virtual bool final(symbol_table_baset &context) override
Final adjustments, e.g.
std::vector< irep_idt > main_jar_classes
std::vector< irep_idt > get_string_type_base_classes(const irep_idt &class_name)
Gets the base classes for known String and String-related types, or returns an empty list for other t...
void set(const irep_namet &name, const irep_idt &value)
Definition: irep.h:214
std::string expr2java(const exprt &expr, const namespacet &ns)
Definition: expr2java.cpp:455
bool java_entry_point(symbol_table_baset &symbol_table, const irep_idt &main_class, message_handlert &message_handler, bool assume_init_pointers_not_null, bool assert_uncaught_exceptions, const object_factory_parameterst &object_factory_parameters, const select_pointer_typet &pointer_type_selector, bool string_refinement_enabled)
Given the symbol_table and the main_class to test, this function generates a new function __CPROVER__...
void convert_single_method(const irep_idt &function_id, symbol_table_baset &symbol_table)
auto expr_try_dynamic_cast(TExpr &base) -> typename detail::expr_try_dynamic_cast_return_typet< T, TExpr >::type
Try to cast a reference to a generic exprt to a specific derived class.
Definition: expr_cast.h:91
bool is_lvalue
Definition: symbol.h:68