cprover
ci_lazy_methods.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3  Module: Java Bytecode
4 
5  Author: Diffblue Ltd.
6 
7 \*******************************************************************/
8 
9 #include "ci_lazy_methods.h"
10 #include "java_entry_point.h"
11 #include "java_class_loader.h"
12 #include "java_utils.h"
14 #include "remove_exceptions.h"
15 
16 #include <util/expr_iterator.h>
17 #include <util/suffix.h>
18 
20 
38  const symbol_tablet &symbol_table,
39  const irep_idt &main_class,
40  const std::vector<irep_idt> &main_jar_classes,
41  const std::vector<load_extra_methodst> &lazy_methods_extra_entry_points,
42  java_class_loadert &java_class_loader,
43  const std::vector<irep_idt> &extra_instantiated_classes,
44  const select_pointer_typet &pointer_type_selector,
45  message_handlert &message_handler,
46  const synthetic_methods_mapt &synthetic_methods)
47  : messaget(message_handler),
48  main_class(main_class),
49  main_jar_classes(main_jar_classes),
50  lazy_methods_extra_entry_points(lazy_methods_extra_entry_points),
51  java_class_loader(java_class_loader),
52  extra_instantiated_classes(extra_instantiated_classes),
53  pointer_type_selector(pointer_type_selector),
54  synthetic_methods(synthetic_methods)
55 {
56  // build the class hierarchy
57  class_hierarchy(symbol_table);
58 }
59 
66 static bool references_class_model(const exprt &expr)
67 {
68  static const struct_tag_typet class_type("java::java.lang.Class");
69 
70  for(auto it = expr.depth_begin(); it != expr.depth_end(); ++it)
71  {
73  it->type() == class_type &&
74  has_suffix(
77  {
78  return true;
79  }
80  }
81 
82  return false;
83 }
84 
101  symbol_tablet &symbol_table,
102  method_bytecodet &method_bytecode,
103  const method_convertert &method_converter)
104 {
105  std::unordered_set<irep_idt> methods_to_convert_later =
106  entry_point_methods(symbol_table);
107 
108  // Add any extra entry points specified; we should elaborate these in the
109  // same way as the main function.
110  for(const auto &extra_function_generator : lazy_methods_extra_entry_points)
111  {
112  std::vector<irep_idt> extra_methods =
113  extra_function_generator(symbol_table);
114  methods_to_convert_later.insert(extra_methods.begin(), extra_methods.end());
115  }
116 
117  std::unordered_set<irep_idt> instantiated_classes;
118 
119  {
120  std::unordered_set<irep_idt> initial_callable_methods;
121  ci_lazy_methods_neededt initial_lazy_methods(
122  initial_callable_methods,
123  instantiated_classes,
124  symbol_table,
127  methods_to_convert_later, namespacet(symbol_table), initial_lazy_methods);
128  methods_to_convert_later.insert(
129  initial_callable_methods.begin(), initial_callable_methods.end());
130  }
131 
132  std::unordered_set<irep_idt> methods_already_populated;
133  std::unordered_set<exprt, irep_hash> virtual_function_calls;
134  bool class_initializer_seen = false;
135 
136  bool any_new_classes = true;
137  while(any_new_classes)
138  {
139  bool any_new_methods = true;
140  while(any_new_methods)
141  {
142  any_new_methods = false;
143  while(!methods_to_convert_later.empty())
144  {
145  std::unordered_set<irep_idt> methods_to_convert;
146  std::swap(methods_to_convert, methods_to_convert_later);
147  for(const auto &mname : methods_to_convert)
148  {
149  const auto conversion_result = convert_and_analyze_method(
150  method_converter,
151  methods_already_populated,
152  class_initializer_seen,
153  mname,
154  symbol_table,
155  methods_to_convert_later,
156  instantiated_classes,
157  virtual_function_calls);
158  any_new_methods |= conversion_result.new_method_seen;
159  class_initializer_seen |= conversion_result.class_initializer_seen;
160  }
161  }
162 
163  // Given the object types we now know may be created, populate more
164  // possible virtual function call targets:
165 
166  debug() << "CI lazy methods: add virtual method targets ("
167  << virtual_function_calls.size() << " callsites)" << eom;
168 
169  for(const exprt &function : virtual_function_calls)
170  {
172  function,
173  instantiated_classes,
174  methods_to_convert_later,
175  symbol_table);
176  }
177  }
178 
179  any_new_classes = handle_virtual_methods_with_no_callees(
180  methods_to_convert_later,
181  instantiated_classes,
182  virtual_function_calls,
183  symbol_table);
184  }
185 
186  // Remove symbols for methods that were declared but never used:
187  symbol_tablet keep_symbols;
188  // Manually keep @inflight_exception, as it is unused at this stage
189  // but will become used when the `remove_exceptions` pass is run:
190  keep_symbols.add(symbol_table.lookup_ref(INFLIGHT_EXCEPTION_VARIABLE_NAME));
191 
192  for(const auto &sym : symbol_table.symbols)
193  {
194  // Don't keep global variables (unless they're gathered below from a
195  // function that references them)
196  if(sym.second.is_static_lifetime)
197  continue;
198  if(sym.second.type.id()==ID_code)
199  {
200  // Don't keep functions that belong to this language that we haven't
201  // converted above
202  if(
203  (method_bytecode.contains_method(sym.first) ||
204  synthetic_methods.count(sym.first)) &&
205  !methods_already_populated.count(sym.first))
206  {
207  continue;
208  }
209  // If this is a function then add all the things used in it
210  gather_needed_globals(sym.second.value, symbol_table, keep_symbols);
211  }
212  keep_symbols.add(sym.second);
213  }
214 
215  debug() << "CI lazy methods: removed "
216  << symbol_table.symbols.size() - keep_symbols.symbols.size()
217  << " unreachable methods and globals" << eom;
218 
219  symbol_table.swap(keep_symbols);
220 
221  return false;
222 }
223 
232  std::unordered_set<irep_idt> &methods_to_convert_later,
233  std::unordered_set<irep_idt> &instantiated_classes,
234  const std::unordered_set<exprt, irep_hash> &virtual_function_calls,
235  symbol_tablet &symbol_table)
236 {
237  ci_lazy_methods_neededt lazy_methods_loader(
238  methods_to_convert_later,
239  instantiated_classes,
240  symbol_table,
242 
243  bool any_new_classes = false;
244  for(const exprt &virtual_function_call : virtual_function_calls)
245  {
246  std::unordered_set<irep_idt> candidate_target_methods;
248  virtual_function_call,
249  instantiated_classes,
250  candidate_target_methods,
251  symbol_table);
252 
253  if(!candidate_target_methods.empty())
254  continue;
255 
256  // Add the call class to instantiated_classes and assert that it
257  // didn't already exist
258  const irep_idt &call_class = virtual_function_call.get(ID_C_class);
259  bool added_class = instantiated_classes.count(call_class) == 0;
260  CHECK_RETURN(added_class);
261 
262  lazy_methods_loader.add_all_needed_classes(
264  to_java_method_type(virtual_function_call.type()).get_this()->type()));
265  any_new_classes = true;
266 
267  // Check that `get_virtual_method_target` returns a method now
268  const irep_idt &call_basename =
269  virtual_function_call.get(ID_component_name);
270  const irep_idt method_name = get_virtual_method_target(
271  instantiated_classes, call_basename, call_class, symbol_table);
272  CHECK_RETURN(!method_name.empty());
273 
274  // Add what it returns to methods_to_convert_later
275  methods_to_convert_later.insert(method_name);
276  }
277  return any_new_classes;
278 }
279 
291  const method_convertert &method_converter,
292  std::unordered_set<irep_idt> &methods_already_populated,
293  const bool class_initializer_already_seen,
294  const irep_idt &method_name,
295  symbol_tablet &symbol_table,
296  std::unordered_set<irep_idt> &methods_to_convert_later,
297  std::unordered_set<irep_idt> &instantiated_classes,
298  std::unordered_set<exprt, irep_hash> &virtual_function_calls)
299 {
301  if(!methods_already_populated.insert(method_name).second)
302  return result;
303 
304  debug() << "CI lazy methods: elaborate " << method_name << eom;
305 
306  // Note this wraps *references* to methods_to_convert_later &
307  // instantiated_classes
308  ci_lazy_methods_neededt needed_methods(
309  methods_to_convert_later,
310  instantiated_classes,
311  symbol_table,
313 
314  if(method_converter(method_name, needed_methods))
315  return result;
316 
317  const exprt &method_body = symbol_table.lookup_ref(method_name).value;
318  gather_virtual_callsites(method_body, virtual_function_calls);
319 
320  if(!class_initializer_already_seen && references_class_model(method_body))
321  {
322  result.class_initializer_seen = true;
323  const irep_idt initializer_signature =
325  if(symbol_table.has_symbol(initializer_signature))
326  methods_to_convert_later.insert(initializer_signature);
327  }
328  result.new_method_seen = true;
329  return result;
330 }
331 
337 std::unordered_set<irep_idt>
339 {
340  std::unordered_set<irep_idt> methods_to_convert_later;
341 
342  const main_function_resultt main_function = get_main_symbol(
343  symbol_table, this->main_class, this->get_message_handler());
344  if(!main_function.is_success())
345  {
346  // Failed, mark all functions in the given main class(es)
347  // reachable.
348  std::vector<irep_idt> reachable_classes;
349  if(!this->main_class.empty())
350  reachable_classes.push_back(this->main_class);
351  else
352  reachable_classes = this->main_jar_classes;
353  for(const irep_idt &class_name : reachable_classes)
354  {
355  const auto &methods =
356  this->java_class_loader.get_original_class(class_name)
358  for(const auto &method : methods)
359  {
360  const irep_idt methodid = "java::" + id2string(class_name) + "." +
361  id2string(method.name) + ":" +
362  id2string(method.descriptor);
363  methods_to_convert_later.insert(methodid);
364  }
365  }
366  }
367  else
368  methods_to_convert_later.insert(main_function.main_function.name);
369  return methods_to_convert_later;
370 }
371 
381  const std::unordered_set<irep_idt> &entry_points,
382  const namespacet &ns,
383  ci_lazy_methods_neededt &needed_lazy_methods)
384 {
385  for(const auto &mname : entry_points)
386  {
387  const auto &symbol=ns.lookup(mname);
388  const auto &mtype = to_java_method_type(symbol.type);
389  for(const auto &param : mtype.parameters())
390  {
391  if(param.type().id()==ID_pointer)
392  {
393  const pointer_typet &original_pointer = to_pointer_type(param.type());
394  needed_lazy_methods.add_all_needed_classes(original_pointer);
395  }
396  }
397  }
398 
399  // Also add classes whose instances are magically
400  // created by the JVM and so won't be spotted by
401  // looking for constructors and calls as usual:
402  needed_lazy_methods.add_needed_class("java::java.lang.String");
403  needed_lazy_methods.add_needed_class("java::java.lang.Class");
404  needed_lazy_methods.add_needed_class("java::java.lang.Object");
405 
406  // As in class_loader, ensure these classes stay available
407  for(const auto &id : extra_instantiated_classes)
408  needed_lazy_methods.add_needed_class("java::" + id2string(id));
409 }
410 
416  const exprt &e,
417  std::unordered_set<exprt, irep_hash> &result)
418 {
419  if(e.id()!=ID_code)
420  return;
421  const codet &c=to_code(e);
422  if(c.get_statement()==ID_function_call &&
423  to_code_function_call(c).function().id()==ID_virtual_function)
424  {
425  result.insert(to_code_function_call(c).function());
426  }
427  else
428  {
429  for(const exprt &op : e.operands())
431  }
432 }
433 
445  const exprt &called_function,
446  const std::unordered_set<irep_idt> &instantiated_classes,
447  std::unordered_set<irep_idt> &callable_methods,
448  symbol_tablet &symbol_table)
449 {
450  PRECONDITION(called_function.id()==ID_virtual_function);
451 
452  const auto &call_class=called_function.get(ID_C_class);
453  INVARIANT(
454  !call_class.empty(), "All virtual calls should be aimed at a class");
455  const auto &call_basename=called_function.get(ID_component_name);
456  INVARIANT(
457  !call_basename.empty(),
458  "Virtual function must have a reasonable name after removing class");
459 
460  class_hierarchyt::idst self_and_child_classes =
462  self_and_child_classes.push_back(call_class);
463 
464  for(const irep_idt &class_name : self_and_child_classes)
465  {
466  const irep_idt method_name = get_virtual_method_target(
467  instantiated_classes, call_basename, class_name, symbol_table);
468  if(!method_name.empty())
469  callable_methods.insert(method_name);
470  }
471 }
472 
479  const exprt &e,
480  const symbol_tablet &symbol_table,
481  symbol_tablet &needed)
482 {
483  if(e.id()==ID_symbol)
484  {
485  // If the symbol isn't in the symbol table at all, then it is defined
486  // on an opaque type (i.e. we don't have the class definition at this point)
487  // and will be created during the typecheck phase.
488  // We don't mark it as 'needed' as it doesn't exist yet to keep.
489  const auto findit=
490  symbol_table.symbols.find(to_symbol_expr(e).get_identifier());
491  if(findit!=symbol_table.symbols.end() &&
492  findit->second.is_static_lifetime)
493  {
494  needed.add(findit->second);
495  // Gather any globals referenced in the initialiser:
496  gather_needed_globals(findit->second.value, symbol_table, needed);
497  }
498  }
499  else
500  forall_operands(opit, e)
501  gather_needed_globals(*opit, symbol_table, needed);
502 }
503 
517  const std::unordered_set<irep_idt> &instantiated_classes,
518  const irep_idt &call_basename,
519  const irep_idt &classname,
520  const symbol_tablet &symbol_table)
521 {
522  // Program-wide, is this class ever instantiated?
523  if(!instantiated_classes.count(classname))
524  return irep_idt();
525 
526  resolve_inherited_componentt call_resolver(symbol_table, class_hierarchy);
528  call_resolver(classname, call_basename, false);
529 
530  if(resolved_call.is_valid())
531  return resolved_call.get_full_component_identifier();
532  else
533  return irep_idt();
534 }
const irep_idt & get_statement() const
Definition: std_code.h:56
irep_idt name
The unique identifier.
Definition: symbol.h:40
convert_method_resultt convert_and_analyze_method(const method_convertert &method_converter, std::unordered_set< irep_idt > &methods_already_populated, const bool class_initializer_already_seen, const irep_idt &method_name, symbol_tablet &symbol_table, std::unordered_set< irep_idt > &methods_to_convert_later, std::unordered_set< irep_idt > &instantiated_classes, std::unordered_set< exprt, irep_hash > &virtual_function_calls)
Convert a method, add it to the populated set, add needed methods to methods_to_convert_later and add...
bool handle_virtual_methods_with_no_callees(std::unordered_set< irep_idt > &methods_to_convert_later, std::unordered_set< irep_idt > &instantiated_classes, const std::unordered_set< exprt, irep_hash > &virtual_function_calls, symbol_tablet &symbol_table)
Look for virtual callsites with no candidate targets.
const select_pointer_typet & pointer_type_selector
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
#define INFLIGHT_EXCEPTION_VARIABLE_NAME
Remove function exceptional returns.
std::function< bool(const irep_idt &function_id, ci_lazy_methods_neededt)> method_convertert
const std::vector< irep_idt > & extra_instantiated_classes
irep_idt get_virtual_method_target(const std::unordered_set< irep_idt > &instantiated_classes, const irep_idt &call_basename, const irep_idt &classname, const symbol_tablet &symbol_table)
Find a virtual callee, if one is defined and the callee type is known to exist.
main_function_resultt get_main_symbol(const symbol_table_baset &symbol_table, const irep_idt &main_class, message_handlert &message_handler)
Figures out the entry point of the code to verify.
const irep_idt & get_identifier() const
Definition: std_expr.h:176
bool is_valid() const
Use to check if this inherited_componentt has been fully constructed.
void gather_virtual_callsites(const exprt &e, std::unordered_set< exprt, irep_hash > &result)
Get places where virtual functions are called.
exprt value
Initial value of symbol.
Definition: symbol.h:34
A struct tag type, i.e., struct_typet with an identifier.
Definition: std_types.h:517
depth_iteratort depth_begin()
Definition: expr.cpp:282
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:470
Forward depth-first search iterators These iterators&#39; copy operations are expensive, so use auto&, and avoid std::next(), std::prev() and post-increment iterator.
void initialize_instantiated_classes(const std::unordered_set< irep_idt > &entry_points, const namespacet &ns, ci_lazy_methods_neededt &needed_lazy_methods)
Build up a list of methods whose type may be passed around reachable from the entry point...
const irep_idt & id() const
Definition: irep.h:259
The pointer type These are both &#39;bitvector_typet&#39; (they have a width) and &#39;type_with_subtypet&#39; (they ...
Definition: std_types.h:1507
void gather_needed_globals(const exprt &e, const symbol_tablet &symbol_table, symbol_tablet &needed)
See output.
irep_idt get_java_class_literal_initializer_signature()
Get the symbol name of java.lang.Class&#39; initializer method.
java_class_loadert & java_class_loader
The symbol table.
Definition: symbol_table.h:19
const irep_idt & get(const irep_namet &name) const
Definition: irep.cpp:212
Collect methods needed to be loaded using the lazy method.
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:93
#define PRECONDITION(CONDITION)
Definition: invariant.h:438
Given a class and a component (either field or method), find the closest parent that defines that com...
#define forall_operands(it, expr)
Definition: expr.h:20
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:251
Class that provides messages with a built-in verbosity &#39;level&#39;.
Definition: message.h:144
const synthetic_methods_mapt & synthetic_methods
dstringt has one field, an unsigned integer no which is an index into a static table of strings...
Definition: dstring.h:35
const symbolst & symbols
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
static bool references_class_model(const exprt &expr)
Checks if an expression refers to any class literals (e.g.
const std::vector< load_extra_methodst > & lazy_methods_extra_entry_points
std::vector< irep_idt > idst
static eomt eom
Definition: message.h:284
bool can_cast_expr< symbol_exprt >(const exprt &base)
Definition: std_expr.h:266
message_handlert & get_message_handler()
Definition: message.h:174
const java_method_typet & to_java_method_type(const typet &type)
Definition: java_types.h:338
bool contains_method(const irep_idt &method_id) const
mstreamt & result() const
Definition: message.h:396
exprt & function()
Definition: std_code.h:1099
bool add_needed_class(const irep_idt &)
Notes class class_symbol_name will be instantiated, or a static field belonging to it will be accesse...
Base class for all expressions.
Definition: expr.h:54
std::unordered_set< irep_idt > entry_point_methods(const symbol_tablet &symbol_table)
Entry point methods are either:
#define JAVA_CLASS_MODEL_SUFFIX
Class responsible to load .class files.
std::vector< irep_idt > main_jar_classes
const codet & to_code(const exprt &expr)
Definition: std_code.h:136
depth_iteratort depth_end()
Definition: expr.cpp:284
void add_all_needed_classes(const pointer_typet &pointer_type)
Add to the needed classes all classes specified, the replacement type if it will be replaced...
std::unordered_map< irep_idt, synthetic_method_typet > synthetic_methods_mapt
Maps method names on to a synthetic method kind.
bool has_suffix(const std::string &s, const std::string &suffix)
Definition: suffix.h:15
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Definition: std_types.h:1544
dstringt irep_idt
Definition: irep.h:32
void swap(symbol_tablet &other)
Swap symbol maps between two symbol tables.
Definition: symbol_table.h:77
const java_bytecode_parse_treet & get_original_class(const irep_idt &class_name)
mstreamt & debug() const
Definition: message.h:416
Data structure for representing an arbitrary statement in a program.
Definition: std_code.h:34
void get_virtual_method_targets(const exprt &called_function, const std::unordered_set< irep_idt > &instantiated_classes, std::unordered_set< irep_idt > &callable_methods, symbol_tablet &symbol_table)
Find possible callees, excluding types that are not known to be instantiated.
idst get_children_trans(const irep_idt &id) const
operandst & operands()
Definition: expr.h:78
bool operator()(symbol_tablet &symbol_table, method_bytecodet &method_bytecode, const method_convertert &method_converter)
Uses a simple context-insensitive (&#39;ci&#39;) analysis to determine which methods may be reachable from th...
bool empty() const
Definition: dstring.h:75
irep_idt get_full_component_identifier() const
Get the full name of this function.
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
Definition: namespace.cpp:166
class_hierarchyt class_hierarchy
ci_lazy_methodst(const symbol_tablet &symbol_table, const irep_idt &main_class, const std::vector< irep_idt > &main_jar_classes, const std::vector< load_extra_methodst > &lazy_methods_extra_entry_points, java_class_loadert &java_class_loader, const std::vector< irep_idt > &extra_instantiated_classes, const select_pointer_typet &pointer_type_selector, message_handlert &message_handler, const synthetic_methods_mapt &synthetic_methods)
Constructor for lazy-method loading.
const code_function_callt & to_code_function_call(const codet &code)
Definition: std_code.h:1173
Produce code for simple implementation of String Java libraries.