cprover
ci_lazy_methods.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3  Module: Java Bytecode
4 
5  Author: Diffblue Ltd.
6 
7 \*******************************************************************/
8 
9 #include "ci_lazy_methods.h"
10 #include "java_entry_point.h"
11 #include "java_class_loader.h"
12 #include "java_utils.h"
14 #include "remove_exceptions.h"
15 
16 #include <util/expr_iterator.h>
17 #include <util/suffix.h>
18 
20 
34  const symbol_tablet &symbol_table,
35  const irep_idt &main_class,
36  const std::vector<irep_idt> &main_jar_classes,
37  const std::vector<irep_idt> &lazy_methods_extra_entry_points,
38  java_class_loadert &java_class_loader,
39  const std::vector<irep_idt> &extra_instantiated_classes,
40  const select_pointer_typet &pointer_type_selector,
42  const synthetic_methods_mapt &synthetic_methods)
44  main_class(main_class),
45  main_jar_classes(main_jar_classes),
46  lazy_methods_extra_entry_points(lazy_methods_extra_entry_points),
47  java_class_loader(java_class_loader),
48  extra_instantiated_classes(extra_instantiated_classes),
49  pointer_type_selector(pointer_type_selector),
50  synthetic_methods(synthetic_methods)
51 {
52  // build the class hierarchy
53  class_hierarchy(symbol_table);
54 }
55 
62 static bool references_class_model(const exprt &expr)
63 {
64  static const symbol_typet class_type("java::java.lang.Class");
65 
66  for(auto it = expr.depth_begin(); it != expr.depth_end(); ++it)
67  {
69  it->type() == class_type &&
70  has_suffix(
73  {
74  return true;
75  }
76  }
77 
78  return false;
79 }
80 
97  symbol_tablet &symbol_table,
98  method_bytecodet &method_bytecode,
99  const method_convertert &method_converter)
100 {
101  std::unordered_set<irep_idt> methods_to_convert_later =
102  entry_point_methods(symbol_table);
103 
104  // Add any extra entry points specified; we should elaborate these in the
105  // same way as the main function.
106  std::vector<irep_idt> extra_entry_points=lazy_methods_extra_entry_points;
107  resolve_method_names(extra_entry_points, symbol_table);
108  methods_to_convert_later.insert(
109  extra_entry_points.begin(), extra_entry_points.end());
110 
111  std::unordered_set<irep_idt> instantiated_classes;
112 
113  {
114  std::unordered_set<irep_idt> initial_callable_methods;
115  ci_lazy_methods_neededt initial_lazy_methods(
116  initial_callable_methods,
117  instantiated_classes,
118  symbol_table,
121  methods_to_convert_later, namespacet(symbol_table), initial_lazy_methods);
122  methods_to_convert_later.insert(
123  initial_callable_methods.begin(), initial_callable_methods.end());
124  }
125 
126  std::unordered_set<irep_idt> methods_already_populated;
127  std::unordered_set<exprt, irep_hash> virtual_function_calls;
128  bool class_initializer_seen = false;
129 
130  bool any_new_classes = true;
131  while(any_new_classes)
132  {
133  bool any_new_methods = true;
134  while(any_new_methods)
135  {
136  any_new_methods = false;
137  while(!methods_to_convert_later.empty())
138  {
139  std::unordered_set<irep_idt> methods_to_convert;
140  std::swap(methods_to_convert, methods_to_convert_later);
141  for(const auto &mname : methods_to_convert)
142  {
143  const auto conversion_result = convert_and_analyze_method(
144  method_converter,
145  methods_already_populated,
146  class_initializer_seen,
147  mname,
148  symbol_table,
149  methods_to_convert_later,
150  instantiated_classes,
151  virtual_function_calls);
152  any_new_methods |= conversion_result.new_method_seen;
153  class_initializer_seen |= conversion_result.class_initializer_seen;
154  }
155  }
156 
157  // Given the object types we now know may be created, populate more
158  // possible virtual function call targets:
159 
160  debug() << "CI lazy methods: add virtual method targets ("
161  << virtual_function_calls.size() << " callsites)" << eom;
162 
163  for(const exprt &function : virtual_function_calls)
164  {
166  function,
167  instantiated_classes,
168  methods_to_convert_later,
169  symbol_table);
170  }
171  }
172 
173  any_new_classes = handle_virtual_methods_with_no_callees(
174  methods_to_convert_later,
175  instantiated_classes,
176  virtual_function_calls,
177  symbol_table);
178  }
179 
180  // Remove symbols for methods that were declared but never used:
181  symbol_tablet keep_symbols;
182  // Manually keep @inflight_exception, as it is unused at this stage
183  // but will become used when the `remove_exceptions` pass is run:
184  keep_symbols.add(symbol_table.lookup_ref(INFLIGHT_EXCEPTION_VARIABLE_NAME));
185 
186  for(const auto &sym : symbol_table.symbols)
187  {
188  // Don't keep global variables (unless they're gathered below from a
189  // function that references them)
190  if(sym.second.is_static_lifetime)
191  continue;
192  if(sym.second.type.id()==ID_code)
193  {
194  // Don't keep functions that belong to this language that we haven't
195  // converted above
196  if(
197  (method_bytecode.contains_method(sym.first) ||
198  synthetic_methods.count(sym.first)) &&
199  !methods_already_populated.count(sym.first))
200  {
201  continue;
202  }
203  // If this is a function then add all the things used in it
204  gather_needed_globals(sym.second.value, symbol_table, keep_symbols);
205  }
206  keep_symbols.add(sym.second);
207  }
208 
209  debug() << "CI lazy methods: removed "
210  << symbol_table.symbols.size() - keep_symbols.symbols.size()
211  << " unreachable methods and globals" << eom;
212 
213  symbol_table.swap(keep_symbols);
214 
215  return false;
216 }
217 
226  std::unordered_set<irep_idt> &methods_to_convert_later,
227  std::unordered_set<irep_idt> &instantiated_classes,
228  const std::unordered_set<exprt, irep_hash> &virtual_function_calls,
229  symbol_tablet &symbol_table)
230 {
231  bool any_new_classes = false;
232  for(const exprt &virtual_function_call : virtual_function_calls)
233  {
234  std::unordered_set<irep_idt> candidate_target_methods;
236  virtual_function_call,
237  instantiated_classes,
238  candidate_target_methods,
239  symbol_table);
240 
241  if(!candidate_target_methods.empty())
242  continue;
243 
244  // Add the call class to instantiated_classes and assert that it
245  // didn't already exist
246  const irep_idt &call_class = virtual_function_call.get(ID_C_class);
247  auto ret_class = instantiated_classes.insert(call_class);
248  CHECK_RETURN(ret_class.second);
249  any_new_classes = true;
250 
251  // Check that `get_virtual_method_target` returns a method now
252  const irep_idt &call_basename =
253  virtual_function_call.get(ID_component_name);
254  const irep_idt method_name = get_virtual_method_target(
255  instantiated_classes, call_basename, call_class, symbol_table);
256  CHECK_RETURN(!method_name.empty());
257 
258  // Add what it returns to methods_to_convert_later
259  methods_to_convert_later.insert(method_name);
260  }
261  return any_new_classes;
262 }
263 
275  const method_convertert &method_converter,
276  std::unordered_set<irep_idt> &methods_already_populated,
277  const bool class_initializer_already_seen,
278  const irep_idt &method_name,
279  symbol_tablet &symbol_table,
280  std::unordered_set<irep_idt> &methods_to_convert_later,
281  std::unordered_set<irep_idt> &instantiated_classes,
282  std::unordered_set<exprt, irep_hash> &virtual_function_calls)
283 {
285  if(!methods_already_populated.insert(method_name).second)
286  return result;
287 
288  debug() << "CI lazy methods: elaborate " << method_name << eom;
289 
290  // Note this wraps *references* to methods_to_convert_later &
291  // instantiated_classes
292  ci_lazy_methods_neededt needed_methods(
293  methods_to_convert_later,
294  instantiated_classes,
295  symbol_table,
297 
298  const bool could_not_convert_function =
299  method_converter(method_name, needed_methods);
300  if(could_not_convert_function)
301  return result;
302 
303  const exprt &method_body = symbol_table.lookup_ref(method_name).value;
304  gather_virtual_callsites(method_body, virtual_function_calls);
305 
306  if(!class_initializer_already_seen && references_class_model(method_body))
307  {
308  result.class_initializer_seen = true;
309  const irep_idt initializer_signature =
311  if(symbol_table.has_symbol(initializer_signature))
312  methods_to_convert_later.insert(initializer_signature);
313  }
314  result.new_method_seen = true;
315  return result;
316 }
317 
323 std::unordered_set<irep_idt>
325 {
326  std::unordered_set<irep_idt> methods_to_convert_later;
327 
328  const main_function_resultt main_function = get_main_symbol(
329  symbol_table, this->main_class, this->get_message_handler());
330  if(!main_function.is_success())
331  {
332  // Failed, mark all functions in the given main class(es)
333  // reachable.
334  std::vector<irep_idt> reachable_classes;
335  if(!this->main_class.empty())
336  reachable_classes.push_back(this->main_class);
337  else
338  reachable_classes = this->main_jar_classes;
339  for(const irep_idt &class_name : reachable_classes)
340  {
341  const auto &methods =
342  this->java_class_loader.get_original_class(class_name)
344  for(const auto &method : methods)
345  {
346  const irep_idt methodid = "java::" + id2string(class_name) + "." +
347  id2string(method.name) + ":" +
348  id2string(method.descriptor);
349  methods_to_convert_later.insert(methodid);
350  }
351  }
352  }
353  else
354  methods_to_convert_later.insert(main_function.main_function.name);
355  return methods_to_convert_later;
356 }
357 
366  std::vector<irep_idt> &methods,
367  const symbol_tablet &symbol_table)
368 {
369  std::vector<irep_idt> new_methods;
370  for(const irep_idt &method : methods)
371  {
372  const std::string &method_str=id2string(method);
373  if(!has_suffix(method_str, ".*"))
374  {
375  std::string error_message;
376  irep_idt internal_name=
378  method_str,
379  symbol_table,
380  error_message);
381  if(internal_name==irep_idt())
382  throw "entry point "+error_message;
383  new_methods.push_back(internal_name);
384  }
385  else
386  {
387  irep_idt classname="java::"+method_str.substr(0, method_str.length()-2);
388  if(!symbol_table.has_symbol(classname))
389  throw "wildcard entry point '"+method_str+"': unknown class";
390 
391  for(const auto &name_symbol : symbol_table.symbols)
392  {
393  if(name_symbol.second.type.id()!=ID_code)
394  continue;
395  if(!to_code_type(name_symbol.second.type).has_this())
396  continue;
397  if(has_prefix(id2string(name_symbol.first), id2string(classname)))
398  new_methods.push_back(name_symbol.first);
399  }
400  }
401  }
402 
403  methods=std::move(new_methods);
404 }
405 
415  const std::unordered_set<irep_idt> &entry_points,
416  const namespacet &ns,
417  ci_lazy_methods_neededt &needed_lazy_methods)
418 {
419  for(const auto &mname : entry_points)
420  {
421  const auto &symbol=ns.lookup(mname);
422  const auto &mtype=to_code_type(symbol.type);
423  for(const auto &param : mtype.parameters())
424  {
425  if(param.type().id()==ID_pointer)
426  {
427  const pointer_typet &original_pointer=to_pointer_type(param.type());
428  needed_lazy_methods.add_all_needed_classes(original_pointer);
429  }
430  }
431  }
432 
433  // Also add classes whose instances are magically
434  // created by the JVM and so won't be spotted by
435  // looking for constructors and calls as usual:
436  needed_lazy_methods.add_needed_class("java::java.lang.String");
437  needed_lazy_methods.add_needed_class("java::java.lang.Class");
438  needed_lazy_methods.add_needed_class("java::java.lang.Object");
439 
440  // As in class_loader, ensure these classes stay available
441  for(const auto &id : extra_instantiated_classes)
442  needed_lazy_methods.add_needed_class("java::" + id2string(id));
443 }
444 
445 
451  const exprt &e,
452  std::unordered_set<exprt, irep_hash> &result)
453 {
454  if(e.id()!=ID_code)
455  return;
456  const codet &c=to_code(e);
457  if(c.get_statement()==ID_function_call &&
458  to_code_function_call(c).function().id()==ID_virtual_function)
459  {
460  result.insert(to_code_function_call(c).function());
461  }
462  else
463  {
464  for(const exprt &op : e.operands())
466  }
467 }
468 
480  const exprt &called_function,
481  const std::unordered_set<irep_idt> &instantiated_classes,
482  std::unordered_set<irep_idt> &callable_methods,
483  symbol_tablet &symbol_table)
484 {
485  PRECONDITION(called_function.id()==ID_virtual_function);
486 
487  const auto &call_class=called_function.get(ID_C_class);
488  INVARIANT(
489  !call_class.empty(), "All virtual calls should be aimed at a class");
490  const auto &call_basename=called_function.get(ID_component_name);
491  INVARIANT(
492  !call_basename.empty(),
493  "Virtual function must have a reasonable name after removing class");
494 
495  class_hierarchyt::idst self_and_child_classes =
497  self_and_child_classes.push_back(call_class);
498 
499  for(const irep_idt &class_name : self_and_child_classes)
500  {
501  const irep_idt method_name = get_virtual_method_target(
502  instantiated_classes, call_basename, class_name, symbol_table);
503  if(!method_name.empty())
504  callable_methods.insert(method_name);
505  }
506 }
507 
514  const exprt &e,
515  const symbol_tablet &symbol_table,
516  symbol_tablet &needed)
517 {
518  if(e.id()==ID_symbol)
519  {
520  // If the symbol isn't in the symbol table at all, then it is defined
521  // on an opaque type (i.e. we don't have the class definition at this point)
522  // and will be created during the typecheck phase.
523  // We don't mark it as 'needed' as it doesn't exist yet to keep.
524  const auto findit=
525  symbol_table.symbols.find(to_symbol_expr(e).get_identifier());
526  if(findit!=symbol_table.symbols.end() &&
527  findit->second.is_static_lifetime)
528  {
529  needed.add(findit->second);
530  // Gather any globals referenced in the initialiser:
531  gather_needed_globals(findit->second.value, symbol_table, needed);
532  }
533  }
534  else
535  forall_operands(opit, e)
536  gather_needed_globals(*opit, symbol_table, needed);
537 }
538 
552  const std::unordered_set<irep_idt> &instantiated_classes,
553  const irep_idt &call_basename,
554  const irep_idt &classname,
555  const symbol_tablet &symbol_table)
556 {
557  // Program-wide, is this class ever instantiated?
558  if(!instantiated_classes.count(classname))
559  return irep_idt();
560 
561  resolve_inherited_componentt call_resolver(symbol_table, class_hierarchy);
563  call_resolver(classname, call_basename, false);
564 
565  if(resolved_call.is_valid())
566  return resolved_call.get_full_component_identifier();
567  else
568  return irep_idt();
569 }
const irep_idt & get_statement() const
Definition: std_code.h:39
irep_idt name
The unique identifier.
Definition: symbol.h:43
ci_lazy_methodst(const symbol_tablet &symbol_table, const irep_idt &main_class, const std::vector< irep_idt > &main_jar_classes, const std::vector< irep_idt > &lazy_methods_extra_entry_points, java_class_loadert &java_class_loader, const std::vector< irep_idt > &extra_instantiated_classes, const select_pointer_typet &pointer_type_selector, message_handlert &message_handler, const synthetic_methods_mapt &synthetic_methods)
Constructor for lazy-method loading.
convert_method_resultt convert_and_analyze_method(const method_convertert &method_converter, std::unordered_set< irep_idt > &methods_already_populated, const bool class_initializer_already_seen, const irep_idt &method_name, symbol_tablet &symbol_table, std::unordered_set< irep_idt > &methods_to_convert_later, std::unordered_set< irep_idt > &instantiated_classes, std::unordered_set< exprt, irep_hash > &virtual_function_calls)
Convert a method, add it to the populated set, add needed methods to methods_to_convert_later and add...
bool handle_virtual_methods_with_no_callees(std::unordered_set< irep_idt > &methods_to_convert_later, std::unordered_set< irep_idt > &instantiated_classes, const std::unordered_set< exprt, irep_hash > &virtual_function_calls, symbol_tablet &symbol_table)
Look for virtual callsites with no candidate targets.
const select_pointer_typet & pointer_type_selector
const std::string & id2string(const irep_idt &d)
Definition: irep.h:43
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
#define INFLIGHT_EXCEPTION_VARIABLE_NAME
Remove function exceptional returns.
std::function< bool(const irep_idt &function_id, ci_lazy_methods_neededt)> method_convertert
void resolve_method_names(std::vector< irep_idt > &methods, const symbol_tablet &symbol_table)
Translates the given list of method names from human-readable to internal syntax. ...
const std::vector< irep_idt > & extra_instantiated_classes
irep_idt get_virtual_method_target(const std::unordered_set< irep_idt > &instantiated_classes, const irep_idt &call_basename, const irep_idt &classname, const symbol_tablet &symbol_table)
Find a virtual callee, if one is defined and the callee type is known to exist.
const code_typet & to_code_type(const typet &type)
Cast a generic typet to a code_typet.
Definition: std_types.h:987
main_function_resultt get_main_symbol(const symbol_table_baset &symbol_table, const irep_idt &main_class, message_handlert &message_handler)
Figures out the entry point of the code to verify.
const irep_idt & get_identifier() const
Definition: std_expr.h:128
bool is_valid() const
Use to check if this inherited_componentt has been fully constructed.
void gather_virtual_callsites(const exprt &e, std::unordered_set< exprt, irep_hash > &result)
Get places where virtual functions are called.
exprt value
Initial value of symbol.
Definition: symbol.h:37
depth_iteratort depth_begin()
Definition: expr.cpp:299
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:245
static mstreamt & eom(mstreamt &m)
Definition: message.h:272
#define INVARIANT(CONDITION, REASON)
Definition: invariant.h:205
std::vector< irep_idt > lazy_methods_extra_entry_points
void initialize_instantiated_classes(const std::unordered_set< irep_idt > &entry_points, const namespacet &ns, ci_lazy_methods_neededt &needed_lazy_methods)
Build up a list of methods whose type may be passed around reachable from the entry point...
const irep_idt & id() const
Definition: irep.h:189
A reference into the symbol table.
Definition: std_types.h:110
The pointer type.
Definition: std_types.h:1426
void gather_needed_globals(const exprt &e, const symbol_tablet &symbol_table, symbol_tablet &needed)
See output.
irep_idt get_java_class_literal_initializer_signature()
Get the symbol name of java.lang.Class&#39; initializer method.
java_class_loadert & java_class_loader
The symbol table.
Definition: symbol_table.h:19
const irep_idt & get(const irep_namet &name) const
Definition: irep.cpp:213
Collect methods needed to be loaded using the lazy method.
TO_BE_DOCUMENTED.
Definition: namespace.h:74
#define PRECONDITION(CONDITION)
Definition: invariant.h:230
Given a class and a component (either field or method), find the closest parent that defines that com...
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
#define forall_operands(it, expr)
Definition: expr.h:17
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast a generic exprt to a symbol_exprt.
Definition: std_expr.h:210
const synthetic_methods_mapt & synthetic_methods
irep_idt resolve_friendly_method_name(const std::string &friendly_name, const symbol_table_baset &symbol_table, std::string &error)
Resolves a user-friendly method name (like packagename.Class.method) into an internal name (like java...
Definition: java_utils.cpp:121
const symbolst & symbols
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
static bool references_class_model(const exprt &expr)
Checks if an expression refers to any class literals (e.g.
std::vector< irep_idt > idst
bool can_cast_expr< symbol_exprt >(const exprt &base)
Definition: std_expr.h:227
message_handlert & get_message_handler()
Definition: message.h:153
bool contains_method(const irep_idt &method_id) const
mstreamt & result() const
Definition: message.h:312
exprt & function()
Definition: std_code.h:848
bool add_needed_class(const irep_idt &)
Notes class class_symbol_name will be instantiated, or a static field belonging to it will be accesse...
Base class for all expressions.
Definition: expr.h:42
std::unordered_set< irep_idt > entry_point_methods(const symbol_tablet &symbol_table)
Entry point methods are either:
bool has_this() const
Definition: std_types.h:866
#define JAVA_CLASS_MODEL_SUFFIX
std::vector< irep_idt > main_jar_classes
const codet & to_code(const exprt &expr)
Definition: std_code.h:74
depth_iteratort depth_end()
Definition: expr.cpp:301
void add_all_needed_classes(const pointer_typet &pointer_type)
Add to the needed classes all classes specified, the replacement type if it will be replaced...
std::unordered_map< irep_idt, synthetic_method_typet > synthetic_methods_mapt
Maps method names on to a synthetic method kind.
bool has_suffix(const std::string &s, const std::string &suffix)
Definition: suffix.h:15
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
const pointer_typet & to_pointer_type(const typet &type)
Cast a generic typet to a pointer_typet.
Definition: std_types.h:1450
goto_programt coverage_criteriont message_handlert & message_handler
Definition: cover.cpp:66
dstringt irep_idt
Definition: irep.h:31
void swap(symbol_tablet &other)
Definition: symbol_table.h:71
const java_bytecode_parse_treet & get_original_class(const irep_idt &class_name)
mstreamt & debug() const
Definition: message.h:332
A statement in a programming language.
Definition: std_code.h:21
void get_virtual_method_targets(const exprt &called_function, const std::unordered_set< irep_idt > &instantiated_classes, std::unordered_set< irep_idt > &callable_methods, symbol_tablet &symbol_table)
Find possible callees, excluding types that are not known to be instantiated.
idst get_children_trans(const irep_idt &id) const
operandst & operands()
Definition: expr.h:66
bool operator()(symbol_tablet &symbol_table, method_bytecodet &method_bytecode, const method_convertert &method_converter)
Uses a simple context-insensitive (&#39;ci&#39;) analysis to determine which methods may be reachable from th...
bool empty() const
Definition: dstring.h:61
irep_idt get_full_component_identifier() const
Get the full name of this function.
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See namespace_baset::lookup().
Definition: namespace.cpp:130
class_hierarchyt class_hierarchy
const code_function_callt & to_code_function_call(const codet &code)
Definition: std_code.h:879
Produce code for simple implementation of String Java libraries.