cprover
Loading...
Searching...
No Matches
java_bytecode_parser.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module:
4
5Author: Daniel Kroening, kroening@kroening.com
6
7\*******************************************************************/
8
10
11#include <algorithm>
12#include <fstream>
13#include <map>
14#include <string>
15
16#include <util/arith_tools.h>
17#include <util/ieee_float.h>
18#include <util/parser.h>
19#include <util/std_expr.h>
21
22#include "bytecode_info.h"
25#include "java_types.h"
26
27class java_bytecode_parsert final : public parsert
28{
29public:
34
35 bool parse() override;
36
38 {
39 u1 tag = 0;
40 u2 ref1 = 0;
41 u2 ref2 = 0;
45 };
46
48
49private:
58
59 using constant_poolt = std::vector<pool_entryt>;
60
62
63 const bool skip_instructions = false;
64
66 {
67 if(index==0 || index>=constant_pool.size())
68 {
69 log.error() << "invalid constant pool index (" << index << ")"
71 log.error() << "constant pool size: " << constant_pool.size()
73 throw 0;
74 }
75
76 return constant_pool[index];
77 }
78
80 {
81 return pool_entry(index).expr;
82 }
83
84 const typet type_entry(u2 index)
85 {
87 }
88
89 void rClassFile();
90 void rconstant_pool();
91 void rinterfaces();
92 void rfields();
93 void rmethods();
94 void rmethod();
96 std::vector<irep_idt> rexceptions_attribute();
97 void rclass_attribute();
98 void rRuntimeAnnotation_attribute(std::vector<annotationt> &);
102 void rmethod_attribute(methodt &method);
103 void rfield_attribute(fieldt &);
104 void rcode_attribute(methodt &method);
105 void read_verification_type_info(methodt::verification_type_infot &);
106 void rbytecode(std::vector<instructiont> &);
107 void get_class_refs();
108 void get_class_refs_rec(const typet &);
109 void get_annotation_class_refs(const std::vector<annotationt> &annotations);
110 void get_annotation_value_class_refs(const exprt &value);
113 parse_method_handle(const class method_handle_infot &entry);
115
116 void skip_bytes(std::size_t bytes)
117 {
118 for(std::size_t i=0; i<bytes; i++)
119 {
120 if(!*in)
121 {
122 log.error() << "unexpected end of bytecode file" << messaget::eom;
123 throw 0;
124 }
125 in->get();
126 }
127 }
128
129 template <typename T>
131 {
132 static_assert(
133 std::is_unsigned<T>::value, "T should be an unsigned integer");
134 const constexpr size_t bytes = sizeof(T);
135 u8 result = 0;
136 for(size_t i = 0; i < bytes; i++)
137 {
138 if(!*in)
139 {
140 log.error() << "unexpected end of bytecode file" << messaget::eom;
141 throw 0;
142 }
143 result <<= 8u;
144 result |= static_cast<u1>(in->get());
145 }
146 return narrow_cast<T>(result);
147 }
148
150};
151
152#define CONSTANT_Class 7
153#define CONSTANT_Fieldref 9
154#define CONSTANT_Methodref 10
155#define CONSTANT_InterfaceMethodref 11
156#define CONSTANT_String 8
157#define CONSTANT_Integer 3
158#define CONSTANT_Float 4
159#define CONSTANT_Long 5
160#define CONSTANT_Double 6
161#define CONSTANT_NameAndType 12
162#define CONSTANT_Utf8 1
163#define CONSTANT_MethodHandle 15
164#define CONSTANT_MethodType 16
165#define CONSTANT_InvokeDynamic 18
166
167#define VTYPE_INFO_TOP 0
168#define VTYPE_INFO_INTEGER 1
169#define VTYPE_INFO_FLOAT 2
170#define VTYPE_INFO_LONG 3
171#define VTYPE_INFO_DOUBLE 4
172#define VTYPE_INFO_ITEM_NULL 5
173#define VTYPE_INFO_UNINIT_THIS 6
174#define VTYPE_INFO_OBJECT 7
175#define VTYPE_INFO_UNINIT 8
176
178{
179public:
181 using pool_entry_lookupt = std::function<pool_entryt &(u2)>;
182
183 explicit structured_pool_entryt(const pool_entryt &entry) : tag(entry.tag)
184 {
185 }
186
187 u1 get_tag() const
188 {
189 return tag;
190 }
191
192protected:
193 static std::string read_utf8_constant(const pool_entryt &entry)
194 {
195 INVARIANT(
196 entry.tag == CONSTANT_Utf8, "Name entry must be a constant UTF-8");
197 return id2string(entry.s);
198 }
199
200private:
202};
203
207{
208public:
209 explicit class_infot(const pool_entryt &entry) : structured_pool_entryt(entry)
210 {
212 name_index = entry.ref1;
213 }
214
215 std::string get_name(const pool_entry_lookupt &pool_entry) const
216 {
217 const pool_entryt &name_entry = pool_entry(name_index);
219 }
220
221private:
223};
224
228{
229public:
230 explicit name_and_type_infot(const pool_entryt &entry)
232 {
234 name_index = entry.ref1;
235 descriptor_index = entry.ref2;
236 }
237
238 std::string get_name(const pool_entry_lookupt &pool_entry) const
239 {
240 const pool_entryt &name_entry = pool_entry(name_index);
242 }
243
244 std::string get_descriptor(const pool_entry_lookupt &pool_entry) const
245 {
246 const pool_entryt &descriptor_entry = pool_entry(descriptor_index);
248 }
249
250private:
253};
254
256{
257public:
258 explicit base_ref_infot(const pool_entryt &entry)
260 {
262 entry.tag == CONSTANT_Fieldref || entry.tag == CONSTANT_Methodref ||
264 class_index = entry.ref1;
266 }
267
269 {
270 return class_index;
271 }
273 {
274 return name_and_type_index;
275 }
276
278 get_name_and_type(const pool_entry_lookupt &pool_entry) const
279 {
281
282 INVARIANT(
284 "name_and_typeindex did not correspond to a name_and_type in the "
285 "constant pool");
286
288 }
289
290 class_infot get_class(const pool_entry_lookupt &pool_entry) const
291 {
292 const pool_entryt &class_entry = pool_entry(class_index);
293
294 return class_infot{class_entry};
295 }
296
297private:
300};
301
303{
304public:
309 {
310 REF_getField = 1,
311 REF_getStatic = 2,
312 REF_putField = 3,
313 REF_putStatic = 4,
319 };
320
321 explicit method_handle_infot(const pool_entryt &entry)
323 {
325 PRECONDITION(entry.ref1 > 0 && entry.ref1 < 10); // Java 8 spec 4.4.8
326 handle_kind = static_cast<method_handle_kindt>(entry.ref1);
327 reference_index = entry.ref2;
328 }
329
331 {
332 return handle_kind;
333 }
334
336 {
337 const base_ref_infot ref_entry{pool_entry(reference_index)};
338
339 // validate the correctness of the constant pool entry
340 switch(handle_kind)
341 {
346 {
347 INVARIANT(ref_entry.get_tag() == CONSTANT_Fieldref, "4.4.2");
348 break;
349 }
352 {
353 INVARIANT(ref_entry.get_tag() == CONSTANT_Methodref, "4.4.2");
354 break;
355 }
358 {
359 INVARIANT(
360 ref_entry.get_tag() == CONSTANT_Methodref ||
362 "4.4.2");
363 break;
364 }
366 {
367 INVARIANT(ref_entry.get_tag() == CONSTANT_InterfaceMethodref, "4.4.8");
368 break;
369 }
370 }
371
372 return ref_entry;
373 }
374
375private:
378};
379
381{
382 try
383 {
384 rClassFile();
385 }
386
387 catch(const char *message)
388 {
389 log.error() << message << messaget::eom;
390 return true;
391 }
392
393 catch(const std::string &message)
394 {
395 log.error() << message << messaget::eom;
396 return true;
397 }
398
399 catch(...)
400 {
401 log.error() << "parsing error" << messaget::eom;
402 return true;
403 }
404
405 return false;
406}
407
408#define ACC_PUBLIC 0x0001u
409#define ACC_PRIVATE 0x0002u
410#define ACC_PROTECTED 0x0004u
411#define ACC_STATIC 0x0008u
412#define ACC_FINAL 0x0010u
413#define ACC_SYNCHRONIZED 0x0020u
414#define ACC_BRIDGE 0x0040u
415#define ACC_NATIVE 0x0100u
416#define ACC_INTERFACE 0x0200u
417#define ACC_ABSTRACT 0x0400u
418#define ACC_STRICT 0x0800u
419#define ACC_SYNTHETIC 0x1000u
420#define ACC_ANNOTATION 0x2000u
421#define ACC_ENUM 0x4000u
422
423#define UNUSED_u2(x) \
424 { \
425 const u2 x = read<u2>(); \
426 (void)x; \
427 } \
428 (void)0
429
431{
433
434 const u4 magic = read<u4>();
436 const u2 major_version = read<u2>();
437
438 if(magic!=0xCAFEBABE)
439 {
440 log.error() << "wrong magic" << messaget::eom;
441 throw 0;
442 }
443
444 if(major_version<44)
445 {
446 log.error() << "unexpected major version" << messaget::eom;
447 throw 0;
448 }
449
451
452 classt &parsed_class=parse_tree.parsed_class;
453
454 const u2 access_flags = read<u2>();
455 const u2 this_class = read<u2>();
456 const u2 super_class = read<u2>();
457
458 parsed_class.is_abstract=(access_flags&ACC_ABSTRACT)!=0;
459 parsed_class.is_enum=(access_flags&ACC_ENUM)!=0;
460 parsed_class.is_public=(access_flags&ACC_PUBLIC)!=0;
461 parsed_class.is_protected=(access_flags&ACC_PROTECTED)!=0;
462 parsed_class.is_private=(access_flags&ACC_PRIVATE)!=0;
463 parsed_class.is_final = (access_flags & ACC_FINAL) != 0;
464 parsed_class.is_interface = (access_flags & ACC_INTERFACE) != 0;
465 parsed_class.is_synthetic = (access_flags & ACC_SYNTHETIC) != 0;
466 parsed_class.is_annotation = (access_flags & ACC_ANNOTATION) != 0;
467 parsed_class.name=
469
470 if(super_class!=0)
471 parsed_class.super_class = constant(super_class).type().get(ID_C_base_name);
472
473 rinterfaces();
474 rfields();
475 rmethods();
476
477 // count elements of enum
478 if(parsed_class.is_enum)
480 if(field.is_enum)
482
483 const u2 attributes_count = read<u2>();
484
485 for(std::size_t j=0; j<attributes_count; j++)
487
489
491}
492
495{
496 for(const auto &c : constant_pool)
497 {
498 switch(c.tag)
499 {
500 case CONSTANT_Class:
501 get_class_refs_rec(c.expr.type());
502 break;
503
507 break;
508
509 default: {}
510 }
511 }
512
514
515 for(const auto &field : parse_tree.parsed_class.fields)
516 {
517 get_annotation_class_refs(field.annotations);
518
519 if(field.signature.has_value())
520 {
522 field.descriptor,
523 field.signature,
525
526 // add generic type args to class refs as dependencies, same below for
527 // method types and entries from the local variable type table
529 field_type, parse_tree.class_refs);
530 get_class_refs_rec(field_type);
531 }
532 else
533 {
535 }
536 }
537
538 for(const auto &method : parse_tree.parsed_class.methods)
539 {
540 get_annotation_class_refs(method.annotations);
541 for(const auto &parameter_annotations : method.parameter_annotations)
542 get_annotation_class_refs(parameter_annotations);
543
544 if(method.signature.has_value())
545 {
547 method.descriptor,
548 method.signature,
553 }
554 else
555 {
556 get_class_refs_rec(*java_type_from_string(method.descriptor));
557 }
558
559 for(const auto &var : method.local_variable_table)
560 {
561 if(var.signature.has_value())
562 {
564 var.descriptor,
565 var.signature,
570 }
571 else
572 {
574 }
575 }
576 }
577}
578
580{
581 if(src.id()==ID_code)
582 {
584 const typet &rt=ct.return_type();
586 for(const auto &p : ct.parameters())
587 get_class_refs_rec(p.type());
588 }
589 else if(src.id() == ID_struct_tag)
590 {
592 if(is_java_array_tag(struct_tag_type.get_identifier()))
594 else
596 }
597 else if(src.id()==ID_struct)
598 {
600 for(const auto &c : struct_type.components())
601 get_class_refs_rec(c.type());
602 }
603 else if(src.id()==ID_pointer)
604 get_class_refs_rec(to_pointer_type(src).base_type());
605}
606
610 const std::vector<annotationt> &annotations)
611{
612 for(const auto &annotation : annotations)
613 {
614 get_class_refs_rec(annotation.type);
615 for(const auto &element_value_pair : annotation.element_value_pairs)
617 }
618}
619
624{
625 if(const auto &symbol_expr = expr_try_dynamic_cast<symbol_exprt>(value))
626 {
627 const irep_idt &value_id = symbol_expr->get_identifier();
629 }
630 else if(const auto &array_expr = expr_try_dynamic_cast<array_exprt>(value))
631 {
632 for(const exprt &operand : array_expr->operands())
634 }
635 // TODO: enum and nested annotation cases (once these are correctly parsed by
636 // get_relement_value).
637 // Note that in the cases where expr is a string or primitive type, no
638 // additional class references are needed.
639}
640
642{
645 {
646 log.error() << "invalid constant_pool_count" << messaget::eom;
647 throw 0;
648 }
649
651
652 for(auto it = std::next(constant_pool.begin()); it != constant_pool.end();
653 it++)
654 {
655 it->tag = read<u1>();
656
657 switch(it->tag)
658 {
659 case CONSTANT_Class:
660 it->ref1 = read<u2>();
661 break;
662
668 it->ref1 = read<u2>();
669 it->ref2 = read<u2>();
670 break;
671
672 case CONSTANT_String:
674 it->ref1 = read<u2>();
675 break;
676
677 case CONSTANT_Integer:
678 case CONSTANT_Float:
679 it->number = read<u4>();
680 break;
681
682 case CONSTANT_Long:
683 case CONSTANT_Double:
684 it->number = read<u8>();
685 // Eight-byte constants take up two entries in the constant_pool table.
686 if(it==constant_pool.end())
687 {
688 log.error() << "invalid double entry" << messaget::eom;
689 throw 0;
690 }
691 it++;
692 it->tag=0;
693 break;
694
695 case CONSTANT_Utf8:
696 {
697 const u2 bytes = read<u2>();
698 std::string s;
699 s.resize(bytes);
700 for(auto &ch : s)
701 ch = read<u1>();
702 it->s = s; // Add to string table
703 }
704 break;
705
707 it->ref1 = read<u1>();
708 it->ref2 = read<u2>();
709 break;
710
711 default:
712 log.error() << "unknown constant pool entry (" << it->tag << ")"
713 << messaget::eom;
714 throw 0;
715 }
716 }
717
718 // we do a bit of post-processing after we have them all
719 std::for_each(
720 std::next(constant_pool.begin()),
721 constant_pool.end(),
722 [&](constant_poolt::value_type &entry) {
723 switch(entry.tag)
724 {
725 case CONSTANT_Class:
726 {
727 const std::string &s = id2string(pool_entry(entry.ref1).s);
728 entry.expr = type_exprt(java_classname(s));
729 }
730 break;
731
732 case CONSTANT_Fieldref:
733 {
734 const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
735 const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
736 const pool_entryt &class_entry = pool_entry(entry.ref1);
737 const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
738 typet type=type_entry(nameandtype_entry.ref2);
739
740 auto class_tag = java_classname(id2string(class_name_entry.s));
741
742 fieldref_exprt fieldref(type, name_entry.s, class_tag.get_identifier());
743
744 entry.expr = fieldref;
745 }
746 break;
747
748 case CONSTANT_Methodref:
749 case CONSTANT_InterfaceMethodref:
750 {
751 const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
752 const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
753 const pool_entryt &class_entry = pool_entry(entry.ref1);
754 const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
755 typet type=type_entry(nameandtype_entry.ref2);
756
757 auto class_tag = java_classname(id2string(class_name_entry.s));
758
759 irep_idt mangled_method_name =
760 id2string(name_entry.s) + ":" +
761 id2string(pool_entry(nameandtype_entry.ref2).s);
762
763 irep_idt class_id = class_tag.get_identifier();
764
765 entry.expr = class_method_descriptor_exprt{
766 type, mangled_method_name, class_id, name_entry.s};
767 }
768 break;
769
770 case CONSTANT_String:
771 {
772 // ldc turns these into references to java.lang.String
773 entry.expr = java_string_literal_exprt{pool_entry(entry.ref1).s};
774 }
775 break;
776
777 case CONSTANT_Integer:
778 entry.expr = from_integer(entry.number, java_int_type());
779 break;
780
781 case CONSTANT_Float:
782 {
783 ieee_floatt value(ieee_float_spect::single_precision());
784 value.unpack(entry.number);
785 entry.expr = value.to_expr();
786 }
787 break;
788
789 case CONSTANT_Long:
790 entry.expr = from_integer(entry.number, java_long_type());
791 break;
792
793 case CONSTANT_Double:
794 {
795 ieee_floatt value(ieee_float_spect::double_precision());
796 value.unpack(entry.number);
797 entry.expr = value.to_expr();
798 }
799 break;
800
801 case CONSTANT_NameAndType:
802 {
803 entry.expr.id("nameandtype");
804 }
805 break;
806
807 case CONSTANT_MethodHandle:
808 {
809 entry.expr.id("methodhandle");
810 }
811 break;
812
813 case CONSTANT_MethodType:
814 {
815 entry.expr.id("methodtype");
816 }
817 break;
818
819 case CONSTANT_InvokeDynamic:
820 {
821 entry.expr.id("invokedynamic");
822 const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
823 typet type=type_entry(nameandtype_entry.ref2);
824 type.set(ID_java_lambda_method_handle_index, entry.ref1);
825 entry.expr.type() = type;
826 }
827 break;
828 }
829 });
830}
831
833{
834 const u2 interfaces_count = read<u2>();
835
836 for(std::size_t i=0; i<interfaces_count; i++)
838 constant(read<u2>()).type().get(ID_C_base_name));
839}
840
842{
843 const u2 fields_count = read<u2>();
844
845 for(std::size_t i=0; i<fields_count; i++)
846 {
848
849 const u2 access_flags = read<u2>();
850 const u2 name_index = read<u2>();
851 const u2 descriptor_index = read<u2>();
852 const u2 attributes_count = read<u2>();
853
854 field.name=pool_entry(name_index).s;
855 field.is_static=(access_flags&ACC_STATIC)!=0;
856 field.is_final=(access_flags&ACC_FINAL)!=0;
857 field.is_enum=(access_flags&ACC_ENUM)!=0;
858
859 field.descriptor=id2string(pool_entry(descriptor_index).s);
860 field.is_public=(access_flags&ACC_PUBLIC)!=0;
861 field.is_protected=(access_flags&ACC_PROTECTED)!=0;
862 field.is_private=(access_flags&ACC_PRIVATE)!=0;
863 const auto flags = (field.is_public ? 1 : 0) +
864 (field.is_protected?1:0)+
865 (field.is_private?1:0);
866 DATA_INVARIANT(flags<=1, "at most one of public, protected, private");
867
868 for(std::size_t j=0; j<attributes_count; j++)
870 }
871}
872
873#define T_BOOLEAN 4
874#define T_CHAR 5
875#define T_FLOAT 6
876#define T_DOUBLE 7
877#define T_BYTE 8
878#define T_SHORT 9
879#define T_INT 10
880#define T_LONG 11
881
882void java_bytecode_parsert::rbytecode(std::vector<instructiont> &instructions)
883{
884 const u4 code_length = read<u4>();
885
886 u4 address;
887 size_t bytecode_index=0; // index of bytecode instruction
888
889 for(address=0; address<code_length; address++)
890 {
891 bool wide_instruction=false;
892 u4 start_of_instruction=address;
893
894 u1 bytecode = read<u1>();
895
896 if(bytecode == BC_wide)
897 {
898 wide_instruction=true;
899 address++;
900 bytecode = read<u1>();
901 // The only valid instructions following a wide byte are
902 // [ifald]load, [ifald]store, ret and iinc
903 // All of these have either format of v, or V
904 INVARIANT(
905 bytecode_info[bytecode].format == 'v' ||
906 bytecode_info[bytecode].format == 'V',
907 std::string("Unexpected wide instruction: ") +
908 bytecode_info[bytecode].mnemonic);
909 }
910
911 instructions.emplace_back();
912 instructiont &instruction=instructions.back();
913 instruction.bytecode = bytecode;
914 instruction.address=start_of_instruction;
915 instruction.source_location
916 .set_java_bytecode_index(std::to_string(bytecode_index));
917
918 switch(bytecode_info[bytecode].format)
919 {
920 case ' ': // no further bytes
921 break;
922
923 case 'c': // a constant_pool index (one byte)
925 {
926 instruction.args.push_back(constant(read<u2>()));
927 address+=2;
928 }
929 else
930 {
931 instruction.args.push_back(constant(read<u1>()));
932 address+=1;
933 }
934 break;
935
936 case 'C': // a constant_pool index (two bytes)
937 instruction.args.push_back(constant(read<u2>()));
938 address+=2;
939 break;
940
941 case 'b': // a signed byte
942 {
943 const s1 c = read<u1>();
944 instruction.args.push_back(from_integer(c, signedbv_typet(8)));
945 }
946 address+=1;
947 break;
948
949 case 'o': // two byte branch offset, signed
950 {
951 const s2 offset = read<u2>();
952 // By converting the signed offset into an absolute address (by adding
953 // the current address) the number represented becomes unsigned.
954 instruction.args.push_back(
955 from_integer(address+offset, unsignedbv_typet(16)));
956 }
957 address+=2;
958 break;
959
960 case 'O': // four byte branch offset, signed
961 {
962 const s4 offset = read<u4>();
963 // By converting the signed offset into an absolute address (by adding
964 // the current address) the number represented becomes unsigned.
965 instruction.args.push_back(
966 from_integer(address+offset, unsignedbv_typet(32)));
967 }
968 address+=4;
969 break;
970
971 case 'v': // local variable index (one byte)
972 {
974 {
975 const u2 v = read<u2>();
976 instruction.args.push_back(from_integer(v, unsignedbv_typet(16)));
977 address += 2;
978 }
979 else
980 {
981 const u1 v = read<u1>();
982 instruction.args.push_back(from_integer(v, unsignedbv_typet(8)));
983 address += 1;
984 }
985 }
986
987 break;
988
989 case 'V':
990 // local variable index (two bytes) plus two signed bytes
992 {
993 const u2 v = read<u2>();
994 instruction.args.push_back(from_integer(v, unsignedbv_typet(16)));
995 const s2 c = read<u2>();
996 instruction.args.push_back(from_integer(c, signedbv_typet(16)));
997 address+=4;
998 }
999 else // local variable index (one byte) plus one signed byte
1000 {
1001 const u1 v = read<u1>();
1002 instruction.args.push_back(from_integer(v, unsignedbv_typet(8)));
1003 const s1 c = read<u1>();
1004 instruction.args.push_back(from_integer(c, signedbv_typet(8)));
1005 address+=2;
1006 }
1007 break;
1008
1009 case 'I': // two byte constant_pool index plus two bytes
1010 {
1011 const u2 c = read<u2>();
1012 instruction.args.push_back(constant(c));
1013 const u1 b1 = read<u1>();
1014 instruction.args.push_back(from_integer(b1, unsignedbv_typet(8)));
1015 const u1 b2 = read<u1>();
1016 instruction.args.push_back(from_integer(b2, unsignedbv_typet(8)));
1017 }
1018 address+=4;
1019 break;
1020
1021 case 'L': // lookupswitch
1022 {
1023 u4 base_offset=address;
1024
1025 // first a pad to 32-bit align
1026 while(((address + 1u) & 3u) != 0)
1027 {
1028 read<u1>();
1029 address++;
1030 }
1031
1032 // now default value
1033 const s4 default_value = read<u4>();
1034 // By converting the signed offset into an absolute address (by adding
1035 // the current address) the number represented becomes unsigned.
1036 instruction.args.push_back(
1037 from_integer(base_offset+default_value, unsignedbv_typet(32)));
1038 address+=4;
1039
1040 // number of pairs
1041 const u4 npairs = read<u4>();
1042 address+=4;
1043
1044 for(std::size_t i=0; i<npairs; i++)
1045 {
1046 const s4 match = read<u4>();
1047 const s4 offset = read<u4>();
1048 instruction.args.push_back(
1049 from_integer(match, signedbv_typet(32)));
1050 // By converting the signed offset into an absolute address (by adding
1051 // the current address) the number represented becomes unsigned.
1052 instruction.args.push_back(
1054 address+=8;
1055 }
1056 }
1057 break;
1058
1059 case 'T': // tableswitch
1060 {
1061 size_t base_offset=address;
1062
1063 // first a pad to 32-bit align
1064 while(((address + 1u) & 3u) != 0)
1065 {
1066 read<u1>();
1067 address++;
1068 }
1069
1070 // now default value
1071 const s4 default_value = read<u4>();
1072 instruction.args.push_back(
1073 from_integer(base_offset+default_value, signedbv_typet(32)));
1074 address+=4;
1075
1076 // now low value
1077 const s4 low_value = read<u4>();
1078 address+=4;
1079
1080 // now high value
1081 const s4 high_value = read<u4>();
1082 address+=4;
1083
1084 // there are high-low+1 offsets, and they are signed
1085 for(s4 i=low_value; i<=high_value; i++)
1086 {
1087 s4 offset = read<u4>();
1088 instruction.args.push_back(from_integer(i, signedbv_typet(32)));
1089 // By converting the signed offset into an absolute address (by adding
1090 // the current address) the number represented becomes unsigned.
1091 instruction.args.push_back(
1093 address+=4;
1094 }
1095 }
1096 break;
1097
1098 case 'm': // multianewarray: constant-pool index plus one unsigned byte
1099 {
1100 const u2 c = read<u2>(); // constant-pool index
1101 instruction.args.push_back(constant(c));
1102 const u1 dimensions = read<u1>(); // number of dimensions
1103 instruction.args.push_back(
1105 address+=3;
1106 }
1107 break;
1108
1109 case 't': // array subtype, one byte
1110 {
1111 typet t;
1112 switch(read<u1>())
1113 {
1114 case T_BOOLEAN: t.id(ID_bool); break;
1115 case T_CHAR: t.id(ID_char); break;
1116 case T_FLOAT: t.id(ID_float); break;
1117 case T_DOUBLE: t.id(ID_double); break;
1118 case T_BYTE: t.id(ID_byte); break;
1119 case T_SHORT: t.id(ID_short); break;
1120 case T_INT: t.id(ID_int); break;
1121 case T_LONG: t.id(ID_long); break;
1122 default:{};
1123 }
1124 instruction.args.push_back(type_exprt(t));
1125 }
1126 address+=1;
1127 break;
1128
1129 case 's': // a signed short
1130 {
1131 const s2 s = read<u2>();
1132 instruction.args.push_back(from_integer(s, signedbv_typet(16)));
1133 }
1134 address+=2;
1135 break;
1136
1137 default:
1138 throw "unknown JVM bytecode instruction";
1139 }
1141 }
1142
1143 if(address!=code_length)
1144 {
1145 log.error() << "bytecode length mismatch" << messaget::eom;
1146 throw 0;
1147 }
1148}
1149
1151{
1153 const u4 attribute_length = read<u4>();
1154
1156
1157 if(attribute_name == "Code")
1158 {
1161
1164 else
1165 rbytecode(method.instructions);
1166
1170 else
1171 {
1173
1174 for(std::size_t e = 0; e < exception_table_length; e++)
1175 {
1176 const u2 start_pc = read<u2>();
1177 const u2 end_pc = read<u2>();
1178
1179 // From the class file format spec ("4.7.3. The Code Attribute" for
1180 // Java8)
1181 INVARIANT(
1182 start_pc < end_pc,
1183 "The start_pc must be less than the end_pc as this is the range the "
1184 "exception is active");
1185
1186 const u2 handler_pc = read<u2>();
1187 const u2 catch_type = read<u2>();
1188 method.exception_table[e].start_pc = start_pc;
1189 method.exception_table[e].end_pc = end_pc;
1190 method.exception_table[e].handler_pc = handler_pc;
1191 if(catch_type != 0)
1192 method.exception_table[e].catch_type =
1193 to_struct_tag_type(pool_entry(catch_type).expr.type());
1194 }
1195 }
1196
1198
1199 for(std::size_t j=0; j<attributes_count; j++)
1200 rcode_attribute(method);
1201
1202 // method name
1204 "java::" + id2string(parse_tree.parsed_class.name) + "." +
1205 id2string(method.name) + ":" + method.descriptor);
1206
1207 irep_idt line_number;
1208
1209 // add missing line numbers
1210 for(auto &instruction : method.instructions)
1211 {
1212 if(!instruction.source_location.get_line().empty())
1213 line_number = instruction.source_location.get_line();
1214 else if(!line_number.empty())
1215 instruction.source_location.set_line(line_number);
1216 instruction.source_location.set_function(
1217 method.source_location.get_function());
1218 }
1219
1220 // line number of method (the first line number available)
1221 const auto it = std::find_if(
1222 method.instructions.begin(),
1223 method.instructions.end(),
1224 [&](const instructiont &instruction) {
1225 return !instruction.source_location.get_line().empty();
1226 });
1227 if(it != method.instructions.end())
1228 method.source_location.set_line(it->source_location.get_line());
1229 }
1230 else if(attribute_name=="Signature")
1231 {
1232 const u2 signature_index = read<u2>();
1234 }
1235 else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1236 attribute_name=="RuntimeVisibleAnnotations")
1237 {
1239 }
1240 else if(
1241 attribute_name == "RuntimeInvisibleParameterAnnotations" ||
1242 attribute_name == "RuntimeVisibleParameterAnnotations")
1243 {
1244 const u1 parameter_count = read<u1>();
1245 // There may be attributes for both runtime-visible and runtime-invisible
1246 // annotations, the length of either array may be longer than the other as
1247 // trailing parameters without annotations are omitted.
1248 // Extend our parameter_annotations if this one is longer than the one
1249 // previously recorded (if any).
1250 if(method.parameter_annotations.size() < parameter_count)
1254 }
1255 else if(attribute_name == "Exceptions")
1256 {
1258 }
1259 else
1261}
1262
1264{
1266 const u4 attribute_length = read<u4>();
1267
1269
1270 if(attribute_name=="Signature")
1271 {
1272 const u2 signature_index = read<u2>();
1274 }
1275 else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1276 attribute_name=="RuntimeVisibleAnnotations")
1277 {
1279 }
1280 else
1282}
1283
1285{
1287 const u4 attribute_length = read<u4>();
1288
1290
1291 if(attribute_name=="LineNumberTable")
1292 {
1293 std::map<unsigned, std::reference_wrapper<instructiont>> instruction_map;
1294 for(auto &instruction : method.instructions)
1295 instruction_map.emplace(instruction.address, instruction);
1296
1298
1299 for(std::size_t i=0; i<line_number_table_length; i++)
1300 {
1301 const u2 start_pc = read<u2>();
1302 const u2 line_number = read<u2>();
1303
1304 // annotate the bytecode program
1305 auto it = instruction_map.find(start_pc);
1306
1307 if(it!=instruction_map.end())
1308 it->second.get().source_location.set_line(line_number);
1309 }
1310 }
1311 else if(attribute_name=="LocalVariableTable")
1312 {
1314
1316
1317 for(std::size_t i=0; i<local_variable_table_length; i++)
1318 {
1319 const u2 start_pc = read<u2>();
1320 const u2 length = read<u2>();
1321 const u2 name_index = read<u2>();
1322 const u2 descriptor_index = read<u2>();
1323 const u2 index = read<u2>();
1324
1325 method.local_variable_table[i].index=index;
1326 method.local_variable_table[i].name=pool_entry(name_index).s;
1327 method.local_variable_table[i].descriptor=
1328 id2string(pool_entry(descriptor_index).s);
1329 method.local_variable_table[i].start_pc=start_pc;
1330 method.local_variable_table[i].length=length;
1331 }
1332 }
1333 else if(attribute_name=="LocalVariableTypeTable")
1334 {
1336 }
1337 else if(attribute_name=="StackMapTable")
1338 {
1339 const u2 stack_map_entries = read<u2>();
1340
1341 method.stack_map_table.resize(stack_map_entries);
1342
1343 for(size_t i=0; i<stack_map_entries; i++)
1344 {
1345 const u1 frame_type = read<u1>();
1346 if(frame_type<=63)
1347 {
1348 method.stack_map_table[i].type=methodt::stack_map_table_entryt::SAME;
1349 method.stack_map_table[i].locals.resize(0);
1350 method.stack_map_table[i].stack.resize(0);
1351 }
1352 else if(64<=frame_type && frame_type<=127)
1353 {
1354 method.stack_map_table[i].type=
1355 methodt::stack_map_table_entryt::SAME_LOCALS_ONE_STACK;
1356 method.stack_map_table[i].locals.resize(0);
1357 method.stack_map_table[i].stack.resize(1);
1360 method.stack_map_table[i].stack[0]=verification_type_info;
1361 }
1362 else if(frame_type==247)
1363 {
1364 method.stack_map_table[i].type=
1365 methodt::stack_map_table_entryt::SAME_LOCALS_ONE_STACK_EXTENDED;
1366 method.stack_map_table[i].locals.resize(0);
1367 method.stack_map_table[i].stack.resize(1);
1369 const u2 offset_delta = read<u2>();
1371 method.stack_map_table[i].stack[0]=verification_type_info;
1372 method.stack_map_table[i].offset_delta=offset_delta;
1373 }
1374 else if(248<=frame_type && frame_type<=250)
1375 {
1376 method.stack_map_table[i].type=methodt::stack_map_table_entryt::CHOP;
1377 method.stack_map_table[i].locals.resize(0);
1378 method.stack_map_table[i].stack.resize(0);
1379 const u2 offset_delta = read<u2>();
1380 method.stack_map_table[i].offset_delta=offset_delta;
1381 }
1382 else if(frame_type==251)
1383 {
1384 method.stack_map_table[i].type
1385 =methodt::stack_map_table_entryt::SAME_EXTENDED;
1386 method.stack_map_table[i].locals.resize(0);
1387 method.stack_map_table[i].stack.resize(0);
1388 const u2 offset_delta = read<u2>();
1389 method.stack_map_table[i].offset_delta=offset_delta;
1390 }
1391 else if(252<=frame_type && frame_type<=254)
1392 {
1393 size_t new_locals = frame_type - 251;
1394 method.stack_map_table[i].type=methodt::stack_map_table_entryt::APPEND;
1395 method.stack_map_table[i].locals.resize(new_locals);
1396 method.stack_map_table[i].stack.resize(0);
1397 const u2 offset_delta = read<u2>();
1398 method.stack_map_table[i].offset_delta=offset_delta;
1399 for(size_t k=0; k<new_locals; k++)
1400 {
1401 method.stack_map_table[i].locals
1404 method.stack_map_table[i].locals.back();
1406 }
1407 }
1408 else if(frame_type==255)
1409 {
1410 method.stack_map_table[i].type=methodt::stack_map_table_entryt::FULL;
1411 const u2 offset_delta = read<u2>();
1412 method.stack_map_table[i].offset_delta=offset_delta;
1413 const u2 number_locals = read<u2>();
1414 method.stack_map_table[i].locals.resize(number_locals);
1415 for(size_t k=0; k<(size_t) number_locals; k++)
1416 {
1417 method.stack_map_table[i].locals
1420 method.stack_map_table[i].locals.back();
1422 }
1423 const u2 number_stack_items = read<u2>();
1424 method.stack_map_table[i].stack.resize(number_stack_items);
1425 for(size_t k=0; k<(size_t) number_stack_items; k++)
1426 {
1427 method.stack_map_table[i].stack
1430 method.stack_map_table[i].stack.back();
1432 }
1433 }
1434 else
1435 throw "error: unknown stack frame type encountered";
1436 }
1437 }
1438 else
1440}
1441
1444{
1445 const u1 tag = read<u1>();
1446 switch(tag)
1447 {
1448 case VTYPE_INFO_TOP:
1449 v.type=methodt::verification_type_infot::TOP;
1450 break;
1451 case VTYPE_INFO_INTEGER:
1452 v.type=methodt::verification_type_infot::INTEGER;
1453 break;
1454 case VTYPE_INFO_FLOAT:
1455 v.type=methodt::verification_type_infot::FLOAT;
1456 break;
1457 case VTYPE_INFO_LONG:
1458 v.type=methodt::verification_type_infot::LONG;
1459 break;
1460 case VTYPE_INFO_DOUBLE:
1461 v.type=methodt::verification_type_infot::DOUBLE;
1462 break;
1464 v.type=methodt::verification_type_infot::ITEM_NULL;
1465 break;
1467 v.type=methodt::verification_type_infot::UNINITIALIZED_THIS;
1468 break;
1469 case VTYPE_INFO_OBJECT:
1470 v.type=methodt::verification_type_infot::OBJECT;
1471 v.cpool_index = read<u2>();
1472 break;
1473 case VTYPE_INFO_UNINIT:
1474 v.type=methodt::verification_type_infot::UNINITIALIZED;
1475 v.offset = read<u2>();
1476 break;
1477 default:
1478 throw "error: unknown verification type info encountered";
1479 }
1480}
1481
1483 std::vector<annotationt> &annotations)
1484{
1485 const u2 num_annotations = read<u2>();
1486
1487 for(u2 number=0; number<num_annotations; number++)
1488 {
1489 annotationt annotation;
1490 rRuntimeAnnotation(annotation);
1491 annotations.push_back(annotation);
1492 }
1493}
1494
1496 annotationt &annotation)
1497{
1498 const u2 type_index = read<u2>();
1499 annotation.type=type_entry(type_index);
1501}
1502
1504 annotationt::element_value_pairst &element_value_pairs)
1505{
1507 element_value_pairs.resize(num_element_value_pairs);
1508
1509 for(auto &element_value_pair : element_value_pairs)
1510 {
1511 const u2 element_name_index = read<u2>();
1514 }
1515}
1516
1524{
1525 const u1 tag = read<u1>();
1526
1527 switch(tag)
1528 {
1529 case 'e':
1530 {
1533 // todo: enum
1534 return exprt();
1535 }
1536
1537 case 'c':
1538 {
1539 const u2 class_info_index = read<u2>();
1541 }
1542
1543 case '@':
1544 {
1545 // TODO: return this wrapped in an exprt
1546 // another annotation, recursively
1547 annotationt annotation;
1548 rRuntimeAnnotation(annotation);
1549 return exprt();
1550 }
1551
1552 case '[':
1553 {
1554 const u2 num_values = read<u2>();
1555 exprt::operandst values;
1556 values.reserve(num_values);
1557 for(std::size_t i=0; i<num_values; i++)
1558 {
1559 values.push_back(get_relement_value());
1560 }
1561 return array_exprt(std::move(values), array_typet(typet(), exprt()));
1562 }
1563
1564 case 's':
1565 {
1566 const u2 const_value_index = read<u2>();
1568 }
1569
1570 default:
1571 {
1572 const u2 const_value_index = read<u2>();
1574 }
1575 }
1576}
1577
1590 const u4 &attribute_length)
1591{
1592 classt &parsed_class = parse_tree.parsed_class;
1593 std::string name = parsed_class.name.c_str();
1594 const u2 number_of_classes = read<u2>();
1596 INVARIANT(
1598 "The number of bytes to be read for the InnerClasses attribute does not "
1599 "match the attribute length.");
1600
1601 const auto pool_entry_lambda = [this](u2 index) -> pool_entryt & {
1602 return pool_entry(index);
1603 };
1604 const auto remove_separator_char = [](std::string str, char ch) {
1605 str.erase(std::remove(str.begin(), str.end(), ch), str.end());
1606 return str;
1607 };
1608
1609 for(int i = 0; i < number_of_classes; i++)
1610 {
1613 const u2 inner_name_index = read<u2>();
1615
1616 std::string inner_class_info_name =
1619 bool is_private = (inner_class_access_flags & ACC_PRIVATE) != 0;
1620 bool is_public = (inner_class_access_flags & ACC_PUBLIC) != 0;
1621 bool is_protected = (inner_class_access_flags & ACC_PROTECTED) != 0;
1622 bool is_static = (inner_class_access_flags & ACC_STATIC) != 0;
1623
1624 // If the original parsed class name matches the inner class name,
1625 // the parsed class is an inner class, so overwrite the parsed class'
1626 // access information and mark it as an inner class.
1627 bool is_inner_class = remove_separator_char(id2string(parsed_class.name), '.') ==
1629 if(!is_inner_class)
1630 continue;
1631 parsed_class.is_inner_class = is_inner_class;
1632 parsed_class.is_static_class = is_static;
1633 // This is a marker that a class is anonymous.
1634 if(inner_name_index == 0)
1635 parsed_class.is_anonymous_class = true;
1636 else
1638 // Note that if outer_class_info_index == 0, the inner class is an anonymous
1639 // or local class, and is treated as private.
1640 if(outer_class_info_index == 0)
1641 {
1642 parsed_class.is_private = true;
1643 parsed_class.is_protected = false;
1644 parsed_class.is_public = false;
1645 }
1646 else
1647 {
1648 std::string outer_class_info_name =
1651 parsed_class.outer_class =
1653 parsed_class.is_private = is_private;
1654 parsed_class.is_protected = is_protected;
1655 parsed_class.is_public = is_public;
1656 }
1657 }
1658}
1659
1666{
1668
1669 std::vector<irep_idt> exceptions;
1670 for(size_t i = 0; i < number_of_exceptions; i++)
1671 {
1673 const irep_idt exception_name =
1675 exceptions.push_back(exception_name);
1676 }
1677 return exceptions;
1678}
1679
1681{
1682 classt &parsed_class = parse_tree.parsed_class;
1683
1685 const u4 attribute_length = read<u4>();
1686
1688
1689 if(attribute_name=="SourceFile")
1690 {
1691 const u2 sourcefile_index = read<u2>();
1693
1694 const std::string &fqn(id2string(parsed_class.name));
1695 size_t last_index = fqn.find_last_of('.');
1696 if(last_index==std::string::npos)
1698 else
1699 {
1700 std::string package_name=fqn.substr(0, last_index+1);
1701 std::replace(package_name.begin(), package_name.end(), '.', '/');
1702 const std::string &full_file_name=
1705 }
1706
1707 for(auto &method : parsed_class.methods)
1708 {
1709 method.source_location.set_file(sourcefile_name);
1710 for(auto &instruction : method.instructions)
1711 {
1712 if(!instruction.source_location.get_line().empty())
1713 instruction.source_location.set_file(sourcefile_name);
1714 }
1715 }
1716 }
1717 else if(attribute_name=="Signature")
1718 {
1719 const u2 signature_index = read<u2>();
1722 parsed_class.signature.value(),
1724 }
1725 else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1726 attribute_name=="RuntimeVisibleAnnotations")
1727 {
1729 }
1730 else if(attribute_name == "BootstrapMethods")
1731 {
1732 // for this attribute
1733 // cf. https://docs.oracle.com/javase/specs/jvms/se8/html/jvms-4.html#jvms-4.7.23
1734 INVARIANT(
1735 !parsed_class.attribute_bootstrapmethods_read,
1736 "only one BootstrapMethods argument is allowed in a class file");
1737
1738 // mark as read in parsed class
1739 parsed_class.attribute_bootstrapmethods_read = true;
1741 }
1742 else if(attribute_name == "InnerClasses")
1743 {
1745 }
1746 else
1748}
1749
1751{
1752 const u2 methods_count = read<u2>();
1753
1754 for(std::size_t j=0; j<methods_count; j++)
1755 rmethod();
1756}
1757
1758#define ACC_PUBLIC 0x0001u
1759#define ACC_PRIVATE 0x0002u
1760#define ACC_PROTECTED 0x0004u
1761#define ACC_STATIC 0x0008u
1762#define ACC_FINAL 0x0010u
1763#define ACC_VARARGS 0x0080u
1764#define ACC_SUPER 0x0020u
1765#define ACC_VOLATILE 0x0040u
1766#define ACC_TRANSIENT 0x0080u
1767#define ACC_INTERFACE 0x0200u
1768#define ACC_ABSTRACT 0x0400u
1769#define ACC_SYNTHETIC 0x1000u
1770#define ACC_ANNOTATION 0x2000u
1771#define ACC_ENUM 0x4000u
1772
1774{
1776
1777 const u2 access_flags = read<u2>();
1778 const u2 name_index = read<u2>();
1779 const u2 descriptor_index = read<u2>();
1780
1781 method.is_final=(access_flags&ACC_FINAL)!=0;
1782 method.is_static=(access_flags&ACC_STATIC)!=0;
1784 method.is_public=(access_flags&ACC_PUBLIC)!=0;
1788 method.is_native=(access_flags&ACC_NATIVE)!=0;
1789 method.is_bridge = (access_flags & ACC_BRIDGE) != 0;
1790 method.is_varargs = (access_flags & ACC_VARARGS) != 0;
1791 method.is_synthetic = (access_flags & ACC_SYNTHETIC) != 0;
1792 method.name=pool_entry(name_index).s;
1793 method.base_name=pool_entry(name_index).s;
1794 method.descriptor=id2string(pool_entry(descriptor_index).s);
1795
1796 const auto flags = (method.is_public ? 1 : 0) +
1797 (method.is_protected?1:0)+
1798 (method.is_private?1:0);
1799 DATA_INVARIANT(flags<=1, "at most one of public, protected, private");
1800 const u2 attributes_count = read<u2>();
1801
1802 for(std::size_t j=0; j<attributes_count; j++)
1803 rmethod_attribute(method);
1804}
1805
1807 std::istream &istream,
1808 const irep_idt &class_name,
1809 message_handlert &message_handler,
1810 bool skip_instructions)
1811{
1813 java_bytecode_parser.in=&istream;
1814 java_bytecode_parser.log.set_message_handler(message_handler);
1815
1817
1818 if(
1819 parser_result ||
1820 java_bytecode_parser.parse_tree.parsed_class.name != class_name)
1821 {
1822 return {};
1823 }
1824
1825 return std::move(java_bytecode_parser.parse_tree);
1826}
1827
1829 const std::string &file,
1830 const irep_idt &class_name,
1831 message_handlert &message_handler,
1832 bool skip_instructions)
1833{
1834 std::ifstream in(file, std::ios::binary);
1835
1836 if(!in)
1837 {
1838 return {};
1839 }
1840
1841 return java_bytecode_parse(
1842 in, class_name, message_handler, skip_instructions);
1843}
1844
1849{
1851
1852 INVARIANT(
1854 "Local variable type table cannot have more elements "
1855 "than the local variable table.");
1856 for(std::size_t i=0; i<local_variable_type_table_length; i++)
1857 {
1858 const u2 start_pc = read<u2>();
1859 const u2 length = read<u2>();
1860 const u2 name_index = read<u2>();
1861 const u2 signature_index = read<u2>();
1862 const u2 index = read<u2>();
1863
1864 bool found=false;
1865 for(auto &lvar : method.local_variable_table)
1866 {
1867 // compare to entry in LVT
1868 if(lvar.index==index &&
1869 lvar.name==pool_entry(name_index).s &&
1870 lvar.start_pc==start_pc &&
1871 lvar.length==length)
1872 {
1873 found=true;
1875 break;
1876 }
1877 }
1878 INVARIANT(
1879 found,
1880 "Entry in LocalVariableTypeTable must be present in LVT");
1881 }
1882}
1883
1911
1919{
1920 const std::function<pool_entryt &(u2)> pool_entry_lambda =
1921 [this](u2 index) -> pool_entryt & { return pool_entry(index); };
1922
1924
1927 ref_entry.get_name_and_type(pool_entry_lambda);
1928
1929 // The following lambda kinds specified in the JVMS (see for example
1930 // https://docs.oracle.com/javase/specs/jvms/se7/html/jvms-5.html#jvms-5.4.3.5
1931 // but which aren't actually created by javac. Java has no syntax for a field-
1932 // refernce like this, and even writing a stereotypical lambda like
1933 // Producer<FieldType> = instance -> instance.field does not generate this
1934 // kind of handle, but rather a synthetic method implementing the getfield
1935 // operation.
1936 // We don't implement a handle kind that can't be produced yet, but should
1937 // they ever turn up this is where to fix them.
1938
1939 if(
1940 entry.get_handle_kind() ==
1942 entry.get_handle_kind() ==
1944 entry.get_handle_kind() ==
1946 entry.get_handle_kind() ==
1948 {
1949 return {};
1950 }
1951
1952 irep_idt class_name =
1954
1956 std::string descriptor = name_and_type.get_descriptor(pool_entry_lambda);
1957 irep_idt mangled_method_name = id2string(method_name) + ":" + descriptor;
1959
1960 method_handle_typet handle_type =
1962
1963 class_method_descriptor_exprt method_descriptor{
1964 method_type, mangled_method_name, class_name, method_name};
1965 lambda_method_handlet lambda_method_handle{method_descriptor, handle_type};
1966
1967 return lambda_method_handle;
1968}
1969
1972{
1974 for(size_t bootstrap_method_index = 0;
1977 {
1980
1982
1984 log.debug() << "INFO: parse BootstrapMethod handle "
1985 << num_bootstrap_arguments << " #args" << messaget::eom;
1986
1987 // read u2 values of entry into vector
1988 std::vector<u2> u2_values(num_bootstrap_arguments);
1989 for(size_t i = 0; i < num_bootstrap_arguments; i++)
1990 u2_values[i] = read<u2>();
1991
1992 // try parsing bootstrap method handle
1993 // each entry contains a MethodHandle structure
1994 // u2 tag
1995 // u2 reference kind which must be in the range from 1 to 9
1996 // u2 reference index into the constant pool
1997 //
1998 // reference kinds use the following
1999 // 1 to 4 must point to a CONSTANT_Fieldref structure
2000 // 5 or 8 must point to a CONSTANT_Methodref structure
2001 // 6 or 7 must point to a CONSTANT_Methodref or
2002 // CONSTANT_InterfaceMethodref structure, if the class file version
2003 // number is 52.0 or above, to a CONSTANT_Methodref only in the case
2004 // of less than 52.0
2005 // 9 must point to a CONSTANT_InterfaceMethodref structure
2006
2007 // the index must point to a CONSTANT_String
2008 // CONSTANT_Class
2009 // CONSTANT_Integer
2010 // CONSTANT_Long
2011 // CONSTANT_Float
2012 // CONSTANT_Double
2013 // CONSTANT_MethodHandle
2014 // CONSTANT_MethodType
2015
2016 // We read the three arguments here to see whether they correspond to
2017 // our hypotheses for this being a lambda function entry.
2018
2019 // Need at least 3 arguments, the interface type, the method hanlde
2020 // and the method_type, otherwise it doesn't look like a call that we
2021 // understand
2023 {
2025 log.debug()
2026 << "format of BootstrapMethods entry not recognized: too few arguments"
2027 << messaget::eom;
2028 continue;
2029 }
2030
2034
2035 // The additional arguments for the altmetafactory call are skipped,
2036 // as they are currently not used. We verify though that they are of
2037 // CONSTANT_Integer type, cases where this does not hold will be
2038 // analyzed further.
2039 bool recognized = true;
2040 for(size_t i = 3; i < num_bootstrap_arguments; i++)
2041 {
2044 }
2045
2046 if(!recognized)
2047 {
2048 log.debug() << "format of BootstrapMethods entry not recognized: extra "
2049 "arguments of wrong type"
2050 << messaget::eom;
2052 continue;
2053 }
2054
2059
2060 if(
2064 {
2065 log.debug()
2066 << "format of BootstrapMethods entry not recognized: arguments "
2067 "wrong type"
2068 << messaget::eom;
2070 continue;
2071 }
2072
2073 log.debug() << "INFO: parse lambda handle" << messaget::eom;
2076
2077 if(!lambda_method_handle.has_value())
2078 {
2079 log.debug() << "format of BootstrapMethods entry not recognized: method "
2080 "handle not recognised"
2081 << messaget::eom;
2083 continue;
2084 }
2085
2086 // If parse_method_handle can't parse the lambda method, it should return {}
2088 lambda_method_handle->handle_type != method_handle_typet::UNKNOWN_HANDLE);
2089
2090 log.debug()
2091 << "lambda function reference "
2092 << id2string(
2093 lambda_method_handle->get_method_descriptor().base_method_name())
2094 << " in class \"" << parse_tree.parsed_class.name << "\""
2095 << "\n interface type is "
2097 << "\n method type is "
2101 }
2102}
2103
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
struct bytecode_infot const bytecode_info[]
#define BC_wide
uint16_t u2
uint32_t u4
ait supplies three of the four components needed: an abstract interpreter (in this case handling func...
Definition ai.h:563
Array constructor from list of elements.
Definition std_expr.h:1563
Arrays with given size.
Definition std_types.h:763
class_infot get_class(const pool_entry_lookupt &pool_entry) const
name_and_type_infot get_name_and_type(const pool_entry_lookupt &pool_entry) const
u2 get_name_and_type_index() const
base_ref_infot(const pool_entryt &entry)
Corresponds to the CONSTANT_Class_info Structure Described in Java 8 specification 4....
class_infot(const pool_entryt &entry)
std::string get_name(const pool_entry_lookupt &pool_entry) const
An expression describing a method on a class.
Definition std_expr.h:3448
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition dstring.h:39
bool empty() const
Definition dstring.h:90
const char * c_str() const
Definition dstring.h:117
Base class for all expressions.
Definition expr.h:56
std::vector< exprt > operandst
Definition expr.h:58
typet & type()
Return the type of the expression.
Definition expr.h:84
const irep_idt & get(const irep_idt &name) const
Definition irep.cpp:44
const irep_idt & id() const
Definition irep.h:396
pool_entryt & pool_entry(u2 index)
void get_annotation_class_refs(const std::vector< annotationt > &annotations)
For each of the given annotations, get a reference to its class and recursively get class references ...
java_bytecode_parsert(bool skip_instructions)
void store_unknown_method_handle(size_t bootstrap_method_index)
Creates an unknown method handle and puts it into the parsed_class.
exprt get_relement_value()
Corresponds to the element_value structure Described in Java 8 specification 4.7.16....
java_bytecode_parse_treet::annotationt annotationt
java_bytecode_parse_treet::methodt methodt
void rRuntimeAnnotation_attribute(std::vector< annotationt > &)
const typet type_entry(u2 index)
optionalt< lambda_method_handlet > parse_method_handle(const class method_handle_infot &entry)
Read method handle pointed to from constant pool entry at index, return type of method handle and nam...
void rbytecode(std::vector< instructiont > &)
void rcode_attribute(methodt &method)
void read_bootstrapmethods_entry()
Read all entries of the BootstrapMethods attribute of a class file.
void get_class_refs()
Get the class references for the benefit of a dependency analysis.
void get_annotation_value_class_refs(const exprt &value)
See java_bytecode_parsert::get_annotation_class_refs.
java_bytecode_parse_treet parse_tree
std::vector< pool_entryt > constant_poolt
void skip_bytes(std::size_t bytes)
void read_verification_type_info(methodt::verification_type_infot &)
java_bytecode_parse_treet::fieldt fieldt
void get_class_refs_rec(const typet &)
void parse_local_variable_type_table(methodt &method)
Parses the local variable type table of a method.
void rRuntimeAnnotation(annotationt &)
void relement_value_pairs(annotationt::element_value_pairst &)
void rinner_classes_attribute(const u4 &attribute_length)
Corresponds to the element_value structure Described in Java 8 specification 4.7.6 https://docs....
std::vector< irep_idt > rexceptions_attribute()
Corresponds to the element_value structure Described in Java 8 specification 4.7.5 https://docs....
void rmethod_attribute(methodt &method)
method_handle_kindt
Indicates what sort of code should be synthesised for a lambda call:
Definition java_types.h:465
@ LAMBDA_STATIC_METHOD_HANDLE
Direct call to the given method.
@ LAMBDA_VIRTUAL_METHOD_HANDLE
Virtual call to the given interface or method.
@ LAMBDA_CONSTRUCTOR_HANDLE
Instantiate the needed type then call a constructor.
mstreamt & error() const
Definition message.h:399
mstreamt & debug() const
Definition message.h:429
static eomt eom
Definition message.h:297
method_handle_infot(const pool_entryt &entry)
base_ref_infot get_reference(const pool_entry_lookupt &pool_entry) const
method_handle_kindt handle_kind
method_handle_kindt get_handle_kind() const
method_handle_kindt
Correspond to the different valid values for field handle_kind From Java 8 spec 4....
Corresponds to the CONSTANT_NameAndType_info Structure Described in Java 8 specification 4....
name_and_type_infot(const pool_entryt &entry)
std::string get_descriptor(const pool_entry_lookupt &pool_entry) const
std::string get_name(const pool_entry_lookupt &pool_entry) const
std::istream * in
Definition parser.h:26
messaget log
Definition parser.h:136
Fixed-width bit-vector with two's complement interpretation.
const irep_idt & get_function() const
void set_java_bytecode_index(const irep_idt &index)
void set_line(const irep_idt &line)
void set_function(const irep_idt &function)
A struct tag type, i.e., struct_typet with an identifier.
Definition std_types.h:449
Structure type, corresponds to C style structs.
Definition std_types.h:231
std::function< pool_entryt &(u2)> pool_entry_lookupt
static std::string read_utf8_constant(const pool_entryt &entry)
structured_pool_entryt(const pool_entryt &entry)
static symbol_exprt typeless(const irep_idt &id)
Generate a symbol_exprt without a proper type.
Definition std_expr.h:132
const irep_idt & get_identifier() const
Definition std_types.h:410
An expression denoting a type.
Definition std_expr.h:2906
The type of an expression, extends irept.
Definition type.h:29
Fixed-width bit-vector with unsigned binary interpretation.
static format_containert< T > format(const T &o)
Definition format.h:37
const std::string & id2string(const irep_idt &d)
Definition irep.h:47
#define CONSTANT_Fieldref
#define CONSTANT_String
#define VTYPE_INFO_DOUBLE
#define UNUSED_u2(x)
#define ACC_PUBLIC
#define CONSTANT_InvokeDynamic
#define VTYPE_INFO_FLOAT
#define ACC_BRIDGE
#define ACC_NATIVE
#define T_SHORT
#define ACC_ENUM
#define CONSTANT_Methodref
#define T_LONG
#define T_FLOAT
#define ACC_VARARGS
#define CONSTANT_Long
#define T_INT
#define VTYPE_INFO_LONG
#define VTYPE_INFO_TOP
#define CONSTANT_NameAndType
#define ACC_ABSTRACT
#define VTYPE_INFO_OBJECT
#define ACC_ANNOTATION
optionalt< java_bytecode_parse_treet > java_bytecode_parse(std::istream &istream, const irep_idt &class_name, message_handlert &message_handler, bool skip_instructions)
Attempt to parse a Java class from the given stream.
#define T_DOUBLE
#define ACC_SYNCHRONIZED
#define T_BYTE
#define CONSTANT_Class
#define ACC_STATIC
#define CONSTANT_Integer
#define CONSTANT_MethodType
#define VTYPE_INFO_INTEGER
static java_class_typet::method_handle_kindt get_method_handle_type(method_handle_infot::method_handle_kindt java_handle_kind)
Translate the lambda method reference kind in a class file into the kind of handling the method requi...
#define ACC_PROTECTED
#define CONSTANT_Double
#define ACC_PRIVATE
#define CONSTANT_Float
#define VTYPE_INFO_UNINIT
#define CONSTANT_InterfaceMethodref
#define T_BOOLEAN
#define ACC_INTERFACE
#define ACC_SYNTHETIC
#define VTYPE_INFO_UNINIT_THIS
#define ACC_FINAL
#define CONSTANT_MethodHandle
#define T_CHAR
#define VTYPE_INFO_ITEM_NULL
#define CONSTANT_Utf8
Representation of a constant Java string.
void get_dependencies_from_generic_parameters(const std::string &signature, std::set< irep_idt > &refs)
Collect information about generic type parameters from a given signature.
optionalt< typet > java_type_from_string(const std::string &src, const std::string &class_name_prefix)
Transforms a string representation of a Java type into an internal type representation thereof.
struct_tag_typet java_classname(const std::string &id)
bool is_java_array_tag(const irep_idt &tag)
See above.
const typet & java_array_element_type(const struct_tag_typet &array_symbol)
Return a const reference to the element type of a given java array type.
const java_method_typet & to_java_method_type(const typet &type)
Definition java_types.h:184
optionalt< typet > java_type_from_string_with_exception(const std::string &descriptor, const optionalt< std::string > &signature, const std::string &class_name)
static optionalt< java_class_typet::java_lambda_method_handlet > lambda_method_handle(const symbol_table_baset &symbol_table, const irep_idt &method_identifier, const java_method_typet &dynamic_method_type)
Parser utilities.
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition invariant.h:534
#define PRECONDITION(CONDITION)
Definition invariant.h:463
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition invariant.h:423
#define POSTCONDITION(CONDITION)
Definition invariant.h:479
API to expression classes.
const struct_typet & to_struct_type(const typet &type)
Cast a typet to a struct_typet.
Definition std_types.h:308
const struct_tag_typet & to_struct_tag_type(const typet &type)
Cast a typet to a struct_tag_typet.
Definition std_types.h:474
Definition kdev_t.h:19
std::vector< element_value_pairt > element_value_pairst
void add_method_handle(size_t bootstrap_index, const lambda_method_handlet &handle)
std::vector< annotationst > parameter_annotations
Java annotations that were applied to parameters of this method.