cprover
java_bytecode_parser.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module:
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
9 #include "java_bytecode_parser.h"
10 
11 #include <algorithm>
12 #include <fstream>
13 #include <map>
14 #include <string>
15 
16 #include <util/parser.h>
17 #include <util/std_expr.h>
18 #include <util/arith_tools.h>
19 #include <util/ieee_float.h>
20 #include <util/prefix.h>
21 
22 #include <ansi-c/string_constant.h>
23 
25 #include "java_types.h"
26 #include "bytecode_info.h"
27 
28 #ifdef DEBUG
29 #include <iostream>
30 #endif
31 
33 {
34 public:
36  {
37  get_bytecodes();
38  }
39 
40  virtual bool parse();
41 
51 
53 
54  struct pool_entryt
55  {
61  pool_entryt():tag(0), ref1(0), ref2(0), number(0) { }
62  };
63 
64  typedef std::vector<pool_entryt> constant_poolt;
65  constant_poolt constant_pool;
66 
67 protected:
68  class bytecodet
69  {
70  public:
72  char format;
73  };
74 
75  std::vector<bytecodet> bytecodes;
76 
78  {
79  if(index==0 || index>=constant_pool.size())
80  {
81  error() << "invalid constant pool index (" << index << ")" << eom;
82  throw 0;
83  }
84 
85  return constant_pool[index];
86  }
87 
88  exprt &constant(u2 index)
89  {
90  return pool_entry(index).expr;
91  }
92 
93  const typet type_entry(u2 index)
94  {
96  }
97 
99  {
100  // pre-hash the mnemonics, so we do this only once
101  bytecodes.resize(256);
102  for(const bytecode_infot *p=bytecode_info; p->mnemonic!=nullptr; p++)
103  {
104  assert(p->opcode<bytecodes.size());
105  bytecodes[p->opcode].mnemonic=p->mnemonic;
106  bytecodes[p->opcode].format=p->format;
107  }
108  }
109 
110  void rClassFile();
111  void rconstant_pool();
112  void rinterfaces(classt &parsed_class);
113  void rfields(classt &parsed_class);
114  void rmethods(classt &parsed_class);
115  void rmethod(classt &parsed_class);
116  void rclass_attribute(classt &parsed_class);
117  void rRuntimeAnnotation_attribute(annotationst &);
118  void rRuntimeAnnotation(annotationt &);
121  void rmethod_attribute(methodt &method);
122  void rfield_attribute(fieldt &);
123  void rcode_attribute(methodt &method);
126  void get_class_refs();
127  void get_class_refs_rec(const typet &);
128 
129  void skip_bytes(std::size_t bytes)
130  {
131  for(std::size_t i=0; i<bytes; i++)
132  {
133  if(!*in)
134  {
135  error() << "unexpected end of bytecode file" << eom;
136  throw 0;
137  }
138  in->get();
139  }
140  }
141 
142  u8 read_bytes(size_t bytes)
143  {
144  u8 result=0;
145  for(size_t i=0; i<bytes; i++)
146  {
147  if(!*in)
148  {
149  error() << "unexpected end of bytecode file" << eom;
150  throw 0;
151  }
152  result<<=8;
153  result|=in->get();
154  }
155  return result;
156  }
157 
159  {
160  return (u1)read_bytes(1);
161  }
162 
163  inline u2 read_u2()
164  {
165  return (u2)read_bytes(2);
166  }
167 
169  {
170  return (u4)read_bytes(4);
171  }
172 
174  {
175  return read_bytes(8);
176  }
177 };
178 
179 #define CONSTANT_Class 7
180 #define CONSTANT_Fieldref 9
181 #define CONSTANT_Methodref 10
182 #define CONSTANT_InterfaceMethodref 11
183 #define CONSTANT_String 8
184 #define CONSTANT_Integer 3
185 #define CONSTANT_Float 4
186 #define CONSTANT_Long 5
187 #define CONSTANT_Double 6
188 #define CONSTANT_NameAndType 12
189 #define CONSTANT_Utf8 1
190 #define CONSTANT_MethodHandle 15
191 #define CONSTANT_MethodType 16
192 #define CONSTANT_InvokeDynamic 18
193 
194 #define VTYPE_INFO_TOP 0
195 #define VTYPE_INFO_INTEGER 1
196 #define VTYPE_INFO_FLOAT 2
197 #define VTYPE_INFO_LONG 3
198 #define VTYPE_INFO_DOUBLE 4
199 #define VTYPE_INFO_ITEM_NULL 5
200 #define VTYPE_INFO_UNINIT_THIS 6
201 #define VTYPE_INFO_OBJECT 7
202 #define VTYPE_INFO_UNINIT 8
203 
205 {
206  try
207  {
208  rClassFile();
209  }
210 
211  catch(const char *message)
212  {
213  error() << message << eom;
214  return true;
215  }
216 
217  catch(const std::string &message)
218  {
219  error() << message << eom;
220  return true;
221  }
222 
223  catch(...)
224  {
225  error() << "parsing error" << eom;
226  return true;
227  }
228 
229  return false;
230 }
231 
232 #define ACC_PUBLIC 0x0001
233 #define ACC_PRIVATE 0x0002
234 #define ACC_PROTECTED 0x0004
235 #define ACC_STATIC 0x0008
236 #define ACC_FINAL 0x0010
237 #define ACC_SYNCHRONIZED 0x0020
238 #define ACC_BRIDGE 0x0040
239 #define ACC_VARARGS 0x0080
240 #define ACC_NATIVE 0x0100
241 #define ACC_ABSTRACT 0x0400
242 #define ACC_STRICT 0x0800
243 #define ACC_SYNTHETIC 0x1000
244 #define ACC_ENUM 0x4000
245 
246 #ifdef _MSC_VER
247 #define UNUSED
248 #else
249 #define UNUSED __attribute__((unused))
250 #endif
251 
253 {
255 
256  u4 magic=read_u4();
257  u2 UNUSED minor_version=read_u2();
258  u2 major_version=read_u2();
259 
260  if(magic!=0xCAFEBABE)
261  {
262  error() << "wrong magic" << eom;
263  throw 0;
264  }
265 
266  if(major_version<44)
267  {
268  error() << "unexpected major version" << eom;
269  throw 0;
270  }
271 
272  rconstant_pool();
273 
274  classt &parsed_class=parse_tree.parsed_class;
275 
276  u2 access_flags=read_u2();
277  u2 this_class=read_u2();
278  u2 super_class=read_u2();
279 
280  parsed_class.is_abstract=(access_flags&ACC_ABSTRACT)!=0;
281  parsed_class.is_enum=(access_flags&ACC_ENUM)!=0;
282  parsed_class.name=
283  constant(this_class).type().get(ID_C_base_name);
284 
285  if(super_class!=0)
286  parsed_class.extends=
287  constant(super_class).type().get(ID_C_base_name);
288 
289  rinterfaces(parsed_class);
290  rfields(parsed_class);
291  rmethods(parsed_class);
292 
293  // count elements of enum
294  if(parsed_class.is_enum)
295  for(fieldt &field : parse_tree.parsed_class.fields)
296  if(field.is_enum)
298 
299  u2 attributes_count=read_u2();
300 
301  for(std::size_t j=0; j<attributes_count; j++)
302  rclass_attribute(parsed_class);
303 
304  get_class_refs();
305 
307 }
308 
310 {
311  // Get the class references for the benefit of a dependency
312  // analysis.
313 
314  for(const auto &c : constant_pool)
315  {
316  switch(c.tag)
317  {
318  case CONSTANT_Class:
319  get_class_refs_rec(c.expr.type());
320  break;
321 
323  {
326  }
327  break;
328 
329  default: {}
330  }
331  }
332 
333  for(const auto &m : parse_tree.parsed_class.fields)
334  {
335  typet t=java_type_from_string(m.signature);
337  }
338 
339  for(const auto &m : parse_tree.parsed_class.methods)
340  {
341  typet t=java_type_from_string(m.signature);
343  for(const auto &var : m.local_variable_table)
344  {
345  typet var_type=java_type_from_string(var.signature);
346  get_class_refs_rec(var_type);
347  }
348  }
349 }
350 
352 {
353  if(src.id()==ID_code)
354  {
355  const code_typet &ct=to_code_type(src);
356  const typet &rt=ct.return_type();
357  get_class_refs_rec(rt);
358  for(const auto &p : ct.parameters())
359  get_class_refs_rec(p.type());
360  }
361  else if(src.id()==ID_symbol)
362  {
363  irep_idt name=src.get(ID_C_base_name);
364  if(has_prefix(id2string(name), "array["))
365  {
366  const typet &element_type=
367  static_cast<const typet &>(src.find(ID_C_element_type));
368  get_class_refs_rec(element_type);
369  }
370  else
371  parse_tree.class_refs.insert(name);
372  }
373  else if(src.id()==ID_struct)
374  {
375  const struct_typet &struct_type=to_struct_type(src);
376  for(const auto &c : struct_type.components())
377  get_class_refs_rec(c.type());
378  }
379  else if(src.id()==ID_pointer)
381 }
382 
384 {
385  u2 constant_pool_count=read_u2();
386  if(constant_pool_count==0)
387  {
388  error() << "invalid constant_pool_count" << eom;
389  throw 0;
390  }
391 
392  constant_pool.resize(constant_pool_count);
393 
394  for(constant_poolt::iterator
395  it=constant_pool.begin();
396  it!=constant_pool.end();
397  it++)
398  {
399  // the first entry isn't used
400  if(it==constant_pool.begin())
401  continue;
402 
403  it->tag=read_u1();
404 
405  switch(it->tag)
406  {
407  case CONSTANT_Class:
408  it->ref1=read_u2();
409  break;
410 
411  case CONSTANT_Fieldref:
412  case CONSTANT_Methodref:
416  it->ref1=read_u2();
417  it->ref2=read_u2();
418  break;
419 
420  case CONSTANT_String:
421  case CONSTANT_MethodType:
422  it->ref1=read_u2();
423  break;
424 
425  case CONSTANT_Integer:
426  case CONSTANT_Float:
427  it->number=read_u4();
428  break;
429 
430  case CONSTANT_Long:
431  case CONSTANT_Double:
432  it->number=read_u8();
433  // Eight-byte constants take up two entries
434  // in the constant_pool table, for annoying this programmer.
435  if(it==constant_pool.end())
436  {
437  error() << "invalid double entry" << eom;
438  throw 0;
439  }
440  it++;
441  it->tag=0;
442  break;
443 
444  case CONSTANT_Utf8:
445  {
446  u2 bytes=read_u2();
447  std::string s;
448  s.resize(bytes);
449  for(std::string::iterator s_it=s.begin(); s_it!=s.end(); s_it++)
450  *s_it=read_u1();
451  it->s=s; // hashes
452  }
453  break;
454 
456  it->ref1=read_u1();
457  it->ref2=read_u2();
458  break;
459 
460  default:
461  error() << "unknown constant pool entry (" << it->tag << ")"
462  << eom;
463  throw 0;
464  }
465  }
466 
467  // we do a bit of post-processing after we have them all
468  for(constant_poolt::iterator
469  it=constant_pool.begin();
470  it!=constant_pool.end();
471  it++)
472  {
473  // the first entry isn't used
474  if(it==constant_pool.begin())
475  continue;
476 
477  switch(it->tag)
478  {
479  case CONSTANT_Class:
480  {
481  const std::string &s=id2string(pool_entry(it->ref1).s);
482  it->expr=type_exprt(java_classname(s));
483  }
484  break;
485 
486  case CONSTANT_Fieldref:
487  {
488  const pool_entryt &nameandtype_entry=pool_entry(it->ref2);
489  const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
490  const pool_entryt &class_entry=pool_entry(it->ref1);
491  const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
492  typet type=type_entry(nameandtype_entry.ref2);
493 
494  symbol_typet class_symbol=
495  java_classname(id2string(class_name_entry.s));
496 
497  exprt fieldref("fieldref", type);
498  fieldref.set(ID_class, class_symbol.get_identifier());
499  fieldref.set(ID_component_name, name_entry.s);
500 
501  it->expr=fieldref;
502  }
503  break;
504 
505  case CONSTANT_Methodref:
507  {
508  const pool_entryt &nameandtype_entry=pool_entry(it->ref2);
509  const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
510  const pool_entryt &class_entry=pool_entry(it->ref1);
511  const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
512  typet type=type_entry(nameandtype_entry.ref2);
513 
514  symbol_typet class_symbol=
515  java_classname(id2string(class_name_entry.s));
516 
517  irep_idt component_name=
518  id2string(name_entry.s)+
519  ":"+id2string(pool_entry(nameandtype_entry.ref2).s);
520 
521  irep_idt class_name=
522  class_symbol.get_identifier();
523 
524  irep_idt identifier=
525  id2string(class_name)+"."+id2string(component_name);
526 
527  exprt virtual_function(ID_virtual_function, type);
528  virtual_function.set(ID_component_name, component_name);
529  virtual_function.set(ID_C_class, class_name);
530  virtual_function.set(ID_C_base_name, name_entry.s);
531  virtual_function.set(ID_identifier, identifier);
532 
533  it->expr=virtual_function;
534  }
535  break;
536 
537  case CONSTANT_String:
538  {
539  // ldc turns these into references to java.lang.String
540  exprt string_literal(ID_java_string_literal);
541  string_literal.set(ID_value, pool_entry(it->ref1).s);
542  it->expr=string_literal;
543  }
544  break;
545 
546  case CONSTANT_Integer:
547  it->expr=from_integer(it->number, java_int_type());
548  break;
549 
550  case CONSTANT_Float:
551  {
553  value.unpack(it->number);
554  it->expr=value.to_expr();
555  }
556  break;
557 
558  case CONSTANT_Long:
559  it->expr=from_integer(it->number, java_long_type());
560  break;
561 
562  case CONSTANT_Double:
563  {
565  value.unpack(it->number);
566  it->expr=value.to_expr();
567  }
568  break;
569 
571  {
572  it->expr.id("nameandtype");
573  }
574  break;
575 
577  {
578  it->expr.id("methodhandle");
579  }
580  break;
581 
582  case CONSTANT_MethodType:
583  {
584  it->expr.id("methodtype");
585  }
586  break;
587 
589  {
590  it->expr.id("invokedynamic");
591  const pool_entryt &nameandtype_entry=pool_entry(it->ref2);
592  typet type=type_entry(nameandtype_entry.ref2);
593  it->expr.type()=type;
594  }
595  break;
596 
597  default:{};
598  }
599  }
600 }
601 
603 {
604  u2 interfaces_count=read_u2();
605 
606  for(std::size_t i=0; i<interfaces_count; i++)
607  parsed_class.implements
608  .push_back(constant(read_u2()).type().get(ID_C_base_name));
609 }
610 
612 {
613  u2 fields_count=read_u2();
614 
615  for(std::size_t i=0; i<fields_count; i++)
616  {
617  fieldt &field=parsed_class.add_field();
618 
619  u2 access_flags=read_u2();
620  u2 name_index=read_u2();
621  u2 descriptor_index=read_u2();
622  u2 attributes_count=read_u2();
623 
624  field.name=pool_entry(name_index).s;
625  field.is_static=(access_flags&ACC_STATIC)!=0;
626  field.is_final=(access_flags&ACC_FINAL)!=0;
627  field.is_enum=(access_flags&ACC_ENUM)!=0;
628  field.signature=id2string(pool_entry(descriptor_index).s);
629  field.is_public=(access_flags&ACC_PUBLIC)!=0;
630  field.is_protected=(access_flags&ACC_PROTECTED)!=0;
631  field.is_private=(access_flags&ACC_PRIVATE)!=0;
632  size_t flags=(field.is_public?1:0)+
633  (field.is_protected?1:0)+
634  (field.is_private?1:0);
635  assert(flags<=1);
636 
637  for(std::size_t j=0; j<attributes_count; j++)
638  rfield_attribute(field);
639  }
640 }
641 
642 #define T_BOOLEAN 4
643 #define T_CHAR 5
644 #define T_FLOAT 6
645 #define T_DOUBLE 7
646 #define T_BYTE 8
647 #define T_SHORT 9
648 #define T_INT 10
649 #define T_LONG 11
650 
652  methodt::instructionst &instructions)
653 {
654  u4 code_length=read_u4();
655 
656  u4 address;
657  size_t bytecode_index=0; // index of bytecode instruction
658 
659  for(address=0; address<code_length; address++)
660  {
661  bool wide_instruction=false;
662  u4 start_of_instruction=address;
663 
664  u1 bytecode=read_u1();
665 
666  if(bytecode==0xc4) // wide
667  {
668  wide_instruction=true;
669  address++;
670  bytecode=read_u1();
671  }
672 
673  instructions.push_back(instructiont());
674  instructiont &instruction=instructions.back();
675  instruction.statement=bytecodes[bytecode].mnemonic;
676  instruction.address=start_of_instruction;
677  instruction.source_location
678  .set_java_bytecode_index(std::to_string(bytecode_index));
679 
680  switch(bytecodes[bytecode].format)
681  {
682  case ' ': // no further bytes
683  break;
684 
685  case 'c': // a constant_pool index (one byte)
686  if(wide_instruction)
687  {
688  instruction.args.push_back(constant(read_u2()));
689  address+=2;
690  }
691  else
692  {
693  instruction.args.push_back(constant(read_u1()));
694  address+=1;
695  }
696  break;
697 
698  case 'C': // a constant_pool index (two bytes)
699  instruction.args.push_back(constant(read_u2()));
700  address+=2;
701  break;
702 
703  case 'b': // a signed byte
704  {
705  s1 c=read_u1();
706  instruction.args.push_back(from_integer(c, signedbv_typet(8)));
707  }
708  address+=1;
709 
710  break;
711 
712  case 'o': // two byte branch offset, signed
713  {
714  s2 offset=read_u2();
715  instruction.args.push_back(
716  from_integer(address+offset, signedbv_typet(16)));
717  }
718  address+=2;
719  break;
720 
721  case 'O': // four byte branch offset, signed
722  {
723  s4 offset=read_u4();
724  instruction.args.push_back(
725  from_integer(address+offset, signedbv_typet(32)));
726  }
727  address+=4;
728  break;
729 
730  case 'v': // local variable index (one byte)
731  {
732  u1 v=read_u1();
733  instruction.args.push_back(from_integer(v, unsignedbv_typet(8)));
734  }
735  address+=1;
736  break;
737 
738  case 'V':
739  // local variable index (two bytes) plus two signed bytes
740  if(wide_instruction)
741  {
742  u2 v=read_u2();
743  instruction.args.push_back(from_integer(v, unsignedbv_typet(16)));
744  s2 c=read_u2();
745  instruction.args.push_back(from_integer(c, signedbv_typet(16)));
746  address+=4;
747  }
748  else // local variable index (one byte) plus one signed byte
749  {
750  u1 v=read_u1();
751  instruction.args.push_back(from_integer(v, unsignedbv_typet(8)));
752  s1 c=read_u1();
753  instruction.args.push_back(from_integer(c, signedbv_typet(8)));
754  address+=2;
755  }
756  break;
757 
758  case 'I': // two byte constant_pool index plus two bytes
759  {
760  u2 c=read_u2();
761  instruction.args.push_back(constant(c));
762  u1 b1=read_u1();
763  instruction.args.push_back(from_integer(b1, unsignedbv_typet(8)));
764  u1 b2=read_u1();
765  instruction.args.push_back(from_integer(b2, unsignedbv_typet(8)));
766  }
767  address+=4;
768  break;
769 
770  case 'L': // lookupswitch
771  {
772  u4 base_offset=address;
773 
774  // first a pad to 32-bit align
775  while(((address+1)&3)!=0) { read_u1(); address++; }
776 
777  // now default value
778  s4 default_value=read_u4();
779  instruction.args.push_back(
780  from_integer(base_offset+default_value, signedbv_typet(32)));
781  address+=4;
782 
783  // number of pairs
784  u4 npairs=read_u4();
785  address+=4;
786 
787  for(std::size_t i=0; i<npairs; i++)
788  {
789  s4 match=read_u4();
790  s4 offset=read_u4();
791  instruction.args.push_back(
792  from_integer(match, signedbv_typet(32)));
793  instruction.args.push_back(
794  from_integer(base_offset+offset, signedbv_typet(32)));
795  address+=8;
796  }
797  }
798  break;
799 
800  case 'T': // tableswitch
801  {
802  size_t base_offset=address;
803 
804  // first a pad to 32-bit align
805  while(((address+1)&3)!=0) { read_u1(); address++; }
806 
807  // now default value
808  s4 default_value=read_u4();
809  instruction.args.push_back(
810  from_integer(base_offset+default_value, signedbv_typet(32)));
811  address+=4;
812 
813  // now low value
814  s4 low_value=read_u4();
815  address+=4;
816 
817  // now high value
818  s4 high_value=read_u4();
819  address+=4;
820 
821  // there are high-low+1 offsets, and they are signed
822  for(s4 i=low_value; i<=high_value; i++)
823  {
824  s4 offset=read_u4();
825  instruction.args.push_back(from_integer(i, signedbv_typet(32)));
826  instruction.args.push_back(
827  from_integer(base_offset+offset, signedbv_typet(32)));
828  address+=4;
829  }
830  }
831  break;
832 
833  case 'm': // multianewarray: constant-pool index plus one unsigned byte
834  {
835  u2 c=read_u2(); // constant-pool index
836  instruction.args.push_back(constant(c));
837  u1 dimensions=read_u1(); // number of dimensions
838  instruction.args.push_back(
839  from_integer(dimensions, unsignedbv_typet(8)));
840  address+=3;
841  }
842  break;
843 
844  case 't': // array subtype, one byte
845  {
846  typet t;
847  switch(read_u1())
848  {
849  case T_BOOLEAN: t.id(ID_bool); break;
850  case T_CHAR: t.id(ID_char); break;
851  case T_FLOAT: t.id(ID_float); break;
852  case T_DOUBLE: t.id(ID_double); break;
853  case T_BYTE: t.id(ID_byte); break;
854  case T_SHORT: t.id(ID_short); break;
855  case T_INT: t.id(ID_int); break;
856  case T_LONG: t.id(ID_long); break;
857  default:{};
858  }
859  instruction.args.push_back(type_exprt(t));
860  }
861  address+=1;
862  break;
863 
864  case 's': // a signed short
865  {
866  s2 s=read_u2();
867  instruction.args.push_back(from_integer(s, signedbv_typet(16)));
868  }
869  address+=2;
870  break;
871 
872  default:
873  throw "unknown JVM bytecode instruction";
874  }
875  bytecode_index++;
876  }
877 
878  if(address!=code_length)
879  {
880  error() << "bytecode length mismatch" << eom;
881  throw 0;
882  }
883 }
884 
886 {
887  u2 attribute_name_index=read_u2();
888  u4 attribute_length=read_u4();
889 
890  irep_idt attribute_name=pool_entry(attribute_name_index).s;
891 
892  if(attribute_name=="Code")
893  {
894  u2 UNUSED max_stack=read_u2();
895  u2 UNUSED max_locals=read_u2();
896 
897  rbytecode(method.instructions);
898 
899  u2 exception_table_length=read_u2();
900  method.exception_table.resize(exception_table_length);
901 
902  for(std::size_t e=0; e<exception_table_length; e++)
903  {
904  u2 start_pc=read_u2();
905  u2 end_pc=read_u2();
906  u2 handler_pc=read_u2();
907  u2 catch_type=read_u2();
908  method.exception_table[e].start_pc=start_pc;
909  method.exception_table[e].end_pc=end_pc;
910  method.exception_table[e].handler_pc=handler_pc;
911  if(catch_type!=0)
912  method.exception_table[e].catch_type=
913  to_symbol_type(pool_entry(catch_type).expr.type());
914  }
915 
916  u2 attributes_count=read_u2();
917 
918  for(std::size_t j=0; j<attributes_count; j++)
919  rcode_attribute(method);
920 
921  irep_idt line_number;
922 
923  // add missing line numbers
924  for(methodt::instructionst::iterator
925  it=method.instructions.begin();
926  it!=method.instructions.end();
927  it++)
928  {
929  if(!it->source_location.get_line().empty())
930  line_number=it->source_location.get_line();
931  else if(!line_number.empty())
932  it->source_location.set_line(line_number);
933  it->source_location
934  .set_function(
935  "java::"+id2string(parse_tree.parsed_class.name)+"."+
936  id2string(method.name)+":"+method.signature);
937  }
938 
939  // line number of method
940  if(!method.instructions.empty())
941  method.source_location.set_line(
942  method.instructions.begin()->source_location.get_line());
943  }
944  else if(attribute_name=="RuntimeInvisibleAnnotations" ||
945  attribute_name=="RuntimeVisibleAnnotations")
946  {
948  }
949  else
950  skip_bytes(attribute_length);
951 }
952 
954 {
955  u2 attribute_name_index=read_u2();
956  u4 attribute_length=read_u4();
957 
958  irep_idt attribute_name=pool_entry(attribute_name_index).s;
959 
960  if(attribute_name=="RuntimeInvisibleAnnotations" ||
961  attribute_name=="RuntimeVisibleAnnotations")
962  {
964  }
965  else
966  skip_bytes(attribute_length);
967 }
968 
970 {
971  u2 attribute_name_index=read_u2();
972  u4 attribute_length=read_u4();
973 
974  irep_idt attribute_name=pool_entry(attribute_name_index).s;
975 
976  if(attribute_name=="LineNumberTable")
977  {
978  // address -> instructiont
979  typedef std::map<unsigned,
980  methodt::instructionst::iterator> instruction_mapt;
981  instruction_mapt instruction_map;
982 
983  for(methodt::instructionst::iterator
984  it=method.instructions.begin();
985  it!=method.instructions.end();
986  it++)
987  {
988  instruction_map[it->address]=it;
989  }
990 
991  u2 line_number_table_length=read_u2();
992 
993  for(std::size_t i=0; i<line_number_table_length; i++)
994  {
995  u2 start_pc=read_u2();
996  u2 line_number=read_u2();
997 
998  // annotate the bytecode program
999  instruction_mapt::const_iterator it=
1000  instruction_map.find(start_pc);
1001 
1002  if(it!=instruction_map.end())
1003  it->second->source_location.set_line(line_number);
1004  }
1005  }
1006  else if(attribute_name=="LocalVariableTable")
1007  {
1008  u2 local_variable_table_length=read_u2();
1009 
1010  method.local_variable_table.resize(local_variable_table_length);
1011 
1012  for(std::size_t i=0; i<local_variable_table_length; i++)
1013  {
1014  u2 start_pc=read_u2();
1015  u2 length=read_u2();
1016  u2 name_index=read_u2();
1017  u2 descriptor_index=read_u2();
1018  u2 index=read_u2();
1019 
1020  method.local_variable_table[i].index=index;
1021  method.local_variable_table[i].name=pool_entry(name_index).s;
1022  method.local_variable_table[i].signature=
1023  id2string(pool_entry(descriptor_index).s);
1024  method.local_variable_table[i].start_pc=start_pc;
1025  method.local_variable_table[i].length=length;
1026  }
1027  }
1028  else if(attribute_name=="StackMapTable")
1029  {
1030  u2 stack_map_entries=read_u2();
1031 
1032  method.stack_map_table.resize(stack_map_entries);
1033 
1034  for(size_t i=0; i<stack_map_entries; i++)
1035  {
1036  u1 frame_type=read_u1();
1037  if(0<=frame_type && frame_type<=63)
1038  {
1040  method.stack_map_table[i].locals.resize(0);
1041  method.stack_map_table[i].stack.resize(0);
1042  }
1043  else if(64<=frame_type && frame_type<=127)
1044  {
1045  method.stack_map_table[i].type=
1047  method.stack_map_table[i].locals.resize(0);
1048  method.stack_map_table[i].stack.resize(1);
1049  methodt::verification_type_infot verification_type_info;
1050  read_verification_type_info(verification_type_info);
1051  method.stack_map_table[i].stack[0]=verification_type_info;
1052  }
1053  else if(frame_type==247)
1054  {
1055  method.stack_map_table[i].type=
1057  method.stack_map_table[i].locals.resize(0);
1058  method.stack_map_table[i].stack.resize(1);
1059  methodt::verification_type_infot verification_type_info;
1060  u2 offset_delta=read_u2();
1061  read_verification_type_info(verification_type_info);
1062  method.stack_map_table[i].stack[0]=verification_type_info;
1063  method.stack_map_table[i].offset_delta=offset_delta;
1064  }
1065  else if(248<=frame_type && frame_type<=250)
1066  {
1068  method.stack_map_table[i].locals.resize(0);
1069  method.stack_map_table[i].stack.resize(0);
1070  u2 offset_delta=read_u2();
1071  method.stack_map_table[i].offset_delta=offset_delta;
1072  }
1073  else if(frame_type==251)
1074  {
1075  method.stack_map_table[i].type
1077  method.stack_map_table[i].locals.resize(0);
1078  method.stack_map_table[i].stack.resize(0);
1079  u2 offset_delta=read_u2();
1080  method.stack_map_table[i].offset_delta=offset_delta;
1081  }
1082  else if(252<=frame_type && frame_type<=254)
1083  {
1084  size_t new_locals=(size_t) (frame_type-251);
1086  method.stack_map_table[i].locals.resize(new_locals);
1087  method.stack_map_table[i].stack.resize(0);
1088  u2 offset_delta=read_u2();
1089  method.stack_map_table[i].offset_delta=offset_delta;
1090  for(size_t k=0; k<new_locals; k++)
1091  {
1092  method.stack_map_table[i].locals
1093  .push_back(methodt::verification_type_infot());
1095  method.stack_map_table[i].locals.back();
1097  }
1098  }
1099  else if(frame_type==255)
1100  {
1102  u2 offset_delta=read_u2();
1103  method.stack_map_table[i].offset_delta=offset_delta;
1104  u2 number_locals=read_u2();
1105  method.stack_map_table[i].locals.resize(number_locals);
1106  for(size_t k=0; k<(size_t) number_locals; k++)
1107  {
1108  method.stack_map_table[i].locals
1109  .push_back(methodt::verification_type_infot());
1111  method.stack_map_table[i].locals.back();
1113  }
1114  u2 number_stack_items=read_u2();
1115  method.stack_map_table[i].stack.resize(number_stack_items);
1116  for(size_t k=0; k<(size_t) number_stack_items; k++)
1117  {
1118  method.stack_map_table[i].stack
1119  .push_back(methodt::verification_type_infot());
1121  method.stack_map_table[i].stack.back();
1123  }
1124  }
1125  else
1126  throw "error: unknown stack frame type encountered";
1127  }
1128  }
1129  else
1130  skip_bytes(attribute_length);
1131 }
1132 
1135 {
1136  u1 tag=read_u1();
1137  switch(tag)
1138  {
1139  case VTYPE_INFO_TOP:
1141  break;
1142  case VTYPE_INFO_INTEGER:
1144  break;
1145  case VTYPE_INFO_FLOAT:
1147  break;
1148  case VTYPE_INFO_LONG:
1150  break;
1151  case VTYPE_INFO_DOUBLE:
1153  break;
1154  case VTYPE_INFO_ITEM_NULL:
1156  break;
1159  break;
1160  case VTYPE_INFO_OBJECT:
1162  v.cpool_index=read_u2();
1163  break;
1164  case VTYPE_INFO_UNINIT:
1166  v.offset=read_u2();
1167  break;
1168  default:
1169  throw "error: unknown verification type info encountered";
1170  }
1171 }
1172 
1174  annotationst &annotations)
1175 {
1176  u2 num_annotations=read_u2();
1177 
1178  for(u2 number=0; number<num_annotations; number++)
1179  {
1180  annotationt annotation;
1181  rRuntimeAnnotation(annotation);
1182  annotations.push_back(annotation);
1183  }
1184 }
1185 
1187  annotationt &annotation)
1188 {
1189  u2 type_index=read_u2();
1190  annotation.type=type_entry(type_index);
1192 }
1193 
1195  annotationt::element_value_pairst &element_value_pairs)
1196 {
1197  u2 num_element_value_pairs=read_u2();
1198  element_value_pairs.resize(num_element_value_pairs);
1199 
1200  for(auto &element_value_pair : element_value_pairs)
1201  {
1202  u2 element_name_index=read_u2();
1203  element_value_pair.element_name=pool_entry(element_name_index).s;
1204 
1205  relement_value_pair(element_value_pair);
1206  }
1207 }
1208 
1210  annotationt::element_value_pairt &element_value_pair)
1211 {
1212  u1 tag=read_u1();
1213 
1214  switch(tag)
1215  {
1216  case 'e':
1217  {
1218  UNUSED u2 type_name_index=read_u2();
1219  UNUSED u2 const_name_index=read_u2();
1220  // todo: enum
1221  }
1222  break;
1223 
1224  case 'c':
1225  {
1226  UNUSED u2 class_info_index=read_u2();
1227  // todo: class
1228  }
1229  break;
1230 
1231  case '@':
1232  {
1233  // another annotation, recursively
1234  annotationt annotation;
1235  rRuntimeAnnotation(annotation);
1236  }
1237  break;
1238 
1239  case '[':
1240  {
1241  u2 num_values=read_u2();
1242  for(std::size_t i=0; i<num_values; i++)
1243  {
1244  annotationt::element_value_pairt element_value;
1245  relement_value_pair(element_value); // recursive call
1246  }
1247  }
1248  break;
1249 
1250  case 's':
1251  {
1252  u2 const_value_index=read_u2();
1253  element_value_pair.value=string_constantt(
1254  pool_entry(const_value_index).s);
1255  }
1256  break;
1257 
1258  default:
1259  {
1260  u2 const_value_index=read_u2();
1261  element_value_pair.value=constant(const_value_index);
1262  }
1263 
1264  break;
1265  }
1266 }
1267 
1269 {
1270  u2 attribute_name_index=read_u2();
1271  u4 attribute_length=read_u4();
1272 
1273  irep_idt attribute_name=pool_entry(attribute_name_index).s;
1274 
1275  if(attribute_name=="SourceFile")
1276  {
1277  u2 sourcefile_index=read_u2();
1278  irep_idt sourcefile_name;
1279 
1280  std::string fqn(id2string(parsed_class.name));
1281  size_t last_index=fqn.find_last_of(".");
1282  if(last_index==std::string::npos)
1283  sourcefile_name=pool_entry(sourcefile_index).s;
1284  else
1285  {
1286  std::string package_name=fqn.substr(0, last_index+1);
1287  std::replace(package_name.begin(), package_name.end(), '.', '/');
1288  const std::string &full_file_name=
1289  package_name+id2string(pool_entry(sourcefile_index).s);
1290  sourcefile_name=full_file_name;
1291  }
1292 
1293  for(methodst::iterator m_it=parsed_class.methods.begin();
1294  m_it!=parsed_class.methods.end();
1295  m_it++)
1296  {
1297  m_it->source_location.set_file(sourcefile_name);
1298  for(instructionst::iterator i_it=m_it->instructions.begin();
1299  i_it!=m_it->instructions.end();
1300  i_it++)
1301  {
1302  if(!i_it->source_location.get_line().empty())
1303  i_it->source_location.set_file(sourcefile_name);
1304  }
1305  }
1306  }
1307  else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1308  attribute_name=="RuntimeVisibleAnnotations")
1309  {
1311  }
1312  else
1313  skip_bytes(attribute_length);
1314 }
1315 
1317 {
1318  u2 methods_count=read_u2();
1319 
1320  for(std::size_t j=0; j<methods_count; j++)
1321  rmethod(parsed_class);
1322 }
1323 
1324 #define ACC_PUBLIC 0x0001
1325 #define ACC_PRIVATE 0x0002
1326 #define ACC_PROTECTED 0x0004
1327 #define ACC_STATIC 0x0008
1328 #define ACC_FINAL 0x0010
1329 #define ACC_SUPER 0x0020
1330 #define ACC_VOLATILE 0x0040
1331 #define ACC_TRANSIENT 0x0080
1332 #define ACC_INTERFACE 0x0200
1333 #define ACC_ABSTRACT 0x0400
1334 #define ACC_SYNTHETIC 0x1000
1335 #define ACC_ANNOTATION 0x2000
1336 #define ACC_ENUM 0x4000
1337 
1339 {
1340  methodt &method=parsed_class.add_method();
1341 
1342  u2 access_flags=read_u2();
1343  u2 name_index=read_u2();
1344  u2 descriptor_index=read_u2();
1345 
1346  method.is_final=(access_flags&ACC_FINAL)!=0;
1347  method.is_static=(access_flags&ACC_STATIC)!=0;
1348  method.is_abstract=(access_flags&ACC_ABSTRACT)!=0;
1349  method.is_public=(access_flags&ACC_PUBLIC)!=0;
1350  method.is_protected=(access_flags&ACC_PROTECTED)!=0;
1351  method.is_private=(access_flags&ACC_PRIVATE)!=0;
1352  method.is_synchronized=(access_flags&ACC_SYNCHRONIZED)!=0;
1353  method.is_native=(access_flags&ACC_NATIVE)!=0;
1354  method.name=pool_entry(name_index).s;
1355  method.base_name=pool_entry(name_index).s;
1356  method.signature=id2string(pool_entry(descriptor_index).s);
1357 
1358  size_t flags=(method.is_public?1:0)+
1359  (method.is_protected?1:0)+
1360  (method.is_private?1:0);
1361  assert(flags<=1);
1362  u2 attributes_count=read_u2();
1363 
1364  for(std::size_t j=0; j<attributes_count; j++)
1365  rmethod_attribute(method);
1366 }
1367 
1369  std::istream &istream,
1372 {
1373  java_bytecode_parsert java_bytecode_parser;
1374  java_bytecode_parser.in=&istream;
1375  java_bytecode_parser.set_message_handler(message_handler);
1376 
1377  bool parser_result=java_bytecode_parser.parse();
1378 
1379  parse_tree.swap(java_bytecode_parser.parse_tree);
1380 
1381  return parser_result;
1382 }
1383 
1385  const std::string &file,
1388 {
1389  std::ifstream in(file, std::ios::binary);
1390 
1391  if(!in)
1392  {
1393  messaget message(message_handler);
1394  message.error() << "failed to open input file `"
1395  << file << '\'' << messaget::eom;
1396  return true;
1397  }
1398 
1399  return java_bytecode_parse(in, parse_tree, message_handler);
1400 }
#define VTYPE_INFO_OBJECT
The type of an expression.
Definition: type.h:20
mstreamt & result()
Definition: message.h:233
Fixed-width bit-vector with unsigned binary interpretation.
Definition: std_types.h:1125
void rinterfaces(classt &parsed_class)
Base type of functions.
Definition: std_types.h:734
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
void set_java_bytecode_index(const irep_idt &index)
#define ACC_NATIVE
#define VTYPE_INFO_LONG
void rRuntimeAnnotation_attribute(annotationst &)
uint64_t u8
Definition: bytecode_info.h:58
#define CONSTANT_Methodref
constant_exprt to_expr() const
Definition: ieee_float.cpp:686
#define CONSTANT_MethodType
pool_entryt & pool_entry(u2 index)
void rmethod(classt &parsed_class)
std::istream * in
Definition: parser.h:26
bool java_bytecode_parse(std::istream &istream, java_bytecode_parse_treet &parse_tree, message_handlert &message_handler)
#define ACC_PROTECTED
typet java_int_type()
Definition: java_types.cpp:19
java_bytecode_parse_treet::annotationst annotationst
void rmethods(classt &parsed_class)
#define CONSTANT_InterfaceMethodref
struct bytecode_infot const bytecode_info[]
An expression denoting a type.
Definition: std_expr.h:3671
java_bytecode_parse_treet parse_tree
#define ACC_PRIVATE
#define VTYPE_INFO_DOUBLE
#define CONSTANT_Integer
#define UNUSED
const componentst & components() const
Definition: std_types.h:242
#define CONSTANT_Fieldref
typet java_long_type()
Definition: java_types.cpp:29
typet & type()
Definition: expr.h:60
#define CONSTANT_Float
#define VTYPE_INFO_ITEM_NULL
#define CONSTANT_Class
static mstreamt & eom(mstreamt &m)
Definition: message.h:193
Structure type.
Definition: std_types.h:296
uint32_t u4
Definition: bytecode_info.h:57
#define T_FLOAT
Definition: parser.h:23
#define ACC_ABSTRACT
#define T_BYTE
static ieee_float_spect double_precision()
Definition: ieee_float.h:69
void relement_value_pairs(annotationt::element_value_pairst &)
#define CONSTANT_MethodHandle
Parser utilities.
#define CONSTANT_Long
std::vector< annotationt > annotationst
const irep_idt & id() const
Definition: irep.h:189
typet java_type_from_string(const std::string &src)
Definition: java_types.cpp:152
java_bytecode_parse_treet::instructiont instructiont
const symbol_typet & to_symbol_type(const typet &type)
Cast a generic typet to a symbol_typet.
Definition: std_types.h:142
#define ACC_FINAL
void unpack(const mp_integer &i)
Definition: ieee_float.cpp:312
A reference into the symbol table.
Definition: std_types.h:109
std::vector< pool_entryt > constant_poolt
#define CONSTANT_Double
#define VTYPE_INFO_UNINIT
#define CONSTANT_NameAndType
Fixed-width bit-vector with two&#39;s complement interpretation.
Definition: std_types.h:1171
#define CONSTANT_Utf8
java_bytecode_parse_treet::classt classt
API to expression classes.
const irep_idt & get(const irep_namet &name) const
Definition: irep.cpp:213
void set_line(const irep_idt &line)
static ieee_float_spect single_precision()
Definition: ieee_float.h:63
std::vector< instructiont > instructionst
#define VTYPE_INFO_UNINIT_THIS
#define T_DOUBLE
#define ACC_SYNCHRONIZED
uint16_t u2
Definition: bytecode_info.h:56
#define VTYPE_INFO_TOP
virtual void set_message_handler(message_handlert &_message_handler)
Definition: message.h:122
void rbytecode(methodt::instructionst &)
int32_t s4
Definition: bytecode_info.h:61
#define T_LONG
void skip_bytes(std::size_t bytes)
#define CONSTANT_String
void rclass_attribute(classt &parsed_class)
void rmethod_attribute(methodt &method)
#define T_INT
void get_class_refs_rec(const typet &)
#define VTYPE_INFO_FLOAT
#define ACC_ENUM
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
const struct_typet & to_struct_type(const typet &type)
Cast a generic typet to a struct_typet.
Definition: std_types.h:317
void relement_value_pair(annotationt::element_value_pairt &)
#define T_BOOLEAN
java_bytecode_parse_treet::classt::fieldst fieldst
std::vector< element_value_pairt > element_value_pairst
void rfields(classt &parsed_class)
#define ACC_STATIC
std::vector< bytecodet > bytecodes
java_bytecode_parse_treet::annotationt annotationt
const char * mnemonic
Definition: bytecode_info.h:46
uint8_t u1
Definition: bytecode_info.h:55
Base class for all expressions.
Definition: expr.h:46
#define VTYPE_INFO_INTEGER
java_bytecode_parse_treet::methodt::instructionst instructionst
const parameterst & parameters() const
Definition: std_types.h:841
#define ACC_PUBLIC
java_bytecode_parse_treet::fieldt fieldt
#define T_SHORT
void read_verification_type_info(methodt::verification_type_infot &)
void swap(java_bytecode_parse_treet &other)
#define T_CHAR
java_bytecode_parse_treet::methodt methodt
const typet type_entry(u2 index)
const code_typet & to_code_type(const typet &type)
Cast a generic typet to a code_typet.
Definition: std_types.h:884
mstreamt & error()
Definition: message.h:223
void rcode_attribute(methodt &method)
int8_t s1
Definition: bytecode_info.h:59
int16_t s2
Definition: bytecode_info.h:60
void rRuntimeAnnotation(annotationt &)
const typet & subtype() const
Definition: type.h:31
java_bytecode_parse_treet::classt::methodst methodst
symbol_typet java_classname(const std::string &id)
Definition: java_types.cpp:294
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
message_handlert * message_handler
Definition: message.h:259
bool empty() const
Definition: dstring.h:61
const irept & find(const irep_namet &name) const
Definition: irep.cpp:285
#define CONSTANT_InvokeDynamic
const typet & return_type() const
Definition: std_types.h:831
const irep_idt & get_identifier() const
Definition: std_types.h:126
void set(const irep_namet &name, const irep_idt &value)
Definition: irep.h:214
Definition: kdev_t.h:19