cprover
java_bytecode_parser.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module:
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
9 #include "java_bytecode_parser.h"
10 
11 #include <algorithm>
12 #include <fstream>
13 #include <map>
14 #include <string>
15 
16 #include <util/arith_tools.h>
17 #include <util/ieee_float.h>
18 #include <util/parser.h>
19 #include <util/prefix.h>
20 #include <util/std_expr.h>
21 #include <util/string_constant.h>
22 #include <util/optional.h>
23 
25 #include "java_types.h"
26 #include "bytecode_info.h"
27 
28 #ifdef DEBUG
29 #include <iostream>
30 #endif
31 
33 {
34 public:
36  {
37  get_bytecodes();
38  }
39 
40  virtual bool parse();
41 
56 
58 
59  struct pool_entryt
60  {
66  pool_entryt():tag(0), ref1(0), ref2(0), number(0) { }
67  };
68 
69  typedef std::vector<pool_entryt> constant_poolt;
71 
72 protected:
73  class bytecodet
74  {
75  public:
77  char format;
78  };
79 
80  std::vector<bytecodet> bytecodes;
81 
83  {
84  if(index==0 || index>=constant_pool.size())
85  {
86  error() << "invalid constant pool index (" << index << ")" << eom;
87  error() << "constant pool size: " << constant_pool.size() << eom;
88  throw 0;
89  }
90 
91  return constant_pool[index];
92  }
93 
94  exprt &constant(u2 index)
95  {
96  return pool_entry(index).expr;
97  }
98 
99  const typet type_entry(u2 index)
100  {
101  return java_type_from_string(id2string(pool_entry(index).s));
102  }
103 
105  {
106  // pre-hash the mnemonics, so we do this only once
107  bytecodes.resize(256);
108  for(const bytecode_infot *p=bytecode_info; p->mnemonic!=nullptr; p++)
109  {
110  assert(p->opcode<bytecodes.size());
111  bytecodes[p->opcode].mnemonic=p->mnemonic;
112  bytecodes[p->opcode].format=p->format;
113  }
114  }
115 
116  void rClassFile();
117  void rconstant_pool();
118  void rinterfaces(classt &parsed_class);
119  void rfields(classt &parsed_class);
120  void rmethods(classt &parsed_class);
121  void rmethod(classt &parsed_class);
122  void
123  rinner_classes_attribute(classt &parsed_class, const u4 &attribute_length);
124  std::vector<irep_idt> rexceptions_attribute();
125  void rclass_attribute(classt &parsed_class);
129  void relement_value_pair(annotationt::element_value_pairt &);
130  void rmethod_attribute(methodt &method);
131  void rfield_attribute(fieldt &);
132  void rcode_attribute(methodt &method);
133  void read_verification_type_info(methodt::verification_type_infot &);
135  void get_class_refs();
136  void get_class_refs_rec(const typet &);
139  parse_method_handle(const class method_handle_infot &entry);
141 
142  void skip_bytes(std::size_t bytes)
143  {
144  for(std::size_t i=0; i<bytes; i++)
145  {
146  if(!*in)
147  {
148  error() << "unexpected end of bytecode file" << eom;
149  throw 0;
150  }
151  in->get();
152  }
153  }
154 
155  u8 read_bytes(size_t bytes)
156  {
157  u8 result=0;
158  for(size_t i=0; i<bytes; i++)
159  {
160  if(!*in)
161  {
162  error() << "unexpected end of bytecode file" << eom;
163  throw 0;
164  }
165  result<<=8;
166  result|=in->get();
167  }
168  return result;
169  }
170 
172  {
173  return (u1)read_bytes(1);
174  }
175 
176  inline u2 read_u2()
177  {
178  return (u2)read_bytes(2);
179  }
180 
182  {
183  return (u4)read_bytes(4);
184  }
185 
187  {
188  return read_bytes(8);
189  }
190 
192  classt &parsed_class,
193  size_t bootstrap_method_index,
194  u2_valuest u2_values) const;
195 };
196 
197 #define CONSTANT_Class 7
198 #define CONSTANT_Fieldref 9
199 #define CONSTANT_Methodref 10
200 #define CONSTANT_InterfaceMethodref 11
201 #define CONSTANT_String 8
202 #define CONSTANT_Integer 3
203 #define CONSTANT_Float 4
204 #define CONSTANT_Long 5
205 #define CONSTANT_Double 6
206 #define CONSTANT_NameAndType 12
207 #define CONSTANT_Utf8 1
208 #define CONSTANT_MethodHandle 15
209 #define CONSTANT_MethodType 16
210 #define CONSTANT_InvokeDynamic 18
211 
212 #define VTYPE_INFO_TOP 0
213 #define VTYPE_INFO_INTEGER 1
214 #define VTYPE_INFO_FLOAT 2
215 #define VTYPE_INFO_LONG 3
216 #define VTYPE_INFO_DOUBLE 4
217 #define VTYPE_INFO_ITEM_NULL 5
218 #define VTYPE_INFO_UNINIT_THIS 6
219 #define VTYPE_INFO_OBJECT 7
220 #define VTYPE_INFO_UNINIT 8
221 
223 {
224 public:
226  : tag(entry.tag)
227  {
228  }
229 
230  u1 get_tag() const
231  {
232  return tag;
233  }
234 
235  typedef std::function<java_bytecode_parsert::pool_entryt &(u2)>
238 
239 protected:
240  static std::string read_utf8_constant(const pool_entryt &entry)
241  {
242  INVARIANT(
243  entry.tag == CONSTANT_Utf8, "Name entry must be a constant UTF-8");
244  return id2string(entry.s);
245  }
246 
247 private:
249 };
250 
254 {
255 public:
256  explicit class_infot(const pool_entryt &entry) : structured_pool_entryt(entry)
257  {
258  PRECONDITION(entry.tag == CONSTANT_Class);
259  name_index = entry.ref1;
260  }
261 
262  std::string get_name(pool_entry_lookupt pool_entry) const
263  {
264  const pool_entryt &name_entry = pool_entry(name_index);
265  return read_utf8_constant(name_entry);
266  }
267 
268 private:
270 };
271 
275 {
276 public:
278  : structured_pool_entryt(entry)
279  {
281  name_index = entry.ref1;
282  descriptor_index = entry.ref2;
283  }
284 
285  std::string get_name(pool_entry_lookupt pool_entry) const
286  {
287  const pool_entryt &name_entry = pool_entry(name_index);
288  return read_utf8_constant(name_entry);
289  }
290 
291  std::string get_descriptor(pool_entry_lookupt pool_entry) const
292  {
293  const pool_entryt &descriptor_entry = pool_entry(descriptor_index);
294  return read_utf8_constant(descriptor_entry);
295  }
296 
297 private:
300 };
301 
303 {
304 public:
306  {
307  PRECONDITION(
308  entry.tag == CONSTANT_Fieldref || entry.tag == CONSTANT_Methodref ||
310  class_index = entry.ref1;
311  name_and_type_index = entry.ref2;
312  }
313 
315  {
316  return class_index;
317  }
319  {
320  return name_and_type_index;
321  }
322 
324  {
325  const pool_entryt &name_and_type_entry = pool_entry(name_and_type_index);
326 
327  INVARIANT(
328  name_and_type_entry.tag == CONSTANT_NameAndType,
329  "name_and_typeindex did not correspond to a name_and_type in the "
330  "constant pool");
331 
332  return name_and_type_infot{name_and_type_entry};
333  }
334 
336  {
337  const pool_entryt &class_entry = pool_entry(class_index);
338 
339  return class_infot{class_entry};
340  }
341 
342 private:
345 };
346 
348 {
349 public:
354  {
355  REF_getField = 1,
356  REF_getStatic = 2,
357  REF_putField = 3,
358  REF_putStatic = 4,
359  REF_invokeVirtual = 5,
360  REF_invokeStatic = 6,
361  REF_invokeSpecial = 7,
364  };
365 
367  : structured_pool_entryt(entry)
368  {
370  PRECONDITION(entry.ref1 > 0 && entry.ref1 < 10); // Java 8 spec 4.4.8
371  reference_kind = static_cast<method_handle_kindt>(entry.ref1);
372  reference_index = entry.ref2;
373  }
374 
376  {
377  const base_ref_infot ref_entry{pool_entry(reference_index)};
378 
379  // validate the correctness of the constant pool entry
380  switch(reference_kind)
381  {
386  {
387  INVARIANT(ref_entry.get_tag() == CONSTANT_Fieldref, "4.4.2");
388  break;
389  }
392  {
393  INVARIANT(ref_entry.get_tag() == CONSTANT_Methodref, "4.4.2");
394  break;
395  }
398  {
399  INVARIANT(
400  ref_entry.get_tag() == CONSTANT_Methodref ||
401  ref_entry.get_tag() == CONSTANT_InterfaceMethodref,
402  "4.4.2");
403  break;
404  }
406  {
407  INVARIANT(ref_entry.get_tag() == CONSTANT_InterfaceMethodref, "");
408  break;
409  }
410  }
411 
412  return ref_entry;
413  }
414 
415 private:
418 };
419 
421 {
422  try
423  {
424  rClassFile();
425  }
426 
427  catch(const char *message)
428  {
429  error() << message << eom;
430  return true;
431  }
432 
433  catch(const std::string &message)
434  {
435  error() << message << eom;
436  return true;
437  }
438 
439  catch(...)
440  {
441  error() << "parsing error" << eom;
442  return true;
443  }
444 
445  return false;
446 }
447 
448 #define ACC_PUBLIC 0x0001
449 #define ACC_PRIVATE 0x0002
450 #define ACC_PROTECTED 0x0004
451 #define ACC_STATIC 0x0008
452 #define ACC_FINAL 0x0010
453 #define ACC_SYNCHRONIZED 0x0020
454 #define ACC_BRIDGE 0x0040
455 #define ACC_VARARGS 0x0080
456 #define ACC_NATIVE 0x0100
457 #define ACC_INTERFACE 0x0200
458 #define ACC_ABSTRACT 0x0400
459 #define ACC_STRICT 0x0800
460 #define ACC_SYNTHETIC 0x1000
461 #define ACC_ANNOTATION 0x2000
462 #define ACC_ENUM 0x4000
463 
464 #define UNUSED_u2(x) { const u2 x = read_u2(); (void)x; } (void)0
465 
467 {
469 
470  u4 magic=read_u4();
471  UNUSED_u2(minor_version);
472  u2 major_version=read_u2();
473 
474  if(magic!=0xCAFEBABE)
475  {
476  error() << "wrong magic" << eom;
477  throw 0;
478  }
479 
480  if(major_version<44)
481  {
482  error() << "unexpected major version" << eom;
483  throw 0;
484  }
485 
486  rconstant_pool();
487 
488  classt &parsed_class=parse_tree.parsed_class;
489 
490  u2 access_flags=read_u2();
491  u2 this_class=read_u2();
492  u2 super_class=read_u2();
493 
494  parsed_class.is_abstract=(access_flags&ACC_ABSTRACT)!=0;
495  parsed_class.is_enum=(access_flags&ACC_ENUM)!=0;
496  parsed_class.is_public=(access_flags&ACC_PUBLIC)!=0;
497  parsed_class.is_protected=(access_flags&ACC_PROTECTED)!=0;
498  parsed_class.is_private=(access_flags&ACC_PRIVATE)!=0;
499  parsed_class.is_final = (access_flags & ACC_FINAL) != 0;
500  parsed_class.is_interface = (access_flags & ACC_INTERFACE) != 0;
501  parsed_class.is_synthetic = (access_flags & ACC_SYNTHETIC) != 0;
502  parsed_class.is_annotation = (access_flags & ACC_ANNOTATION) != 0;
503  parsed_class.name=
504  constant(this_class).type().get(ID_C_base_name);
505 
506  if(super_class!=0)
507  parsed_class.super_class = constant(super_class).type().get(ID_C_base_name);
508 
509  rinterfaces(parsed_class);
510  rfields(parsed_class);
511  rmethods(parsed_class);
512 
513  // count elements of enum
514  if(parsed_class.is_enum)
515  for(fieldt &field : parse_tree.parsed_class.fields)
516  if(field.is_enum)
518 
519  u2 attributes_count=read_u2();
520 
521  for(std::size_t j=0; j<attributes_count; j++)
522  rclass_attribute(parsed_class);
523 
524  get_class_refs();
525 
527 }
528 
530 {
531  // Get the class references for the benefit of a dependency
532  // analysis.
533 
534  for(const auto &c : constant_pool)
535  {
536  switch(c.tag)
537  {
538  case CONSTANT_Class:
539  get_class_refs_rec(c.expr.type());
540  break;
541 
543  {
546  }
547  break;
548 
549  default: {}
550  }
551  }
552 
553  for(const auto &field : parse_tree.parsed_class.fields)
554  {
555  typet field_type;
556  if(field.signature.has_value())
557  {
559  field.descriptor,
560  field.signature,
562 
563  // add generic type args to class refs as dependencies, same below for
564  // method types and entries from the local variable type table
566  field_type, parse_tree.class_refs);
567  }
568  else
569  field_type=java_type_from_string(field.descriptor);
570 
571  get_class_refs_rec(field_type);
572  }
573 
574  for(const auto &method : parse_tree.parsed_class.methods)
575  {
576  typet method_type;
577  if(method.signature.has_value())
578  {
580  method.descriptor,
581  method.signature,
584  method_type, parse_tree.class_refs);
585  }
586  else
587  method_type=java_type_from_string(method.descriptor);
588 
589  get_class_refs_rec(method_type);
590  for(const auto &var : method.local_variable_table)
591  {
592  typet var_type;
593  if(var.signature.has_value())
594  {
596  var.descriptor,
597  var.signature,
600  var_type, parse_tree.class_refs);
601  }
602  else
603  var_type=java_type_from_string(var.descriptor);
604  get_class_refs_rec(var_type);
605  }
606  }
607 }
608 
610 {
611  if(src.id()==ID_code)
612  {
613  const java_method_typet &ct = to_java_method_type(src);
614  const typet &rt=ct.return_type();
615  get_class_refs_rec(rt);
616  for(const auto &p : ct.parameters())
617  get_class_refs_rec(p.type());
618  }
619  else if(src.id()==ID_symbol)
620  {
621  irep_idt name=src.get(ID_C_base_name);
622  if(has_prefix(id2string(name), "array["))
623  {
624  const typet &element_type =
625  static_cast<const typet &>(src.find(ID_element_type));
626  get_class_refs_rec(element_type);
627  }
628  else
629  parse_tree.class_refs.insert(name);
630  }
631  else if(src.id()==ID_struct)
632  {
633  const struct_typet &struct_type=to_struct_type(src);
634  for(const auto &c : struct_type.components())
635  get_class_refs_rec(c.type());
636  }
637  else if(src.id()==ID_pointer)
639 }
640 
642 {
643  u2 constant_pool_count=read_u2();
644  if(constant_pool_count==0)
645  {
646  error() << "invalid constant_pool_count" << eom;
647  throw 0;
648  }
649 
650  constant_pool.resize(constant_pool_count);
651 
652  for(constant_poolt::iterator
653  it=constant_pool.begin();
654  it!=constant_pool.end();
655  it++)
656  {
657  // the first entry isn't used
658  if(it==constant_pool.begin())
659  continue;
660 
661  it->tag=read_u1();
662 
663  switch(it->tag)
664  {
665  case CONSTANT_Class:
666  it->ref1=read_u2();
667  break;
668 
669  case CONSTANT_Fieldref:
670  case CONSTANT_Methodref:
674  it->ref1=read_u2();
675  it->ref2=read_u2();
676  break;
677 
678  case CONSTANT_String:
679  case CONSTANT_MethodType:
680  it->ref1=read_u2();
681  break;
682 
683  case CONSTANT_Integer:
684  case CONSTANT_Float:
685  it->number=read_u4();
686  break;
687 
688  case CONSTANT_Long:
689  case CONSTANT_Double:
690  it->number=read_u8();
691  // Eight-byte constants take up two entries
692  // in the constant_pool table, for annoying this programmer.
693  if(it==constant_pool.end())
694  {
695  error() << "invalid double entry" << eom;
696  throw 0;
697  }
698  it++;
699  it->tag=0;
700  break;
701 
702  case CONSTANT_Utf8:
703  {
704  u2 bytes=read_u2();
705  std::string s;
706  s.resize(bytes);
707  for(std::string::iterator s_it=s.begin(); s_it!=s.end(); s_it++)
708  *s_it=read_u1();
709  it->s=s; // hashes
710  }
711  break;
712 
714  it->ref1=read_u1();
715  it->ref2=read_u2();
716  break;
717 
718  default:
719  error() << "unknown constant pool entry (" << it->tag << ")"
720  << eom;
721  throw 0;
722  }
723  }
724 
725  // we do a bit of post-processing after we have them all
726  for(constant_poolt::iterator
727  it=constant_pool.begin();
728  it!=constant_pool.end();
729  it++)
730  {
731  // the first entry isn't used
732  if(it==constant_pool.begin())
733  continue;
734 
735  switch(it->tag)
736  {
737  case CONSTANT_Class:
738  {
739  const std::string &s=id2string(pool_entry(it->ref1).s);
740  it->expr=type_exprt(java_classname(s));
741  }
742  break;
743 
744  case CONSTANT_Fieldref:
745  {
746  const pool_entryt &nameandtype_entry=pool_entry(it->ref2);
747  const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
748  const pool_entryt &class_entry=pool_entry(it->ref1);
749  const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
750  typet type=type_entry(nameandtype_entry.ref2);
751 
752  symbol_typet class_symbol=
753  java_classname(id2string(class_name_entry.s));
754 
755  fieldref_exprt fieldref(
756  type, name_entry.s, class_symbol.get_identifier());
757 
758  it->expr=fieldref;
759  }
760  break;
761 
762  case CONSTANT_Methodref:
764  {
765  const pool_entryt &nameandtype_entry=pool_entry(it->ref2);
766  const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
767  const pool_entryt &class_entry=pool_entry(it->ref1);
768  const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
769  typet type=type_entry(nameandtype_entry.ref2);
770 
771  symbol_typet class_symbol=
772  java_classname(id2string(class_name_entry.s));
773 
774  irep_idt component_name=
775  id2string(name_entry.s)+
776  ":"+id2string(pool_entry(nameandtype_entry.ref2).s);
777 
778  irep_idt class_name=
779  class_symbol.get_identifier();
780 
781  irep_idt identifier=
782  id2string(class_name)+"."+id2string(component_name);
783 
784  exprt virtual_function(ID_virtual_function, type);
785  virtual_function.set(ID_component_name, component_name);
786  virtual_function.set(ID_C_class, class_name);
787  virtual_function.set(ID_C_base_name, name_entry.s);
788  virtual_function.set(ID_identifier, identifier);
789 
790  it->expr=virtual_function;
791  }
792  break;
793 
794  case CONSTANT_String:
795  {
796  // ldc turns these into references to java.lang.String
797  exprt string_literal(ID_java_string_literal);
798  string_literal.set(ID_value, pool_entry(it->ref1).s);
799  it->expr=string_literal;
800  }
801  break;
802 
803  case CONSTANT_Integer:
804  it->expr=from_integer(it->number, java_int_type());
805  break;
806 
807  case CONSTANT_Float:
808  {
810  value.unpack(it->number);
811  it->expr=value.to_expr();
812  }
813  break;
814 
815  case CONSTANT_Long:
816  it->expr=from_integer(it->number, java_long_type());
817  break;
818 
819  case CONSTANT_Double:
820  {
822  value.unpack(it->number);
823  it->expr=value.to_expr();
824  }
825  break;
826 
828  {
829  it->expr.id("nameandtype");
830  }
831  break;
832 
834  {
835  it->expr.id("methodhandle");
836  }
837  break;
838 
839  case CONSTANT_MethodType:
840  {
841  it->expr.id("methodtype");
842  }
843  break;
844 
846  {
847  it->expr.id("invokedynamic");
848  const pool_entryt &nameandtype_entry=pool_entry(it->ref2);
849  typet type=type_entry(nameandtype_entry.ref2);
850  type.set(ID_java_lambda_method_handle_index, it->ref1);
851  it->expr.type()=type;
852  }
853  break;
854 
855  default:{};
856  }
857  }
858 }
859 
861 {
862  u2 interfaces_count=read_u2();
863 
864  for(std::size_t i=0; i<interfaces_count; i++)
865  parsed_class.implements
866  .push_back(constant(read_u2()).type().get(ID_C_base_name));
867 }
868 
870 {
871  u2 fields_count=read_u2();
872 
873  for(std::size_t i=0; i<fields_count; i++)
874  {
875  fieldt &field=parsed_class.add_field();
876 
877  u2 access_flags=read_u2();
878  u2 name_index=read_u2();
879  u2 descriptor_index=read_u2();
880  u2 attributes_count=read_u2();
881 
882  field.name=pool_entry(name_index).s;
883  field.is_static=(access_flags&ACC_STATIC)!=0;
884  field.is_final=(access_flags&ACC_FINAL)!=0;
885  field.is_enum=(access_flags&ACC_ENUM)!=0;
886 
887  field.descriptor=id2string(pool_entry(descriptor_index).s);
888  field.is_public=(access_flags&ACC_PUBLIC)!=0;
889  field.is_protected=(access_flags&ACC_PROTECTED)!=0;
890  field.is_private=(access_flags&ACC_PRIVATE)!=0;
891  const auto flags = (field.is_public ? 1 : 0) +
892  (field.is_protected?1:0)+
893  (field.is_private?1:0);
894  DATA_INVARIANT(flags<=1, "at most one of public, protected, private");
895 
896  for(std::size_t j=0; j<attributes_count; j++)
897  rfield_attribute(field);
898  }
899 }
900 
901 #define T_BOOLEAN 4
902 #define T_CHAR 5
903 #define T_FLOAT 6
904 #define T_DOUBLE 7
905 #define T_BYTE 8
906 #define T_SHORT 9
907 #define T_INT 10
908 #define T_LONG 11
909 
911  methodt::instructionst &instructions)
912 {
913  u4 code_length=read_u4();
914 
915  u4 address;
916  size_t bytecode_index=0; // index of bytecode instruction
917 
918  for(address=0; address<code_length; address++)
919  {
920  bool wide_instruction=false;
921  u4 start_of_instruction=address;
922 
923  u1 bytecode=read_u1();
924 
925  if(bytecode==0xc4) // wide
926  {
927  wide_instruction=true;
928  address++;
929  bytecode=read_u1();
930  // The only valid instructions following a wide byte are
931  // [ifald]load, [ifald]store, ret and iinc
932  // All of these have either format of v, or V
933  INVARIANT(
934  bytecodes[bytecode].format == 'v' || bytecodes[bytecode].format == 'V',
935  "Unexpected wide instruction: " +
936  id2string(bytecodes[bytecode].mnemonic));
937  }
938 
939  instructions.push_back(instructiont());
940  instructiont &instruction=instructions.back();
941  instruction.statement=bytecodes[bytecode].mnemonic;
942  instruction.address=start_of_instruction;
943  instruction.source_location
944  .set_java_bytecode_index(std::to_string(bytecode_index));
945 
946  switch(bytecodes[bytecode].format)
947  {
948  case ' ': // no further bytes
949  break;
950 
951  case 'c': // a constant_pool index (one byte)
952  if(wide_instruction)
953  {
954  instruction.args.push_back(constant(read_u2()));
955  address+=2;
956  }
957  else
958  {
959  instruction.args.push_back(constant(read_u1()));
960  address+=1;
961  }
962  break;
963 
964  case 'C': // a constant_pool index (two bytes)
965  instruction.args.push_back(constant(read_u2()));
966  address+=2;
967  break;
968 
969  case 'b': // a signed byte
970  {
971  s1 c=read_u1();
972  instruction.args.push_back(from_integer(c, signedbv_typet(8)));
973  }
974  address+=1;
975  break;
976 
977  case 'o': // two byte branch offset, signed
978  {
979  s2 offset=read_u2();
980  // By converting the signed offset into an absolute address (by adding
981  // the current address) the number represented becomes unsigned.
982  instruction.args.push_back(
983  from_integer(address+offset, unsignedbv_typet(16)));
984  }
985  address+=2;
986  break;
987 
988  case 'O': // four byte branch offset, signed
989  {
990  s4 offset=read_u4();
991  // By converting the signed offset into an absolute address (by adding
992  // the current address) the number represented becomes unsigned.
993  instruction.args.push_back(
994  from_integer(address+offset, unsignedbv_typet(32)));
995  }
996  address+=4;
997  break;
998 
999  case 'v': // local variable index (one byte)
1000  {
1001  if(wide_instruction)
1002  {
1003  u2 v = read_u2();
1004  instruction.args.push_back(from_integer(v, unsignedbv_typet(16)));
1005  address += 2;
1006  }
1007  else
1008  {
1009  u1 v = read_u1();
1010  instruction.args.push_back(from_integer(v, unsignedbv_typet(8)));
1011  address += 1;
1012  }
1013  }
1014 
1015  break;
1016 
1017  case 'V':
1018  // local variable index (two bytes) plus two signed bytes
1019  if(wide_instruction)
1020  {
1021  u2 v=read_u2();
1022  instruction.args.push_back(from_integer(v, unsignedbv_typet(16)));
1023  s2 c=read_u2();
1024  instruction.args.push_back(from_integer(c, signedbv_typet(16)));
1025  address+=4;
1026  }
1027  else // local variable index (one byte) plus one signed byte
1028  {
1029  u1 v=read_u1();
1030  instruction.args.push_back(from_integer(v, unsignedbv_typet(8)));
1031  s1 c=read_u1();
1032  instruction.args.push_back(from_integer(c, signedbv_typet(8)));
1033  address+=2;
1034  }
1035  break;
1036 
1037  case 'I': // two byte constant_pool index plus two bytes
1038  {
1039  u2 c=read_u2();
1040  instruction.args.push_back(constant(c));
1041  u1 b1=read_u1();
1042  instruction.args.push_back(from_integer(b1, unsignedbv_typet(8)));
1043  u1 b2=read_u1();
1044  instruction.args.push_back(from_integer(b2, unsignedbv_typet(8)));
1045  }
1046  address+=4;
1047  break;
1048 
1049  case 'L': // lookupswitch
1050  {
1051  u4 base_offset=address;
1052 
1053  // first a pad to 32-bit align
1054  while(((address+1)&3)!=0) { read_u1(); address++; }
1055 
1056  // now default value
1057  s4 default_value=read_u4();
1058  // By converting the signed offset into an absolute address (by adding
1059  // the current address) the number represented becomes unsigned.
1060  instruction.args.push_back(
1061  from_integer(base_offset+default_value, unsignedbv_typet(32)));
1062  address+=4;
1063 
1064  // number of pairs
1065  u4 npairs=read_u4();
1066  address+=4;
1067 
1068  for(std::size_t i=0; i<npairs; i++)
1069  {
1070  s4 match=read_u4();
1071  s4 offset=read_u4();
1072  instruction.args.push_back(
1073  from_integer(match, signedbv_typet(32)));
1074  // By converting the signed offset into an absolute address (by adding
1075  // the current address) the number represented becomes unsigned.
1076  instruction.args.push_back(
1077  from_integer(base_offset+offset, unsignedbv_typet(32)));
1078  address+=8;
1079  }
1080  }
1081  break;
1082 
1083  case 'T': // tableswitch
1084  {
1085  size_t base_offset=address;
1086 
1087  // first a pad to 32-bit align
1088  while(((address+1)&3)!=0) { read_u1(); address++; }
1089 
1090  // now default value
1091  s4 default_value=read_u4();
1092  instruction.args.push_back(
1093  from_integer(base_offset+default_value, signedbv_typet(32)));
1094  address+=4;
1095 
1096  // now low value
1097  s4 low_value=read_u4();
1098  address+=4;
1099 
1100  // now high value
1101  s4 high_value=read_u4();
1102  address+=4;
1103 
1104  // there are high-low+1 offsets, and they are signed
1105  for(s4 i=low_value; i<=high_value; i++)
1106  {
1107  s4 offset=read_u4();
1108  instruction.args.push_back(from_integer(i, signedbv_typet(32)));
1109  // By converting the signed offset into an absolute address (by adding
1110  // the current address) the number represented becomes unsigned.
1111  instruction.args.push_back(
1112  from_integer(base_offset+offset, unsignedbv_typet(32)));
1113  address+=4;
1114  }
1115  }
1116  break;
1117 
1118  case 'm': // multianewarray: constant-pool index plus one unsigned byte
1119  {
1120  u2 c=read_u2(); // constant-pool index
1121  instruction.args.push_back(constant(c));
1122  u1 dimensions=read_u1(); // number of dimensions
1123  instruction.args.push_back(
1124  from_integer(dimensions, unsignedbv_typet(8)));
1125  address+=3;
1126  }
1127  break;
1128 
1129  case 't': // array subtype, one byte
1130  {
1131  typet t;
1132  switch(read_u1())
1133  {
1134  case T_BOOLEAN: t.id(ID_bool); break;
1135  case T_CHAR: t.id(ID_char); break;
1136  case T_FLOAT: t.id(ID_float); break;
1137  case T_DOUBLE: t.id(ID_double); break;
1138  case T_BYTE: t.id(ID_byte); break;
1139  case T_SHORT: t.id(ID_short); break;
1140  case T_INT: t.id(ID_int); break;
1141  case T_LONG: t.id(ID_long); break;
1142  default:{};
1143  }
1144  instruction.args.push_back(type_exprt(t));
1145  }
1146  address+=1;
1147  break;
1148 
1149  case 's': // a signed short
1150  {
1151  s2 s=read_u2();
1152  instruction.args.push_back(from_integer(s, signedbv_typet(16)));
1153  }
1154  address+=2;
1155  break;
1156 
1157  default:
1158  throw "unknown JVM bytecode instruction";
1159  }
1160  bytecode_index++;
1161  }
1162 
1163  if(address!=code_length)
1164  {
1165  error() << "bytecode length mismatch" << eom;
1166  throw 0;
1167  }
1168 }
1169 
1171 {
1172  u2 attribute_name_index=read_u2();
1173  u4 attribute_length=read_u4();
1174 
1175  irep_idt attribute_name=pool_entry(attribute_name_index).s;
1176 
1177  if(attribute_name=="Code")
1178  {
1179  UNUSED_u2(max_stack);
1180  UNUSED_u2(max_locals);
1181 
1182  rbytecode(method.instructions);
1183 
1184  u2 exception_table_length=read_u2();
1185  method.exception_table.resize(exception_table_length);
1186 
1187  for(std::size_t e=0; e<exception_table_length; e++)
1188  {
1189  u2 start_pc=read_u2();
1190  u2 end_pc=read_u2();
1191 
1192  // from the class file format spec ("4.7.3. The Code Attribute" for Java8)
1193  INVARIANT(
1194  start_pc < end_pc,
1195  "The start_pc must be less than the end_pc as this is the range the "
1196  "exception is active");
1197 
1198  u2 handler_pc=read_u2();
1199  u2 catch_type=read_u2();
1200  method.exception_table[e].start_pc=start_pc;
1201  method.exception_table[e].end_pc=end_pc;
1202  method.exception_table[e].handler_pc=handler_pc;
1203  if(catch_type!=0)
1204  method.exception_table[e].catch_type=
1205  to_symbol_type(pool_entry(catch_type).expr.type());
1206  }
1207 
1208  u2 attributes_count=read_u2();
1209 
1210  for(std::size_t j=0; j<attributes_count; j++)
1211  rcode_attribute(method);
1212 
1213  irep_idt line_number;
1214 
1215  // add missing line numbers
1216  for(methodt::instructionst::iterator
1217  it=method.instructions.begin();
1218  it!=method.instructions.end();
1219  it++)
1220  {
1221  if(!it->source_location.get_line().empty())
1222  line_number=it->source_location.get_line();
1223  else if(!line_number.empty())
1224  it->source_location.set_line(line_number);
1225  it->source_location
1226  .set_function(
1227  "java::"+id2string(parse_tree.parsed_class.name)+"."+
1228  id2string(method.name)+":"+method.descriptor);
1229  }
1230 
1231  // line number of method
1232  if(!method.instructions.empty())
1233  method.source_location.set_line(
1234  method.instructions.begin()->source_location.get_line());
1235  }
1236  else if(attribute_name=="Signature")
1237  {
1238  u2 signature_index=read_u2();
1239  method.signature=id2string(pool_entry(signature_index).s);
1240  }
1241  else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1242  attribute_name=="RuntimeVisibleAnnotations")
1243  {
1245  }
1246  else if(attribute_name == "Exceptions")
1247  {
1249  }
1250  else
1251  skip_bytes(attribute_length);
1252 }
1253 
1255 {
1256  u2 attribute_name_index=read_u2();
1257  u4 attribute_length=read_u4();
1258 
1259  irep_idt attribute_name=pool_entry(attribute_name_index).s;
1260 
1261  if(attribute_name=="Signature")
1262  {
1263  u2 signature_index=read_u2();
1264  field.signature=id2string(pool_entry(signature_index).s);
1265  }
1266  else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1267  attribute_name=="RuntimeVisibleAnnotations")
1268  {
1270  }
1271  else
1272  skip_bytes(attribute_length);
1273 }
1274 
1276 {
1277  u2 attribute_name_index=read_u2();
1278  u4 attribute_length=read_u4();
1279 
1280  irep_idt attribute_name=pool_entry(attribute_name_index).s;
1281 
1282  if(attribute_name=="LineNumberTable")
1283  {
1284  // address -> instructiont
1285  typedef std::map<unsigned,
1286  methodt::instructionst::iterator> instruction_mapt;
1287  instruction_mapt instruction_map;
1288 
1289  for(methodt::instructionst::iterator
1290  it=method.instructions.begin();
1291  it!=method.instructions.end();
1292  it++)
1293  {
1294  instruction_map[it->address]=it;
1295  }
1296 
1297  u2 line_number_table_length=read_u2();
1298 
1299  for(std::size_t i=0; i<line_number_table_length; i++)
1300  {
1301  u2 start_pc=read_u2();
1302  u2 line_number=read_u2();
1303 
1304  // annotate the bytecode program
1305  instruction_mapt::const_iterator it=
1306  instruction_map.find(start_pc);
1307 
1308  if(it!=instruction_map.end())
1309  it->second->source_location.set_line(line_number);
1310  }
1311  }
1312  else if(attribute_name=="LocalVariableTable")
1313  {
1314  u2 local_variable_table_length=read_u2();
1315 
1316  method.local_variable_table.resize(local_variable_table_length);
1317 
1318  for(std::size_t i=0; i<local_variable_table_length; i++)
1319  {
1320  u2 start_pc=read_u2();
1321  u2 length=read_u2();
1322  u2 name_index=read_u2();
1323  u2 descriptor_index=read_u2();
1324  u2 index=read_u2();
1325 
1326  method.local_variable_table[i].index=index;
1327  method.local_variable_table[i].name=pool_entry(name_index).s;
1328  method.local_variable_table[i].descriptor=
1329  id2string(pool_entry(descriptor_index).s);
1330  method.local_variable_table[i].start_pc=start_pc;
1331  method.local_variable_table[i].length=length;
1332  }
1333  }
1334  else if(attribute_name=="LocalVariableTypeTable")
1335  {
1337  }
1338  else if(attribute_name=="StackMapTable")
1339  {
1340  u2 stack_map_entries=read_u2();
1341 
1342  method.stack_map_table.resize(stack_map_entries);
1343 
1344  for(size_t i=0; i<stack_map_entries; i++)
1345  {
1346  u1 frame_type=read_u1();
1347  if(frame_type<=63)
1348  {
1350  method.stack_map_table[i].locals.resize(0);
1351  method.stack_map_table[i].stack.resize(0);
1352  }
1353  else if(64<=frame_type && frame_type<=127)
1354  {
1355  method.stack_map_table[i].type=
1357  method.stack_map_table[i].locals.resize(0);
1358  method.stack_map_table[i].stack.resize(1);
1359  methodt::verification_type_infot verification_type_info;
1360  read_verification_type_info(verification_type_info);
1361  method.stack_map_table[i].stack[0]=verification_type_info;
1362  }
1363  else if(frame_type==247)
1364  {
1365  method.stack_map_table[i].type=
1367  method.stack_map_table[i].locals.resize(0);
1368  method.stack_map_table[i].stack.resize(1);
1369  methodt::verification_type_infot verification_type_info;
1370  u2 offset_delta=read_u2();
1371  read_verification_type_info(verification_type_info);
1372  method.stack_map_table[i].stack[0]=verification_type_info;
1373  method.stack_map_table[i].offset_delta=offset_delta;
1374  }
1375  else if(248<=frame_type && frame_type<=250)
1376  {
1378  method.stack_map_table[i].locals.resize(0);
1379  method.stack_map_table[i].stack.resize(0);
1380  u2 offset_delta=read_u2();
1381  method.stack_map_table[i].offset_delta=offset_delta;
1382  }
1383  else if(frame_type==251)
1384  {
1385  method.stack_map_table[i].type
1387  method.stack_map_table[i].locals.resize(0);
1388  method.stack_map_table[i].stack.resize(0);
1389  u2 offset_delta=read_u2();
1390  method.stack_map_table[i].offset_delta=offset_delta;
1391  }
1392  else if(252<=frame_type && frame_type<=254)
1393  {
1394  size_t new_locals=(size_t) (frame_type-251);
1396  method.stack_map_table[i].locals.resize(new_locals);
1397  method.stack_map_table[i].stack.resize(0);
1398  u2 offset_delta=read_u2();
1399  method.stack_map_table[i].offset_delta=offset_delta;
1400  for(size_t k=0; k<new_locals; k++)
1401  {
1402  method.stack_map_table[i].locals
1403  .push_back(methodt::verification_type_infot());
1405  method.stack_map_table[i].locals.back();
1407  }
1408  }
1409  else if(frame_type==255)
1410  {
1412  u2 offset_delta=read_u2();
1413  method.stack_map_table[i].offset_delta=offset_delta;
1414  u2 number_locals=read_u2();
1415  method.stack_map_table[i].locals.resize(number_locals);
1416  for(size_t k=0; k<(size_t) number_locals; k++)
1417  {
1418  method.stack_map_table[i].locals
1419  .push_back(methodt::verification_type_infot());
1421  method.stack_map_table[i].locals.back();
1423  }
1424  u2 number_stack_items=read_u2();
1425  method.stack_map_table[i].stack.resize(number_stack_items);
1426  for(size_t k=0; k<(size_t) number_stack_items; k++)
1427  {
1428  method.stack_map_table[i].stack
1429  .push_back(methodt::verification_type_infot());
1431  method.stack_map_table[i].stack.back();
1433  }
1434  }
1435  else
1436  throw "error: unknown stack frame type encountered";
1437  }
1438  }
1439  else
1440  skip_bytes(attribute_length);
1441 }
1442 
1445 {
1446  u1 tag=read_u1();
1447  switch(tag)
1448  {
1449  case VTYPE_INFO_TOP:
1451  break;
1452  case VTYPE_INFO_INTEGER:
1454  break;
1455  case VTYPE_INFO_FLOAT:
1457  break;
1458  case VTYPE_INFO_LONG:
1460  break;
1461  case VTYPE_INFO_DOUBLE:
1463  break;
1464  case VTYPE_INFO_ITEM_NULL:
1466  break;
1469  break;
1470  case VTYPE_INFO_OBJECT:
1472  v.cpool_index=read_u2();
1473  break;
1474  case VTYPE_INFO_UNINIT:
1476  v.offset=read_u2();
1477  break;
1478  default:
1479  throw "error: unknown verification type info encountered";
1480  }
1481 }
1482 
1484  annotationst &annotations)
1485 {
1486  u2 num_annotations=read_u2();
1487 
1488  for(u2 number=0; number<num_annotations; number++)
1489  {
1490  annotationt annotation;
1491  rRuntimeAnnotation(annotation);
1492  annotations.push_back(annotation);
1493  }
1494 }
1495 
1497  annotationt &annotation)
1498 {
1499  u2 type_index=read_u2();
1500  annotation.type=type_entry(type_index);
1502 }
1503 
1505  annotationt::element_value_pairst &element_value_pairs)
1506 {
1507  u2 num_element_value_pairs=read_u2();
1508  element_value_pairs.resize(num_element_value_pairs);
1509 
1510  for(auto &element_value_pair : element_value_pairs)
1511  {
1512  u2 element_name_index=read_u2();
1513  element_value_pair.element_name=pool_entry(element_name_index).s;
1514 
1515  relement_value_pair(element_value_pair);
1516  }
1517 }
1518 
1523  annotationt::element_value_pairt &element_value_pair)
1524 {
1525  u1 tag=read_u1();
1526 
1527  switch(tag)
1528  {
1529  case 'e':
1530  {
1531  UNUSED_u2(type_name_index);
1532  UNUSED_u2(const_name_index);
1533  // todo: enum
1534  }
1535  break;
1536 
1537  case 'c':
1538  {
1539  u2 class_info_index = read_u2();
1540  element_value_pair.value = symbol_exprt(pool_entry(class_info_index).s);
1541  }
1542  break;
1543 
1544  case '@':
1545  {
1546  // another annotation, recursively
1547  annotationt annotation;
1548  rRuntimeAnnotation(annotation);
1549  }
1550  break;
1551 
1552  case '[':
1553  {
1554  u2 num_values=read_u2();
1555  for(std::size_t i=0; i<num_values; i++)
1556  {
1557  annotationt::element_value_pairt element_value;
1558  relement_value_pair(element_value); // recursive call
1559  }
1560  }
1561  break;
1562 
1563  case 's':
1564  {
1565  u2 const_value_index=read_u2();
1566  element_value_pair.value=string_constantt(
1567  pool_entry(const_value_index).s);
1568  }
1569  break;
1570 
1571  default:
1572  {
1573  u2 const_value_index=read_u2();
1574  element_value_pair.value=constant(const_value_index);
1575  }
1576 
1577  break;
1578  }
1579 }
1580 
1593  classt &parsed_class,
1594  const u4 &attribute_length)
1595 {
1596  std::string name = parsed_class.name.c_str();
1597  u2 number_of_classes = read_u2();
1598  u4 number_of_bytes_to_be_read = number_of_classes * 8 + 2;
1599  INVARIANT(
1600  number_of_bytes_to_be_read == attribute_length,
1601  "The number of bytes to be read for the InnerClasses attribute does not "
1602  "match the attribute length.");
1603 
1604  const auto pool_entry_lambda = [this](u2 index) -> pool_entryt & {
1605  return pool_entry(index);
1606  };
1607  const auto remove_separator_char = [](std::string str, char ch) {
1608  str.erase(std::remove(str.begin(), str.end(), ch), str.end());
1609  return str;
1610  };
1611 
1612  for(int i = 0; i < number_of_classes; i++)
1613  {
1614  u2 inner_class_info_index = read_u2();
1615  u2 outer_class_info_index = read_u2();
1616  u2 inner_name_index = read_u2();
1617  u2 inner_class_access_flags = read_u2();
1618 
1619  std::string inner_class_info_name =
1620  class_infot(pool_entry(inner_class_info_index))
1621  .get_name(pool_entry_lambda);
1622  bool is_private = (inner_class_access_flags & ACC_PRIVATE) != 0;
1623  bool is_public = (inner_class_access_flags & ACC_PUBLIC) != 0;
1624  bool is_protected = (inner_class_access_flags & ACC_PROTECTED) != 0;
1625  bool is_static = (inner_class_access_flags & ACC_STATIC) != 0;
1626 
1627  // If the original parsed class name matches the inner class name,
1628  // the parsed class is an inner class, so overwrite the parsed class'
1629  // access information and mark it as an inner class.
1630  bool is_inner_class = remove_separator_char(id2string(parsed_class.name), '.') ==
1631  remove_separator_char(inner_class_info_name, '/');
1632  if(is_inner_class)
1633  parsed_class.is_inner_class = is_inner_class;
1634  if(!is_inner_class)
1635  continue;
1636  parsed_class.is_static_class = is_static;
1637  // This is a marker that a class is anonymous.
1638  if(inner_name_index == 0)
1639  parsed_class.is_anonymous_class = true;
1640  // Note that if outer_class_info_index == 0, the inner class is an anonymous
1641  // or local class, and is treated as private.
1642  if(outer_class_info_index == 0)
1643  {
1644  parsed_class.is_private = true;
1645  parsed_class.is_protected = false;
1646  parsed_class.is_public = false;
1647  }
1648  else
1649  {
1650  std::string outer_class_info_name =
1651  class_infot(pool_entry(outer_class_info_index))
1652  .get_name(pool_entry_lambda);
1653  parsed_class.outer_class =
1654  constant(outer_class_info_index).type().get(ID_C_base_name);
1655  parsed_class.is_private = is_private;
1656  parsed_class.is_protected = is_protected;
1657  parsed_class.is_public = is_public;
1658  }
1659  }
1660 }
1661 
1668 {
1669  u2 number_of_exceptions = read_u2();
1670 
1671  std::vector<irep_idt> exceptions;
1672  for(size_t i = 0; i < number_of_exceptions; i++)
1673  {
1674  u2 exception_index_table = read_u2();
1675  const irep_idt exception_name =
1676  constant(exception_index_table).type().get(ID_C_base_name);
1677  exceptions.push_back(exception_name);
1678  }
1679  return exceptions;
1680 }
1681 
1683 {
1684  u2 attribute_name_index=read_u2();
1685  u4 attribute_length=read_u4();
1686 
1687  irep_idt attribute_name=pool_entry(attribute_name_index).s;
1688 
1689  if(attribute_name=="SourceFile")
1690  {
1691  u2 sourcefile_index=read_u2();
1692  irep_idt sourcefile_name;
1693 
1694  std::string fqn(id2string(parsed_class.name));
1695  size_t last_index=fqn.find_last_of(".");
1696  if(last_index==std::string::npos)
1697  sourcefile_name=pool_entry(sourcefile_index).s;
1698  else
1699  {
1700  std::string package_name=fqn.substr(0, last_index+1);
1701  std::replace(package_name.begin(), package_name.end(), '.', '/');
1702  const std::string &full_file_name=
1703  package_name+id2string(pool_entry(sourcefile_index).s);
1704  sourcefile_name=full_file_name;
1705  }
1706 
1707  for(methodst::iterator m_it=parsed_class.methods.begin();
1708  m_it!=parsed_class.methods.end();
1709  m_it++)
1710  {
1711  m_it->source_location.set_file(sourcefile_name);
1712  for(instructionst::iterator i_it=m_it->instructions.begin();
1713  i_it!=m_it->instructions.end();
1714  i_it++)
1715  {
1716  if(!i_it->source_location.get_line().empty())
1717  i_it->source_location.set_file(sourcefile_name);
1718  }
1719  }
1720  }
1721  else if(attribute_name=="Signature")
1722  {
1723  u2 signature_index=read_u2();
1724  parsed_class.signature=id2string(pool_entry(signature_index).s);
1726  parsed_class.signature.value(),
1728  }
1729  else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1730  attribute_name=="RuntimeVisibleAnnotations")
1731  {
1733  }
1734  else if(attribute_name == "BootstrapMethods")
1735  {
1736  // for this attribute
1737  // cf. https://docs.oracle.com/javase/specs/jvms/se8/html/jvms-4.html#jvms-4.7.23
1738  INVARIANT(
1739  !parsed_class.attribute_bootstrapmethods_read,
1740  "only one BootstrapMethods argument is allowed in a class file");
1741 
1742  // mark as read in parsed class
1743  parsed_class.attribute_bootstrapmethods_read = true;
1744  read_bootstrapmethods_entry(parsed_class);
1745  }
1746  else if(attribute_name == "InnerClasses")
1747  {
1749  parsed_class, attribute_length);
1750  }
1751  else
1752  skip_bytes(attribute_length);
1753 }
1754 
1756 {
1757  u2 methods_count=read_u2();
1758 
1759  for(std::size_t j=0; j<methods_count; j++)
1760  rmethod(parsed_class);
1761 }
1762 
1763 #define ACC_PUBLIC 0x0001
1764 #define ACC_PRIVATE 0x0002
1765 #define ACC_PROTECTED 0x0004
1766 #define ACC_STATIC 0x0008
1767 #define ACC_FINAL 0x0010
1768 #define ACC_SUPER 0x0020
1769 #define ACC_VOLATILE 0x0040
1770 #define ACC_TRANSIENT 0x0080
1771 #define ACC_INTERFACE 0x0200
1772 #define ACC_ABSTRACT 0x0400
1773 #define ACC_SYNTHETIC 0x1000
1774 #define ACC_ANNOTATION 0x2000
1775 #define ACC_ENUM 0x4000
1776 
1778 {
1779  methodt &method=parsed_class.add_method();
1780 
1781  u2 access_flags=read_u2();
1782  u2 name_index=read_u2();
1783  u2 descriptor_index=read_u2();
1784 
1785  method.is_final=(access_flags&ACC_FINAL)!=0;
1786  method.is_static=(access_flags&ACC_STATIC)!=0;
1787  method.is_abstract=(access_flags&ACC_ABSTRACT)!=0;
1788  method.is_public=(access_flags&ACC_PUBLIC)!=0;
1789  method.is_protected=(access_flags&ACC_PROTECTED)!=0;
1790  method.is_private=(access_flags&ACC_PRIVATE)!=0;
1791  method.is_synchronized=(access_flags&ACC_SYNCHRONIZED)!=0;
1792  method.is_native=(access_flags&ACC_NATIVE)!=0;
1793  method.is_bridge = (access_flags & ACC_BRIDGE) != 0;
1794  method.name=pool_entry(name_index).s;
1795  method.base_name=pool_entry(name_index).s;
1796  method.descriptor=id2string(pool_entry(descriptor_index).s);
1797 
1798  const auto flags = (method.is_public ? 1 : 0) +
1799  (method.is_protected?1:0)+
1800  (method.is_private?1:0);
1801  DATA_INVARIANT(flags<=1, "at most one of public, protected, private");
1802  u2 attributes_count=read_u2();
1803 
1804  for(std::size_t j=0; j<attributes_count; j++)
1805  rmethod_attribute(method);
1806 }
1807 
1810 {
1811  java_bytecode_parsert java_bytecode_parser;
1812  java_bytecode_parser.in=&istream;
1813  java_bytecode_parser.set_message_handler(message_handler);
1814 
1815  bool parser_result=java_bytecode_parser.parse();
1816 
1817  if(parser_result)
1818  {
1819  return {};
1820  }
1821 
1822  return std::move(java_bytecode_parser.parse_tree);
1823 }
1824 
1827 {
1828  std::ifstream in(file, std::ios::binary);
1829 
1830  if(!in)
1831  {
1832  messaget message(message_handler);
1833  message.error() << "failed to open input file `"
1834  << file << '\'' << messaget::eom;
1835  return {};
1836  }
1837 
1839 }
1840 
1845 {
1846  u2 local_variable_type_table_length=read_u2();
1847 
1848  INVARIANT(
1849  local_variable_type_table_length<=method.local_variable_table.size(),
1850  "Local variable type table cannot have more elements "
1851  "than the local variable table.");
1852  for(std::size_t i=0; i<local_variable_type_table_length; i++)
1853  {
1854  u2 start_pc=read_u2();
1855  u2 length=read_u2();
1856  u2 name_index=read_u2();
1857  u2 signature_index=read_u2();
1858  u2 index=read_u2();
1859 
1860  bool found=false;
1861  for(auto &lvar : method.local_variable_table)
1862  {
1863  // compare to entry in LVT
1864  if(lvar.index==index &&
1865  lvar.name==pool_entry(name_index).s &&
1866  lvar.start_pc==start_pc &&
1867  lvar.length==length)
1868  {
1869  found=true;
1870  lvar.signature=id2string(pool_entry(signature_index).s);
1871  break;
1872  }
1873  }
1874  INVARIANT(
1875  found,
1876  "Entry in LocalVariableTypeTable must be present in LVT");
1877  }
1878 }
1879 
1887 {
1888  const std::function<pool_entryt &(u2)> pool_entry_lambda =
1889  [this](u2 index) -> pool_entryt & { return pool_entry(index); };
1890 
1891  const base_ref_infot &ref_entry = entry.get_reference(pool_entry_lambda);
1892 
1893  const class_infot &class_entry = ref_entry.get_class(pool_entry_lambda);
1894  const name_and_type_infot &name_and_type =
1895  ref_entry.get_name_and_type(pool_entry_lambda);
1896 
1897  std::string class_name = class_entry.get_name(pool_entry_lambda);
1898  // replace '.' for '$' (inner classes)
1899  std::replace(class_name.begin(), class_name.end(), '.', '$');
1900  // replace '/' for '.' (package)
1901  std::replace(class_name.begin(), class_name.end(), '/', '.');
1902  const std::string method_ref =
1903  class_name + "." + name_and_type.get_name(pool_entry_lambda) + ':' +
1904  name_and_type.get_descriptor(pool_entry_lambda);
1905 
1906  lambda_method_handlet lambda_method_handle;
1907 
1908  if(has_prefix(name_and_type.get_name(pool_entry_lambda), "lambda$"))
1909  {
1910  // names seem to be lambda$POSTFIX$NUM
1911  // where POSTFIX is FUN for a function name in which the lambda is define
1912  // "static" when it is a static member of the class
1913  // "new" when it is a class variable, instantiated in <init>
1914  lambda_method_handle.lambda_method_name =
1915  name_and_type.get_name(pool_entry_lambda);
1916  lambda_method_handle.lambda_method_ref = method_ref;
1917  lambda_method_handle.handle_type =
1919 
1920  return lambda_method_handle;
1921  }
1922 
1923  return {};
1924 }
1925 
1930 {
1931  u2 num_bootstrap_methods = read_u2();
1932  for(size_t bootstrap_method_index = 0;
1933  bootstrap_method_index < num_bootstrap_methods;
1934  ++bootstrap_method_index)
1935  {
1936  u2 bootstrap_methodhandle_ref = read_u2();
1937  const pool_entryt &entry = pool_entry(bootstrap_methodhandle_ref);
1938 
1939  method_handle_infot method_handle{entry};
1940 
1941  u2 num_bootstrap_arguments = read_u2();
1942  debug() << "INFO: parse BootstrapMethod handle " << num_bootstrap_arguments
1943  << " #args" << eom;
1944 
1945  // read u2 values of entry into vector
1946  u2_valuest u2_values(num_bootstrap_arguments);
1947  for(size_t i = 0; i < num_bootstrap_arguments; i++)
1948  u2_values[i] = read_u2();
1949 
1950  // try parsing bootstrap method handle
1951  // each entry contains a MethodHandle structure
1952  // u2 tag
1953  // u2 reference kind which must be in the range from 1 to 9
1954  // u2 reference index into the constant pool
1955  //
1956  // reference kinds use the following
1957  // 1 to 4 must point to a CONSTANT_Fieldref structure
1958  // 5 or 8 must point to a CONSTANT_Methodref structure
1959  // 6 or 7 must point to a CONSTANT_Methodref or
1960  // CONSTANT_InterfaceMethodref structure, if the class file version
1961  // number is 52.0 or above, to a CONSTANT_Methodref only in the case
1962  // of less than 52.0
1963  // 9 must point to a CONSTANT_InterfaceMethodref structure
1964 
1965  // the index must point to a CONSTANT_String
1966  // CONSTANT_Class
1967  // CONSTANT_Integer
1968  // CONSTANT_Long
1969  // CONSTANT_Float
1970  // CONSTANT_Double
1971  // CONSTANT_MethodHandle
1972  // CONSTANT_MethodType
1973 
1974  // We read the three arguments here to see whether they correspond to
1975  // our hypotheses for this being a lambda function entry.
1976 
1977  // Need at least 3 arguments, the interface type, the method hanlde
1978  // and the method_type, otherwise it doesn't look like a call that we
1979  // understand
1980  if(num_bootstrap_arguments < 3)
1981  {
1983  parsed_class, bootstrap_method_index, std::move(u2_values));
1984  debug()
1985  << "format of BootstrapMethods entry not recognized: too few arguments"
1986  << eom;
1987  continue;
1988  }
1989 
1990  u2 interface_type_index = u2_values[0];
1991  u2 method_handle_index = u2_values[1];
1992  u2 method_type_index = u2_values[2];
1993 
1994  // The additional arguments for the altmetafactory call are skipped,
1995  // as they are currently not used. We verify though that they are of
1996  // CONSTANT_Integer type, cases where this does not hold will be
1997  // analyzed further.
1998  bool recognized = true;
1999  for(size_t i = 3; i < num_bootstrap_arguments; i++)
2000  {
2001  u2 skipped_argument = u2_values[i];
2002  recognized &= pool_entry(skipped_argument).tag == CONSTANT_Integer;
2003  }
2004 
2005  if(!recognized)
2006  {
2007  debug() << "format of BootstrapMethods entry not recognized: extra "
2008  "arguments of wrong type"
2009  << eom;
2011  parsed_class, bootstrap_method_index, std::move(u2_values));
2012  continue;
2013  }
2014 
2015  const pool_entryt &interface_type_argument =
2016  pool_entry(interface_type_index);
2017  const pool_entryt &method_handle_argument = pool_entry(method_handle_index);
2018  const pool_entryt &method_type_argument = pool_entry(method_type_index);
2019 
2020  if(
2021  interface_type_argument.tag != CONSTANT_MethodType ||
2022  method_handle_argument.tag != CONSTANT_MethodHandle ||
2023  method_type_argument.tag != CONSTANT_MethodType)
2024  {
2025  debug() << "format of BootstrapMethods entry not recognized: arguments "
2026  "wrong type"
2027  << eom;
2029  parsed_class, bootstrap_method_index, std::move(u2_values));
2030  continue;
2031  }
2032 
2033  debug() << "INFO: parse lambda handle" << eom;
2034  optionalt<lambda_method_handlet> lambda_method_handle =
2035  parse_method_handle(method_handle_infot{method_handle_argument});
2036 
2037  if(!lambda_method_handle.has_value())
2038  {
2039  debug() << "format of BootstrapMethods entry not recognized: method "
2040  "handle not recognised"
2041  << eom;
2043  parsed_class, bootstrap_method_index, std::move(u2_values));
2044  continue;
2045  }
2046 
2047  // If parse_method_handle can't parse the lambda method, it should return {}
2048  POSTCONDITION(
2049  lambda_method_handle->handle_type != method_handle_typet::UNKNOWN_HANDLE);
2050 
2051  lambda_method_handle->interface_type =
2052  pool_entry(interface_type_argument.ref1).s;
2053  lambda_method_handle->method_type = pool_entry(method_type_argument.ref1).s;
2054  lambda_method_handle->u2_values = std::move(u2_values);
2055  debug() << "lambda function reference "
2056  << id2string(lambda_method_handle->lambda_method_name)
2057  << " in class \"" << parsed_class.name << "\""
2058  << "\n interface type is "
2059  << id2string(pool_entry(interface_type_argument.ref1).s)
2060  << "\n method type is "
2061  << id2string(pool_entry(method_type_argument.ref1).s) << eom;
2062  parsed_class.add_method_handle(
2063  bootstrap_method_index, *lambda_method_handle);
2064  }
2065 }
2066 
2073  java_bytecode_parsert::classt &parsed_class,
2074  size_t bootstrap_method_index,
2075  java_bytecode_parsert::u2_valuest u2_values) const
2076 {
2077  const lambda_method_handlet lambda_method_handle =
2079  parsed_class.add_method_handle(bootstrap_method_index, lambda_method_handle);
2080 }
#define VTYPE_INFO_OBJECT
The type of an expression.
Definition: type.h:22
java_bytecode_parse_treet::classt::lambda_method_handlet lambda_method_handlet
Fixed-width bit-vector with unsigned binary interpretation.
Definition: std_types.h:1229
void rinterfaces(classt &parsed_class)
void store_unknown_method_handle(classt &parsed_class, size_t bootstrap_method_index, u2_valuest u2_values) const
Creates an unknown method handle and puts it into the parsed_class.
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
void set_java_bytecode_index(const irep_idt &index)
#define ACC_NATIVE
#define UNUSED_u2(x)
static lambda_method_handlet create_unknown_handle(const u2_valuest params)
#define VTYPE_INFO_LONG
void get_dependencies_from_generic_parameters(const std::string &signature, std::set< irep_idt > &refs)
Collect information about generic type parameters from a given signature.
Definition: java_types.cpp:847
java_bytecode_parse_treet::classt::u2_valuest u2_valuest
void rRuntimeAnnotation_attribute(annotationst &)
uint64_t u8
Definition: bytecode_info.h:58
#define CONSTANT_Methodref
constant_exprt to_expr() const
Definition: ieee_float.cpp:686
#define CONSTANT_MethodType
void rinner_classes_attribute(classt &parsed_class, const u4 &attribute_length)
Corresponds to the element_value structure Described in Java 8 specification 4.7.6 https://docs...
std::vector< instructiont > instructionst
pool_entryt & pool_entry(u2 index)
void rmethod(classt &parsed_class)
std::istream * in
Definition: parser.h:26
method_handle_kindt
Correspond to the different valid values for field reference_kind From Java 8 spec 4...
#define ACC_PROTECTED
typet java_int_type()
Definition: java_types.cpp:32
java_bytecode_parse_treet::annotationst annotationst
void rmethods(classt &parsed_class)
#define CONSTANT_InterfaceMethodref
struct bytecode_infot const bytecode_info[]
const symbol_typet & to_symbol_type(const typet &type)
Cast a generic typet to a symbol_typet.
Definition: std_types.h:139
An expression denoting a type.
Definition: std_expr.h:4408
java_bytecode_parse_treet parse_tree
#define ACC_PRIVATE
#define VTYPE_INFO_DOUBLE
#define CONSTANT_Integer
const componentst & components() const
Definition: std_types.h:245
#define CONSTANT_Fieldref
Represents the argument of an instruction that uses a CONSTANT_Fieldref This is used for example as a...
typet java_long_type()
Definition: java_types.cpp:42
typet & type()
Definition: expr.h:56
#define CONSTANT_Float
Corresponds to the CONSTANT_Class_info Structure Described in Java 8 specification 4...
std::vector< annotationt > annotationst
class_infot(const pool_entryt &entry)
#define VTYPE_INFO_ITEM_NULL
#define CONSTANT_Class
static mstreamt & eom(mstreamt &m)
Definition: message.h:272
Structure type.
Definition: std_types.h:297
uint32_t u4
Definition: bytecode_info.h:57
#define POSTCONDITION(CONDITION)
Definition: invariant.h:254
#define T_FLOAT
std::string get_name(pool_entry_lookupt pool_entry) const
Definition: parser.h:23
#define ACC_ABSTRACT
#define INVARIANT(CONDITION, REASON)
Definition: invariant.h:204
#define T_BYTE
static ieee_float_spect double_precision()
Definition: ieee_float.h:80
class_infot get_class(pool_entry_lookupt pool_entry) const
void relement_value_pairs(annotationt::element_value_pairst &)
#define CONSTANT_MethodHandle
Parser utilities.
#define CONSTANT_Long
java_bytecode_parsert::pool_entryt pool_entryt
const irep_idt & id() const
Definition: irep.h:259
typet java_type_from_string(const std::string &src, const std::string &class_name_prefix)
Transforms a string representation of a Java type into an internal type representation thereof...
Definition: java_types.cpp:470
java_bytecode_parse_treet::instructiont instructiont
#define ACC_FINAL
optionalt< java_bytecode_parse_treet > java_bytecode_parse(std::istream &istream, message_handlert &message_handler)
void unpack(const mp_integer &i)
Definition: ieee_float.cpp:312
A reference into the symbol table.
Definition: std_types.h:110
std::vector< pool_entryt > constant_poolt
name_and_type_infot get_name_and_type(pool_entry_lookupt pool_entry) const
#define CONSTANT_Double
#define VTYPE_INFO_UNINIT
#define ACC_BRIDGE
#define CONSTANT_NameAndType
Fixed-width bit-vector with two&#39;s complement interpretation.
Definition: std_types.h:1271
void parse_local_variable_type_table(methodt &method)
Parses the local variable type table of a method.
#define CONSTANT_Utf8
void add_method_handle(size_t bootstrap_index, lambda_method_handlet handle)
nonstd::optional< T > optionalt
Definition: optional.h:35
java_bytecode_parse_treet::classt classt
API to expression classes.
const irep_idt & get(const irep_namet &name) const
Definition: irep.cpp:213
mstreamt & error() const
Definition: message.h:302
void set_line(const irep_idt &line)
static ieee_float_spect single_precision()
Definition: ieee_float.h:74
name_and_type_infot(java_bytecode_parsert::pool_entryt entry)
base_ref_infot get_reference(pool_entry_lookupt pool_entry) const
#define PRECONDITION(CONDITION)
Definition: invariant.h:242
#define VTYPE_INFO_UNINIT_THIS
#define T_DOUBLE
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
#define ACC_SYNCHRONIZED
#define ACC_SYNTHETIC
uint16_t u2
Definition: bytecode_info.h:56
#define VTYPE_INFO_TOP
base_ref_infot(pool_entryt entry)
virtual void set_message_handler(message_handlert &_message_handler)
Definition: message.h:148
void rbytecode(methodt::instructionst &)
int32_t s4
Definition: bytecode_info.h:61
#define T_LONG
void skip_bytes(std::size_t bytes)
#define CONSTANT_String
const struct_typet & to_struct_type(const typet &type)
Cast a generic typet to a struct_typet.
Definition: std_types.h:318
dstringt has one field, an unsigned integer no which is an index into a static table of strings...
Definition: dstring.h:33
void rclass_attribute(classt &parsed_class)
void rmethod_attribute(methodt &method)
#define T_INT
void get_class_refs_rec(const typet &)
#define VTYPE_INFO_FLOAT
#define ACC_ENUM
typet java_type_from_string_with_exception(const std::string &descriptor, const optionalt< std::string > &signature, const std::string &class_name)
Definition: java_types.h:649
std::vector< element_value_pairt > element_value_pairst
void relement_value_pair(annotationt::element_value_pairt &)
Corresponds to the element_value structure Described in Java 8 specification 4.7.16.1 https://docs.oracle.com/javase/specs/jvms/se8/html/jvms-4.html#jvms-4.7.16.1.
std::string get_name(pool_entry_lookupt pool_entry) const
#define T_BOOLEAN
java_bytecode_parse_treet::classt::fieldst fieldst
method_handle_kindt reference_kind
void rfields(classt &parsed_class)
#define ACC_STATIC
std::vector< bytecodet > bytecodes
const java_method_typet & to_java_method_type(const typet &type)
Definition: java_types.h:289
java_bytecode_parse_treet::annotationt annotationt
mstreamt & result() const
Definition: message.h:312
const char * mnemonic
Definition: bytecode_info.h:46
#define ACC_INTERFACE
uint8_t u1
Definition: bytecode_info.h:55
std::function< java_bytecode_parsert::pool_entryt &(u2)> pool_entry_lookupt
structured_pool_entryt(java_bytecode_parsert::pool_entryt entry)
Base class for all expressions.
Definition: expr.h:42
#define VTYPE_INFO_INTEGER
java_bytecode_parse_treet::methodt::instructionst instructionst
const parameterst & parameters() const
Definition: std_types.h:905
void read_bootstrapmethods_entry(classt &)
Read all entries of the BootstrapMethods attribute of a class file.
#define ACC_PUBLIC
java_bytecode_parse_treet::fieldt fieldt
#define T_SHORT
void read_verification_type_info(methodt::verification_type_infot &)
u2 get_name_and_type_index() const
std::string to_string(const string_constraintt &expr)
Used for debug printing.
#define T_CHAR
method_handle_infot(java_bytecode_parsert::pool_entryt entry)
java_bytecode_parse_treet::methodt methodt
const typet type_entry(u2 index)
#define ACC_ANNOTATION
void rcode_attribute(methodt &method)
Corresponds to the CONSTANT_NameAndType_info Structure Described in Java 8 specification 4...
Expression to hold a symbol (variable)
Definition: std_expr.h:90
const char * c_str() const
Definition: dstring.h:84
int8_t s1
Definition: bytecode_info.h:59
goto_programt coverage_criteriont message_handlert & message_handler
Definition: cover.cpp:66
int16_t s2
Definition: bytecode_info.h:60
mstreamt & debug() const
Definition: message.h:332
void rRuntimeAnnotation(annotationt &)
const typet & subtype() const
Definition: type.h:33
#define DATA_INVARIANT(CONDITION, REASON)
Definition: invariant.h:278
java_bytecode_parse_treet::classt::methodst methodst
symbol_typet java_classname(const std::string &id)
Definition: java_types.cpp:710
std::string get_descriptor(pool_entry_lookupt pool_entry) const
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
bool empty() const
Definition: dstring.h:73
java_bytecode_parse_treet::classt::method_handle_typet method_handle_typet
const irept & find(const irep_namet &name) const
Definition: irep.cpp:285
std::vector< irep_idt > rexceptions_attribute()
Corresponds to the element_value structure Described in Java 8 specification 4.7.5 https://docs...
#define CONSTANT_InvokeDynamic
const typet & return_type() const
Definition: std_types.h:895
static std::string read_utf8_constant(const pool_entryt &entry)
const irep_idt & get_identifier() const
Definition: std_types.h:123
void set(const irep_namet &name, const irep_idt &value)
Definition: irep.h:286
optionalt< lambda_method_handlet > parse_method_handle(const class method_handle_infot &entry)
Read method handle pointed to from constant pool entry at index, return type of method handle and nam...
static format_containert< T > format(const T &o)
Definition: format.h:35
Definition: kdev_t.h:19