cprover
linker_script_merge.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Linker Script Merging
4 
5 Author: Kareem Khazem <karkhaz@karkhaz.com>, 2017
6 
7 \*******************************************************************/
8 
9 #include "linker_script_merge.h"
10 
11 #include <algorithm>
12 #include <fstream>
13 #include <iterator>
14 
15 #include <util/arith_tools.h>
16 #include <util/c_types.h>
17 #include <util/expr_initializer.h>
18 #include <util/magic.h>
19 #include <util/pointer_expr.h>
20 #include <util/run.h>
21 #include <util/tempfile.h>
22 
23 #include <json/json_parser.h>
24 
26 
28 
29 #include "compile.h"
30 
32 {
33  if(!cmdline.isset('T'))
34  return 0;
35 
36  auto original_goto_model =
38 
39  if(!original_goto_model.has_value())
40  {
41  log.error() << "Unable to read goto binary for linker script merging"
42  << messaget::eom;
43  return 1;
44  }
45 
46  temporary_filet linker_def_outfile("goto-cc-linker-info", ".json");
47  std::list<irep_idt> linker_defined_symbols;
48  int fail = get_linker_script_data(
49  linker_defined_symbols,
50  original_goto_model->symbol_table,
51  elf_binary,
52  linker_def_outfile());
53  // ignore linker script parsing failures until the code is tested more widely
54  if(fail!=0)
55  return 0;
56 
57  jsont data;
58  fail = parse_json(linker_def_outfile(), log.get_message_handler(), data);
59  if(fail!=0)
60  {
61  log.error() << "Problem parsing linker script JSON data" << messaget::eom;
62  return fail;
63  }
64 
66  if(fail!=0)
67  {
68  log.error() << "Malformed linker-script JSON document" << messaget::eom;
69  data.output(log.error());
70  return fail;
71  }
72 
73  fail=1;
74  linker_valuest linker_values;
75  const auto &pair =
76  original_goto_model->goto_functions.function_map.find(INITIALIZE_FUNCTION);
77  if(pair == original_goto_model->goto_functions.function_map.end())
78  {
79  log.error() << "No " << INITIALIZE_FUNCTION << " found in goto_functions"
80  << messaget::eom;
81  return fail;
82  }
83  fail = ls_data2instructions(
84  data,
85  cmdline.get_value('T'),
86  pair->second.body,
87  original_goto_model->symbol_table,
88  linker_values);
89  if(fail!=0)
90  {
91  log.error() << "Could not add linkerscript defs to " INITIALIZE_FUNCTION
92  << messaget::eom;
93  return fail;
94  }
95 
96  fail=goto_and_object_mismatch(linker_defined_symbols, linker_values);
97  if(fail!=0)
98  return fail;
99 
100  // The keys of linker_values are exactly the elements of
101  // linker_defined_symbols, so iterate over linker_values from now on.
102 
103  fail = pointerize_linker_defined_symbols(*original_goto_model, linker_values);
104 
105  if(fail!=0)
106  {
107  log.error() << "Could not pointerize all linker-defined expressions"
108  << messaget::eom;
109  return fail;
110  }
111 
113  goto_binary,
114  *original_goto_model,
115  cmdline.isset("validate-goto-model"),
117 
118  if(fail!=0)
119  {
120  log.error() << "Could not write linkerscript-augmented binary"
121  << messaget::eom;
122  }
123 
124  return fail;
125 }
126 
128  const std::string &elf_binary,
129  const std::string &goto_binary,
130  const cmdlinet &cmdline,
131  message_handlert &message_handler)
132  : elf_binary(elf_binary),
133  goto_binary(goto_binary),
134  cmdline(cmdline),
135  log(message_handler),
136  replacement_predicates(
138  "address of array's first member",
139  [](const exprt &expr) -> const symbol_exprt & {
140  return to_symbol_expr(
141  to_index_expr(to_address_of_expr(expr).object()).index());
142  },
143  [](const exprt &expr) {
144  return expr.id() == ID_address_of &&
145  expr.type().id() == ID_pointer &&
146 
147  to_address_of_expr(expr).object().id() == ID_index &&
148  to_address_of_expr(expr).object().type().id() ==
149  ID_unsignedbv &&
150 
151  to_index_expr(to_address_of_expr(expr).object())
152  .array()
153  .id() == ID_symbol &&
154  to_index_expr(to_address_of_expr(expr).object())
155  .array()
156  .type()
157  .id() == ID_array &&
158 
159  to_index_expr(to_address_of_expr(expr).object())
160  .index()
161  .id() == ID_constant &&
162  to_index_expr(to_address_of_expr(expr).object())
163  .index()
164  .type()
165  .id() == ID_signedbv;
166  }),
168  "address of array",
169  [](const exprt &expr) -> const symbol_exprt & {
170  return to_symbol_expr(to_address_of_expr(expr).object());
171  },
172  [](const exprt &expr) {
173  return expr.id() == ID_address_of &&
174  expr.type().id() == ID_pointer &&
175 
176  to_address_of_expr(expr).object().id() == ID_symbol &&
177  to_address_of_expr(expr).object().type().id() == ID_array;
178  }),
180  "address of struct",
181  [](const exprt &expr) -> const symbol_exprt & {
182  return to_symbol_expr(to_address_of_expr(expr).object());
183  },
184  [](const exprt &expr) {
185  return expr.id() == ID_address_of &&
186  expr.type().id() == ID_pointer &&
187 
188  to_address_of_expr(expr).object().id() == ID_symbol &&
189  (to_address_of_expr(expr).object().type().id() == ID_struct ||
190  to_address_of_expr(expr).object().type().id() ==
191  ID_struct_tag);
192  }),
194  "array variable",
195  [](const exprt &expr) -> const symbol_exprt & {
196  return to_symbol_expr(expr);
197  },
198  [](const exprt &expr) {
199  return expr.id() == ID_symbol && expr.type().id() == ID_array;
200  }),
202  "pointer (does not need pointerizing)",
203  [](const exprt &expr) -> const symbol_exprt & {
204  return to_symbol_expr(expr);
205  },
206  [](const exprt &expr) {
207  return expr.id() == ID_symbol && expr.type().id() == ID_pointer;
208  })})
209 {}
210 
212  goto_modelt &goto_model,
213  const linker_valuest &linker_values)
214 {
215  const namespacet ns(goto_model.symbol_table);
216 
217  int ret=0;
218  // First, pointerize the actual linker-defined symbols
219  for(const auto &pair : linker_values)
220  {
221  const auto maybe_symbol = goto_model.symbol_table.get_writeable(pair.first);
222  if(!maybe_symbol)
223  continue;
224  symbolt &entry=*maybe_symbol;
225  entry.type=pointer_type(char_type());
226  entry.is_extern=false;
227  entry.value=pair.second.second;
228  }
229 
230  // Next, find all occurrences of linker-defined symbols that are _values_
231  // of some symbol in the symbol table, and pointerize them too
232  for(const auto &pair : goto_model.symbol_table.symbols)
233  {
234  std::list<symbol_exprt> to_pointerize;
235  symbols_to_pointerize(linker_values, pair.second.value, to_pointerize);
236 
237  if(to_pointerize.empty())
238  continue;
239  log.debug() << "Pointerizing the symbol-table value of symbol "
240  << pair.first << messaget::eom;
241  int fail = pointerize_subexprs_of(
242  goto_model.symbol_table.get_writeable_ref(pair.first).value,
243  to_pointerize,
244  linker_values);
245  if(to_pointerize.empty() && fail==0)
246  continue;
247  ret=1;
248  for(const auto &sym : to_pointerize)
249  {
250  log.error() << " Could not pointerize '" << sym.get_identifier()
251  << "' in symbol table entry " << pair.first << ". Pretty:\n"
252  << sym.pretty() << "\n";
253  }
254  log.error() << messaget::eom;
255  }
256 
257  // Finally, pointerize all occurrences of linker-defined symbols in the
258  // goto program
259  for(auto &gf : goto_model.goto_functions.function_map)
260  {
261  goto_programt &program=gf.second.body;
262  for(auto &instruction : program.instructions)
263  {
264  for(exprt *insts :
265  std::list<exprt *>({&(instruction.code), &(instruction.guard)}))
266  {
267  std::list<symbol_exprt> to_pointerize;
268  symbols_to_pointerize(linker_values, *insts, to_pointerize);
269  if(to_pointerize.empty())
270  continue;
271  log.debug() << "Pointerizing a program expression..." << messaget::eom;
272  int fail = pointerize_subexprs_of(*insts, to_pointerize, linker_values);
273  if(to_pointerize.empty() && fail==0)
274  continue;
275  ret=1;
276  for(const auto &sym : to_pointerize)
277  {
278  log.error() << " Could not pointerize '" << sym.get_identifier()
279  << "' in function " << gf.first << ". Pretty:\n"
280  << sym.pretty() << "\n";
281  log.error().source_location = instruction.source_location;
282  }
283  log.error() << messaget::eom;
284  }
285  }
286  }
287  return ret;
288 }
289 
291  exprt &old_expr,
292  const linker_valuest &linker_values,
293  const symbol_exprt &old_symbol,
294  const irep_idt &ident,
295  const std::string &shape)
296 {
297  auto it=linker_values.find(ident);
298  if(it==linker_values.end())
299  {
300  log.error() << "Could not find a new expression for linker script-defined "
301  << "symbol '" << ident << "'" << messaget::eom;
302  return 1;
303  }
304  symbol_exprt new_expr=it->second.first;
305  new_expr.add_source_location()=old_symbol.source_location();
306  log.debug() << "Pointerizing linker-defined symbol '" << ident
307  << "' of shape <" << shape << ">." << messaget::eom;
308  old_expr=new_expr;
309  return 0;
310 }
311 
313  exprt &expr,
314  std::list<symbol_exprt> &to_pointerize,
315  const linker_valuest &linker_values)
316 {
317  int fail=0, tmp=0;
318  for(auto const &pair : linker_values)
319  for(auto const &pattern : replacement_predicates)
320  {
321  if(!pattern.match(expr))
322  continue;
323  // take a copy, expr will be changed below
324  const symbol_exprt inner_symbol=pattern.inner_symbol(expr);
325  if(pair.first!=inner_symbol.get_identifier())
326  continue;
327  tmp=replace_expr(expr, linker_values, inner_symbol, pair.first,
328  pattern.description());
329  fail=tmp?tmp:fail;
330  auto result=std::find(to_pointerize.begin(), to_pointerize.end(),
331  inner_symbol);
332  if(result==to_pointerize.end())
333  {
334  fail=1;
335  log.error() << "Too many removals of '" << inner_symbol.get_identifier()
336  << "'" << messaget::eom;
337  }
338  else
339  to_pointerize.erase(result);
340  // If we get here, we've just pointerized this expression. That expression
341  // will be a symbol possibly wrapped in some unary junk, but won't contain
342  // other symbols as subexpressions that might need to be pointerized. So
343  // it's safe to bail out here.
344  return fail;
345  }
346 
347  for(auto &op : expr.operands())
348  {
349  tmp = pointerize_subexprs_of(op, to_pointerize, linker_values);
350  fail=tmp?tmp:fail;
351  }
352  return fail;
353 }
354 
356  const linker_valuest &linker_values,
357  const exprt &expr,
358  std::list<symbol_exprt> &to_pointerize) const
359 {
360  for(const auto &op : expr.operands())
361  {
362  if(op.id()!=ID_symbol)
363  continue;
364  const symbol_exprt &sym_exp=to_symbol_expr(op);
365  const auto &pair=linker_values.find(sym_exp.get_identifier());
366  if(pair!=linker_values.end())
367  to_pointerize.push_back(sym_exp);
368  }
369  for(const auto &op : expr.operands())
370  symbols_to_pointerize(linker_values, op, to_pointerize);
371 }
372 
373 #if 0
374 The current implementation of this function is less precise than the
375  commented-out version below. To understand the difference between these
376  implementations, consider the following example:
377 
378 Suppose we have a section in the linker script, 100 bytes long, where the
379 address of the symbol sec_start is the start of the section (value 4096) and the
380 address of sec_end is the end of that section (value 4196).
381 
382 The current implementation synthesizes the goto-version of the following C:
383 
384  char __sec_array[100];
385  char *sec_start=(&__sec_array[0]);
386  char *sec_end=((&__sec_array[0])+100);
387  // Yes, it is 100 not 99. We're pointing to the end of the memory occupied
388  // by __sec_array, not the last element of __sec_array.
389 
390 This is imprecise for the following reason: the actual address of the array and
391 the pointers shall be some random CBMC-internal address, instead of being 4096
392 and 4196. The linker script, on the other hand, would have specified the exact
393 position of the section, and we even know what the actual values of sec_start
394 and sec_end are from the object file (these values are in the `addresses` list
395 of the `data` argument to this function). If the correctness of the code depends
396 on these actual values, then CBMCs model of the code is too imprecise to verify
397 this.
398 
399 The commented-out version of this function below synthesizes the following:
400 
401  char *sec_start=4096;
402  char *sec_end=4196;
403  __CPROVER_allocated_memory(4096, 100);
404 
405 This code has both the actual addresses of the start and end of the section and
406 tells CBMC that the intermediate region is valid. However, the allocated_memory
407 macro does not currently allocate an actual object at the address 4096, so
408 symbolic execution can fail. In particular, the 'allocated memory' is part of
409 __CPROVER_memory, which does not have a bounded size; this means that (for
410 example) calls to memcpy or memset fail, because the first and third arguments
411 do not have know n size. The commented-out implementation should be reinstated
412 once this limitation of __CPROVER_allocated_memory has been fixed.
413 
414 In either case, no other changes to the rest of the code (outside this function)
415 should be necessary. The rest of this file converts expressions containing the
416 linker-defined symbol into pointer types if they were not already, and this is
417 the right behaviour for both implementations.
418 #endif
420  jsont &data,
421  const std::string &linker_script,
422  goto_programt &gp,
423  symbol_tablet &symbol_table,
424  linker_valuest &linker_values)
425 #if 1
426 {
427  goto_programt::instructionst initialize_instructions=gp.instructions;
428  std::map<irep_idt, std::size_t> truncated_symbols;
429  for(auto &d : to_json_array(data["regions"]))
430  {
431  bool has_end=d["has-end-symbol"].is_true();
432 
433  std::ostringstream array_name;
434  array_name << CPROVER_PREFIX << "linkerscript-array_"
435  << d["start-symbol"].value;
436  if(has_end)
437  array_name << "-" << d["end-symbol"].value;
438 
439 
440  // Array symbol_exprt
441  mp_integer array_size = string2integer(d["size"].value);
442  if(array_size > MAX_FLATTENED_ARRAY_SIZE)
443  {
444  log.warning() << "Object section '" << d["section"].value << "' of size "
445  << array_size << " is too large to model. Truncating to "
446  << MAX_FLATTENED_ARRAY_SIZE << " bytes" << messaget::eom;
447  array_size=MAX_FLATTENED_ARRAY_SIZE;
448  if(!has_end)
449  truncated_symbols[d["size-symbol"].value]=MAX_FLATTENED_ARRAY_SIZE;
450  }
451 
452  constant_exprt array_size_expr=from_integer(array_size, size_type());
453  array_typet array_type(char_type(), array_size_expr);
454  symbol_exprt array_expr(array_name.str(), array_type);
455  source_locationt array_loc;
456 
457  array_loc.set_file(linker_script);
458  std::ostringstream array_comment;
459  array_comment << "Object section '" << d["section"].value << "' of size "
460  << array_size << " bytes";
461  array_loc.set_comment(array_comment.str());
462  array_expr.add_source_location()=array_loc;
463 
464  // Array start address
465  index_exprt zero_idx(array_expr, from_integer(0, size_type()));
466  address_of_exprt array_start(zero_idx);
467 
468  // Linker-defined symbol_exprt pointing to start address
469  symbol_exprt start_sym(d["start-symbol"].value, pointer_type(char_type()));
470  linker_values.emplace(
471  d["start-symbol"].value, std::make_pair(start_sym, array_start));
472 
473  // Since the value of the pointer will be a random CBMC address, write a
474  // note about the real address in the object file
475  auto it = std::find_if(
476  to_json_array(data["addresses"]).begin(),
477  to_json_array(data["addresses"]).end(),
478  [&d](const jsont &add) {
479  return add["sym"].value == d["start-symbol"].value;
480  });
481  if(it == to_json_array(data["addresses"]).end())
482  {
483  log.error() << "Start: Could not find address corresponding to symbol '"
484  << d["start-symbol"].value << "' (start of section)"
485  << messaget::eom;
486  return 1;
487  }
488  source_locationt start_loc;
489  start_loc.set_file(linker_script);
490  std::ostringstream start_comment;
491  start_comment << "Pointer to beginning of object section '"
492  << d["section"].value << "'. Original address in object file"
493  << " is " << (*it)["val"].value;
494  start_loc.set_comment(start_comment.str());
495  start_sym.add_source_location()=start_loc;
496 
497  // Instruction for start-address pointer in __CPROVER_initialize
498  code_assignt start_assign(start_sym, array_start);
499  start_assign.add_source_location()=start_loc;
500  goto_programt::instructiont start_assign_i =
501  goto_programt::make_assignment(start_assign, start_loc);
502  initialize_instructions.push_front(start_assign_i);
503 
504  if(has_end) // Same for pointer to end of array
505  {
506  plus_exprt array_end(array_start, array_size_expr);
507 
508  symbol_exprt end_sym(d["end-symbol"].value, pointer_type(char_type()));
509  linker_values.emplace(
510  d["end-symbol"].value, std::make_pair(end_sym, array_end));
511 
512  auto entry = std::find_if(
513  to_json_array(data["addresses"]).begin(),
514  to_json_array(data["addresses"]).end(),
515  [&d](const jsont &add) {
516  return add["sym"].value == d["end-symbol"].value;
517  });
518  if(entry == to_json_array(data["addresses"]).end())
519  {
520  log.debug() << "Could not find address corresponding to symbol '"
521  << d["end-symbol"].value << "' (end of section)"
522  << messaget::eom;
523  return 1;
524  }
525  source_locationt end_loc;
526  end_loc.set_file(linker_script);
527  std::ostringstream end_comment;
528  end_comment << "Pointer to end of object section '" << d["section"].value
529  << "'. Original address in object file"
530  << " is " << (*entry)["val"].value;
531  end_loc.set_comment(end_comment.str());
532  end_sym.add_source_location()=end_loc;
533 
534  code_assignt end_assign(end_sym, array_end);
535  end_assign.add_source_location()=end_loc;
536  goto_programt::instructiont end_assign_i =
537  goto_programt::make_assignment(end_assign, end_loc);
538  initialize_instructions.push_front(end_assign_i);
539  }
540 
541  // Add the array to the symbol table. We don't add the pointer(s) to the
542  // symbol table because they were already there, but declared as extern and
543  // probably with a different type. We change the externness and type in
544  // pointerize_linker_defined_symbols.
545  symbolt array_sym;
546  array_sym.name=array_name.str();
547  array_sym.pretty_name=array_name.str();
548  array_sym.is_lvalue=array_sym.is_static_lifetime=true;
549  array_sym.type=array_type;
550  array_sym.location=array_loc;
551  symbol_table.add(array_sym);
552 
553  // Push the array initialization to the front now, so that it happens before
554  // the initialization of the symbols that point to it.
555  namespacet ns(symbol_table);
556  const auto zi = zero_initializer(array_type, array_loc, ns);
557  CHECK_RETURN(zi.has_value());
558  code_assignt array_assign(array_expr, *zi);
559  array_assign.add_source_location()=array_loc;
560  goto_programt::instructiont array_assign_i =
561  goto_programt::make_assignment(array_assign, array_loc);
562  initialize_instructions.push_front(array_assign_i);
563  }
564 
565  // We've added every linker-defined symbol that marks the start or end of a
566  // region. But there might be other linker-defined symbols with some random
567  // address. These will have been declared extern too, so we need to give them
568  // a value also. Here, we give them the actual value that they have in the
569  // object file, since we're not assigning any object to them.
570  for(const auto &d : to_json_array(data["addresses"]))
571  {
572  auto it=linker_values.find(irep_idt(d["sym"].value));
573  if(it!=linker_values.end())
574  // sym marks the start or end of a region; already dealt with.
575  continue;
576 
577  // Perhaps this is a size symbol for a section whose size we truncated
578  // earlier.
579  irep_idt symbol_value;
580  const auto &pair=truncated_symbols.find(d["sym"].value);
581  if(pair==truncated_symbols.end())
582  symbol_value=d["val"].value;
583  else
584  {
585  log.debug()
586  << "Truncating the value of symbol " << d["sym"].value << " from "
587  << d["val"].value << " to " << MAX_FLATTENED_ARRAY_SIZE
588  << " because it corresponds to the size of a too-large section."
589  << messaget::eom;
591  }
592 
593  source_locationt loc;
594  loc.set_file(linker_script);
595  loc.set_comment("linker script-defined symbol: char *"+
596  d["sym"].value+"="+"(char *)"+id2string(symbol_value)+"u;");
597 
598  symbol_exprt lhs(d["sym"].value, pointer_type(char_type()));
599 
600  constant_exprt rhs(
602  string2integer(id2string(symbol_value)),
603  unsigned_int_type().get_width()),
605 
606  typecast_exprt rhs_tc(rhs, pointer_type(char_type()));
607 
608  linker_values.emplace(
609  irep_idt(d["sym"].value), std::make_pair(lhs, rhs_tc));
610 
611  code_assignt assign(lhs, rhs_tc);
612  assign.add_source_location()=loc;
613  goto_programt::instructiont assign_i =
614  goto_programt::make_assignment(assign, loc);
615  initialize_instructions.push_front(assign_i);
616  }
617  return 0;
618 }
619 #else
620 {
621  goto_programt::instructionst initialize_instructions=gp.instructions;
622  for(const auto &d : to_json_array(data["regions"]))
623  {
624  unsigned start=safe_string2unsigned(d["start"].value);
625  unsigned size=safe_string2unsigned(d["size"].value);
626  constant_exprt first=from_integer(start, size_type());
627  constant_exprt second=from_integer(size, size_type());
628  const code_typet void_t({}, empty_typet());
630  symbol_exprt(CPROVER_PREFIX "allocated_memory", void_t), {first, second});
631 
632  source_locationt loc;
633  loc.set_file(linker_script);
634  loc.set_comment("linker script-defined region:\n"+d["commt"].value+":\n"+
635  d["annot"].value);
636  f.add_source_location()=loc;
637 
639  i.make_function_call(f);
640  initialize_instructions.push_front(i);
641  }
642 
643  if(!symbol_table.has_symbol(CPROVER_PREFIX "allocated_memory"))
644  {
645  symbolt sym;
646  sym.name=CPROVER_PREFIX "allocated_memory";
647  sym.pretty_name=CPROVER_PREFIX "allocated_memory";
648  sym.is_lvalue=sym.is_static_lifetime=true;
649  const code_typet void_t({}, empty_typet());
650  sym.type=void_t;
651  symbol_table.add(sym);
652  }
653 
654  for(const auto &d : to_json_array(data["addresses"]))
655  {
656  source_locationt loc;
657  loc.set_file(linker_script);
658  loc.set_comment("linker script-defined symbol: char *"+
659  d["sym"].value+"="+"(char *)"+d["val"].value+"u;");
660 
661  symbol_exprt lhs(d["sym"].value, pointer_type(char_type()));
662 
663  constant_exprt rhs;
665  string2integer(d["val"].value), unsigned_int_type().get_width()));
666  rhs.type()=unsigned_int_type();
667 
668  exprt rhs_tc =
670 
671  linker_values.emplace(
672  irep_idt(d["sym"].value), std::make_pair(lhs, rhs_tc));
673 
674  code_assignt assign(lhs, rhs_tc);
675  assign.add_source_location()=loc;
677  assign_i.make_assignment(assign);
678  initialize_instructions.push_front(assign_i);
679  }
680  return 0;
681 }
682 #endif
683 
685  std::list<irep_idt> &linker_defined_symbols,
686  const symbol_tablet &symbol_table,
687  const std::string &out_file,
688  const std::string &def_out_file)
689 {
690  for(auto const &pair : symbol_table.symbols)
691  {
692  if(
693  pair.second.is_extern && pair.second.value.is_nil() &&
694  pair.second.name != CPROVER_PREFIX "memory")
695  {
696  linker_defined_symbols.push_back(pair.second.name);
697  }
698  }
699 
700  std::ostringstream linker_def_str;
701  std::copy(
702  linker_defined_symbols.begin(),
703  linker_defined_symbols.end(),
704  std::ostream_iterator<irep_idt>(linker_def_str, "\n"));
705  log.debug() << "Linker-defined symbols: [" << linker_def_str.str() << "]\n"
706  << messaget::eom;
707 
708  temporary_filet linker_def_infile("goto-cc-linker-defs", "");
709  std::ofstream linker_def_file(linker_def_infile());
710  linker_def_file << linker_def_str.str();
711  linker_def_file.close();
712 
713  std::vector<std::string> argv=
714  {
715  "ls_parse.py",
716  "--script", cmdline.get_value('T'),
717  "--object", out_file,
718  "--sym-file", linker_def_infile(),
719  "--out-file", def_out_file
720  };
721 
723  argv.push_back("--very-verbose");
725  argv.push_back("--verbose");
726 
727  log.debug() << "RUN:";
728  for(std::size_t i=0; i<argv.size(); i++)
729  log.debug() << " " << argv[i];
730  log.debug() << messaget::eom;
731 
732  int rc = run(argv[0], argv, linker_def_infile(), def_out_file, "");
733  if(rc!=0)
734  log.warning() << "Problem parsing linker script" << messaget::eom;
735 
736  return rc;
737 }
738 
740  const std::list<irep_idt> &linker_defined_symbols,
741  const linker_valuest &linker_values)
742 {
743  int fail=0;
744  for(const auto &sym : linker_defined_symbols)
745  if(linker_values.find(sym)==linker_values.end())
746  {
747  fail=1;
748  log.error() << "Variable '" << sym
749  << "' was declared extern but never given "
750  << "a value, even in a linker script" << messaget::eom;
751  }
752 
753  for(const auto &pair : linker_values)
754  {
755  auto it=std::find(linker_defined_symbols.begin(),
756  linker_defined_symbols.end(), pair.first);
757  if(it==linker_defined_symbols.end())
758  {
759  fail=1;
760  log.error()
761  << "Linker script-defined symbol '" << pair.first << "' was "
762  << "either defined in the C source code, not declared extern in "
763  << "the C source code, or does not appear in the C source code"
764  << messaget::eom;
765  }
766  }
767  return fail;
768 }
769 
771 {
772  if(!data.is_object())
773  return true;
774 
775  const json_objectt &data_object = to_json_object(data);
776  return (
777  !(data_object.find("regions") != data_object.end() &&
778  data_object.find("addresses") != data_object.end() &&
779  data["regions"].is_array() && data["addresses"].is_array() &&
780  std::all_of(
781  to_json_array(data["addresses"]).begin(),
782  to_json_array(data["addresses"]).end(),
783  [](const jsont &j) {
784  if(!j.is_object())
785  return false;
786 
787  const json_objectt &address = to_json_object(j);
788  return address.find("val") != address.end() &&
789  address.find("sym") != address.end() &&
790  address["val"].is_number() && address["sym"].is_string();
791  }) &&
792  std::all_of(
793  to_json_array(data["regions"]).begin(),
794  to_json_array(data["regions"]).end(),
795  [](const jsont &j) {
796  if(!j.is_object())
797  return false;
798 
799  const json_objectt &region = to_json_object(j);
800  return region.find("start") != region.end() &&
801  region.find("size") != region.end() &&
802  region.find("annot") != region.end() &&
803  region.find("commt") != region.end() &&
804  region.find("start-symbol") != region.end() &&
805  region.find("has-end-symbol") != region.end() &&
806  region.find("section") != region.end() &&
807  region["start"].is_number() && region["size"].is_number() &&
808  region["annot"].is_string() &&
809  region["start-symbol"].is_string() &&
810  region["section"].is_string() && region["commt"].is_string() &&
811  ((region["has-end-symbol"].is_true() &&
812  region.find("end-symbol") != region.end() &&
813  region["end-symbol"].is_string()) ||
814  (region["has-end-symbol"].is_false() &&
815  region.find("size-symbol") != region.end() &&
816  region.find("end-symbol") == region.end() &&
817  region["size-symbol"].is_string()));
818  })));
819 }
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
irep_idt integer2bvrep(const mp_integer &src, std::size_t width)
convert an integer to bit-vector representation with given width This uses two's complement for negat...
std::string array_name(const namespacet &ns, const exprt &expr)
Definition: array_name.cpp:20
unsignedbv_typet unsigned_int_type()
Definition: c_types.cpp:44
unsignedbv_typet size_type()
Definition: c_types.cpp:58
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:243
bitvector_typet char_type()
Definition: c_types.cpp:114
Operator to return the address of an object.
Definition: pointer_expr.h:200
exprt & object()
Definition: pointer_expr.h:209
Arrays with given size.
Definition: std_types.h:968
std::string get_value(char option) const
Definition: cmdline.cpp:47
virtual bool isset(char option) const
Definition: cmdline.cpp:29
A codet representing an assignment in the program.
Definition: std_code.h:295
codet representation of a function call statement.
Definition: std_code.h:1215
Base type of functions.
Definition: std_types.h:744
static bool write_bin_object_file(const std::string &file_name, const goto_modelt &src_goto_model, bool validate_goto_model, message_handlert &message_handler)
Writes the goto functions of src_goto_model to a binary format object file.
Definition: compile.cpp:575
A constant literal expression.
Definition: std_expr.h:2668
void set_value(const irep_idt &value)
Definition: std_expr.h:2681
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:37
The empty type.
Definition: std_types.h:46
Base class for all expressions.
Definition: expr.h:54
source_locationt & add_source_location()
Definition: expr.h:239
const source_locationt & source_location() const
Definition: expr.h:234
typet & type()
Return the type of the expression.
Definition: expr.h:82
operandst & operands()
Definition: expr.h:96
function_mapt function_map
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:30
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:33
This class represents an instruction in the GOTO intermediate representation.
Definition: goto_program.h:180
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:74
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:556
static instructiont make_assignment(const code_assignt &_code, const source_locationt &l=source_locationt::nil())
Create an assignment instruction.
Definition: goto_program.h:995
std::list< instructiont > instructionst
Definition: goto_program.h:548
Array index operator.
Definition: std_expr.h:1243
exprt & array()
Definition: std_expr.h:1259
exprt & index()
Definition: std_expr.h:1269
const irep_idt & id() const
Definition: irep.h:407
iterator find(const std::string &key)
Definition: json.h:356
iterator end()
Definition: json.h:386
Definition: json.h:27
std::string value
Definition: json.h:132
bool is_object() const
Definition: json.h:56
linker_script_merget(const std::string &elf_binary, const std::string &goto_binary, const cmdlinet &, message_handlert &)
int pointerize_linker_defined_symbols(goto_modelt &, const linker_valuest &)
convert the type of linker script-defined symbols to char*
void symbols_to_pointerize(const linker_valuest &linker_values, const exprt &expr, std::list< symbol_exprt > &to_pointerize) const
fill to_pointerize with names of linker symbols appearing in expr
int add_linker_script_definitions()
Add values of linkerscript-defined symbols to the goto-binary.
const std::string & elf_binary
int get_linker_script_data(std::list< irep_idt > &linker_defined_symbols, const symbol_tablet &symbol_table, const std::string &out_file, const std::string &def_out_file)
Write linker script definitions to linker_data.
std::map< irep_idt, std::pair< symbol_exprt, exprt > > linker_valuest
int pointerize_subexprs_of(exprt &expr, std::list< symbol_exprt > &to_pointerize, const linker_valuest &linker_values)
const cmdlinet & cmdline
std::list< replacement_predicatet > replacement_predicates
The "shapes" of expressions to be replaced by a pointer.
int ls_data2instructions(jsont &data, const std::string &linker_script, goto_programt &gp, symbol_tablet &symbol_table, linker_valuest &linker_values)
Write a list of definitions derived from data into gp's instructions member.
int replace_expr(exprt &old_expr, const linker_valuest &linker_values, const symbol_exprt &old_symbol, const irep_idt &ident, const std::string &shape)
do the actual replacement of an expr with a new pointer expr
int goto_and_object_mismatch(const std::list< irep_idt > &linker_defined_symbols, const linker_valuest &linker_values)
one-to-one correspondence between extern & linker symbols
int linker_data_is_malformed(const jsont &data) const
Validate output of the scripts/ls_parse.py tool.
const std::string & goto_binary
unsigned get_verbosity() const
Definition: message.h:54
source_locationt source_location
Definition: message.h:247
mstreamt & error() const
Definition: message.h:399
mstreamt & warning() const
Definition: message.h:404
mstreamt & debug() const
Definition: message.h:429
message_handlert & get_message_handler()
Definition: message.h:184
@ M_DEBUG
Definition: message.h:171
@ M_RESULT
Definition: message.h:170
static eomt eom
Definition: message.h:297
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:92
The plus expression Associativity is not specified.
Definition: std_expr.h:831
Patterns of expressions that should be replaced.
void set_comment(const irep_idt &comment)
void set_file(const irep_idt &file)
Expression to hold a symbol (variable)
Definition: std_expr.h:81
const irep_idt & get_identifier() const
Definition: std_expr.h:110
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
const symbolst & symbols
Read-only field, used to look up symbols given their names.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
The symbol table.
Definition: symbol_table.h:20
virtual symbolt * get_writeable(const irep_idt &name) override
Find a symbol in the symbol table for read-write access.
Definition: symbol_table.h:96
Symbol table entry.
Definition: symbol.h:28
bool is_extern
Definition: symbol.h:66
bool is_static_lifetime
Definition: symbol.h:65
source_locationt location
Source code location of definition of symbol.
Definition: symbol.h:37
typet type
Type of symbol.
Definition: symbol.h:31
irep_idt name
The unique identifier.
Definition: symbol.h:40
irep_idt pretty_name
Language-specific display name.
Definition: symbol.h:52
bool is_lvalue
Definition: symbol.h:66
exprt value
Initial value of symbol.
Definition: symbol.h:34
Semantic type conversion.
Definition: std_expr.h:1781
static exprt conditional_cast(const exprt &expr, const typet &type)
Definition: std_expr.h:1789
Compile and link source and object files.
void __CPROVER_allocated_memory(__CPROVER_size_t address, __CPROVER_size_t extent)
#define CPROVER_PREFIX
optionalt< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
Expression Initialization.
dstringt irep_idt
Definition: irep.h:37
const std::string & id2string(const irep_idt &d)
Definition: irep.h:49
json_arrayt & to_json_array(jsont &json)
Definition: json.h:426
json_objectt & to_json_object(jsont &json)
Definition: json.h:444
bool parse_json(std::istream &in, const std::string &filename, message_handlert &message_handler, jsont &dest)
Definition: json_parser.cpp:16
Merge linker script-defined symbols into a goto-program.
Magic numbers used throughout the codebase.
const std::size_t MAX_FLATTENED_ARRAY_SIZE
Definition: magic.h:11
const mp_integer string2integer(const std::string &n, unsigned base)
Definition: mp_arith.cpp:57
BigInt mp_integer
Definition: mp_arith.h:19
API to expression classes for Pointers.
const address_of_exprt & to_address_of_expr(const exprt &expr)
Cast an exprt to an address_of_exprt.
Definition: pointer_expr.h:237
static bool read_goto_binary(const std::string &filename, symbol_tablet &, goto_functionst &, message_handlert &)
Read a goto binary from a file, but do not update config.
Read Goto Programs.
int run(const std::string &what, const std::vector< std::string > &argv)
Definition: run.cpp:49
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:496
#define INITIALIZE_FUNCTION
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:190
const index_exprt & to_index_expr(const exprt &expr)
Cast an exprt to an index_exprt.
Definition: std_expr.h:1297
unsigned safe_string2unsigned(const std::string &str, int base)
Definition: string2int.cpp:19
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
Definition: kdev_t.h:24
Definition: kdev_t.h:19