1// dwarf_reader.h -- parse dwarf2/3 debug information for gold  -*- C++ -*-
2
3// Copyright (C) 2007-2017 Free Software Foundation, Inc.
4// Written by Ian Lance Taylor <iant@google.com>.
5
6// This file is part of gold.
7
8// This program is free software; you can redistribute it and/or modify
9// it under the terms of the GNU General Public License as published by
10// the Free Software Foundation; either version 3 of the License, or
11// (at your option) any later version.
12
13// This program is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16// GNU General Public License for more details.
17
18// You should have received a copy of the GNU General Public License
19// along with this program; if not, write to the Free Software
20// Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
21// MA 02110-1301, USA.
22
23#ifndef GOLD_DWARF_READER_H
24#define GOLD_DWARF_READER_H
25
26#include <vector>
27#include <map>
28#include <limits.h>
29#include <sys/types.h>
30
31#include "elfcpp.h"
32#include "elfcpp_swap.h"
33#include "dwarf.h"
34#include "reloc.h"
35
36namespace gold
37{
38
39class Dwarf_info_reader;
40struct LineStateMachine;
41
42// This class is used to extract the section index and offset of
43// the target of a relocation for a given offset within the section.
44
class Elf_reloc_mapper
{
 public:
  Elf_reloc_mapper()
  { }

  virtual
  ~Elf_reloc_mapper()
  { }

  // Prepare the mapper for the relocation section RELOC_SHNDX.
  // RELOC_TYPE is handed to the implementation unchanged.  The
  // result is whatever do_initialize() reports.
  bool
  initialize(unsigned int reloc_shndx, unsigned int reloc_type)
  {
    return do_initialize(reloc_shndx, reloc_type);
  }

  // Report the next reloc_offset.
  off_t
  next_offset()
  {
    return do_next_offset();
  }

  // Move forward to the next relocation past OFFSET.
  void
  advance(off_t offset)
  {
    do_advance(offset);
  }

  // Look up the relocation at RELOC_OFFSET in the referring section.
  // The return value is the section index of the target; the offset
  // of the target within that section is delivered via TARGET_OFFSET.
  unsigned int
  get_reloc_target(off_t reloc_offset, off_t* target_offset)
  {
    return do_get_reloc_target(reloc_offset, target_offset);
  }

  // Capture the current position in the reloc section so that it can
  // be handed back to reset() later.
  uint64_t
  checkpoint() const
  {
    return do_checkpoint();
  }

  // Move the current position back to CHECKPOINT.
  void
  reset(uint64_t checkpoint)
  {
    do_reset(checkpoint);
  }

 protected:
  // Implementation hooks.  Each public entry point above forwards
  // to the hook of the matching name.

  // Implementation of initialize().
  virtual bool
  do_initialize(unsigned int, unsigned int) = 0;

  // Implementation of next_offset().
  virtual off_t
  do_next_offset() = 0;

  // Implementation of advance().
  virtual void
  do_advance(off_t offset) = 0;

  // Implementation of get_reloc_target().
  virtual unsigned int
  do_get_reloc_target(off_t reloc_offset, off_t* target_offset) = 0;

  // Implementation of checkpoint().
  virtual uint64_t
  do_checkpoint() const = 0;

  // Implementation of reset().
  virtual void
  do_reset(uint64_t checkpoint) = 0;
};
109
110template<int size, bool big_endian>
111class Sized_elf_reloc_mapper : public Elf_reloc_mapper
112{
113 public:
114  Sized_elf_reloc_mapper(Object* object, const unsigned char* symtab,
115			 off_t symtab_size)
116    : object_(object), symtab_(symtab), symtab_size_(symtab_size),
117      reloc_type_(0), track_relocs_()
118  { }
119
120 protected:
121  bool
122  do_initialize(unsigned int reloc_shndx, unsigned int reloc_type);
123
124  // Return the next reloc_offset.
125  virtual off_t
126  do_next_offset()
127  { return this->track_relocs_.next_offset(); }
128
129  // Advance to the next relocation past OFFSET.
130  virtual void
131  do_advance(off_t offset)
132  { this->track_relocs_.advance(offset); }
133
134  unsigned int
135  do_get_reloc_target(off_t reloc_offset, off_t* target_offset);
136
137  // Checkpoint the current position in the reloc section.
138  uint64_t
139  do_checkpoint() const
140  { return this->track_relocs_.checkpoint(); }
141
142  // Reset the current position to the CHECKPOINT.
143  void
144  do_reset(uint64_t checkpoint)
145  { this->track_relocs_.reset(checkpoint); }
146
147 private:
148  typedef typename elfcpp::Elf_types<size>::Elf_Addr Address;
149
150  // Return the section index of symbol SYMNDX, and copy its value to *VALUE.
151  // Set *IS_ORDINARY true if the section index is an ordinary section index.
152  unsigned int
153  symbol_section(unsigned int symndx, Address* value, bool* is_ordinary);
154
155  // The object file.
156  Object* object_;
157  // The ELF symbol table.
158  const unsigned char* symtab_;
159  // The size of the ELF symbol table.
160  off_t symtab_size_;
161  // Type of the relocation section (SHT_REL or SHT_RELA).
162  unsigned int reloc_type_;
163  // Relocations for the referring section.
164  Track_relocs<size, big_endian> track_relocs_;
165};
166
167// This class is used to read the abbreviations table from the
168// .debug_abbrev section of the object file.
169
170class Dwarf_abbrev_table
171{
172 public:
173  // An attribute list entry.
174  struct Attribute
175  {
176    Attribute(unsigned int a, unsigned int f)
177      : attr(a), form(f)
178    { }
179    unsigned int attr;
180    unsigned int form;
181  };
182
183  // An abbrev code entry.
184  struct Abbrev_code
185  {
186    Abbrev_code(unsigned int t, bool hc)
187      : tag(t), has_children(hc), has_sibling_attribute(false), attributes()
188    {
189      this->attributes.reserve(10);
190    }
191
192    void
193    add_attribute(unsigned int attr, unsigned int form)
194    {
195      this->attributes.push_back(Attribute(attr, form));
196    }
197
198    // The DWARF tag.
199    unsigned int tag;
200    // True if the DIE has children.
201    bool has_children : 1;
202    // True if the DIE has a sibling attribute.
203    bool has_sibling_attribute : 1;
204    // The list of attributes and forms.
205    std::vector<Attribute> attributes;
206  };
207
208  Dwarf_abbrev_table()
209    : abbrev_shndx_(0), abbrev_offset_(0), buffer_(NULL), buffer_end_(NULL),
210      owns_buffer_(false), buffer_pos_(NULL), high_abbrev_codes_()
211  {
212    memset(this->low_abbrev_codes_, 0, sizeof(this->low_abbrev_codes_));
213  }
214
215  ~Dwarf_abbrev_table()
216  {
217    if (this->owns_buffer_ && this->buffer_ != NULL)
218      delete[] this->buffer_;
219    this->clear_abbrev_codes();
220  }
221
222  // Read the abbrev table from an object file.
223  bool
224  read_abbrevs(Relobj* object,
225	       unsigned int abbrev_shndx,
226	       off_t abbrev_offset)
227  {
228    // If we've already read this abbrev table, return immediately.
229    if (this->abbrev_shndx_ > 0
230	&& this->abbrev_shndx_ == abbrev_shndx
231	&& this->abbrev_offset_ == abbrev_offset)
232      return true;
233    return this->do_read_abbrevs(object, abbrev_shndx, abbrev_offset);
234  }
235
236  // Return the abbrev code entry for CODE.  This is a fast path for
237  // abbrev codes that are in the direct lookup table.  If not found
238  // there, we call do_get_abbrev() to do the hard work.
239  const Abbrev_code*
240  get_abbrev(unsigned int code)
241  {
242    if (code < this->low_abbrev_code_max_
243	&& this->low_abbrev_codes_[code] != NULL)
244      return this->low_abbrev_codes_[code];
245    return this->do_get_abbrev(code);
246  }
247
248 private:
249  // Read the abbrev table from an object file.
250  bool
251  do_read_abbrevs(Relobj* object,
252		  unsigned int abbrev_shndx,
253		  off_t abbrev_offset);
254
255  // Lookup the abbrev code entry for CODE.
256  const Abbrev_code*
257  do_get_abbrev(unsigned int code);
258
259  // Store an abbrev code entry for CODE.
260  void
261  store_abbrev(unsigned int code, const Abbrev_code* entry)
262  {
263    if (code < this->low_abbrev_code_max_)
264      this->low_abbrev_codes_[code] = entry;
265    else
266      this->high_abbrev_codes_[code] = entry;
267  }
268
269  // Clear the abbrev code table and release the memory it uses.
270  void
271  clear_abbrev_codes();
272
273  typedef Unordered_map<unsigned int, const Abbrev_code*> Abbrev_code_table;
274
275  // The section index of the current abbrev table.
276  unsigned int abbrev_shndx_;
277  // The offset within the section of the current abbrev table.
278  off_t abbrev_offset_;
279  // The buffer containing the .debug_abbrev section.
280  const unsigned char* buffer_;
281  const unsigned char* buffer_end_;
282  // True if this object owns the buffer and needs to delete it.
283  bool owns_buffer_;
284  // Pointer to the current position in the buffer.
285  const unsigned char* buffer_pos_;
286  // The table of abbrev codes.
287  // We use a direct-lookup array for low abbrev codes,
288  // and store the rest in a hash table.
289  static const unsigned int low_abbrev_code_max_ = 256;
290  const Abbrev_code* low_abbrev_codes_[low_abbrev_code_max_];
291  Abbrev_code_table high_abbrev_codes_;
292};
293
294// A DWARF range list.  The start and end offsets are relative
295// to the input section SHNDX.  Each range must lie entirely
296// within a single section.
297
class Dwarf_range_list
{
 public:
  // One range: the half-open or closed nature of [START, END) is not
  // decided here; the three values are simply stored as given.
  struct Range
  {
    Range(unsigned int section, off_t first, off_t last)
      : shndx(section), start(first), end(last)
    { }

    // Index of the input section the offsets are relative to.
    unsigned int shndx;
    // Start offset of the range.
    off_t start;
    // End offset of the range.
    off_t end;
  };

  // Create an empty list.
  Dwarf_range_list()
    : range_list_()
  { }

  // Append the range (SHNDX, START, END) to the list.
  void
  add(unsigned int shndx, off_t start, off_t end)
  {
    const Range entry(shndx, start, end);
    range_list_.push_back(entry);
  }

  // Number of ranges currently in the list.
  size_t
  size() const
  {
    return range_list_.size();
  }

  // Access the range at position I.  No bounds check is performed.
  const Range&
  operator[](off_t i) const
  {
    return range_list_[i];
  }

 private:
  // The ranges, in the order they were added.
  std::vector<Range> range_list_;
};
331
332// This class is used to read the ranges table from the
333// .debug_ranges section of the object file.
334
335class Dwarf_ranges_table
336{
337 public:
338  Dwarf_ranges_table(Dwarf_info_reader* dwinfo)
339    : dwinfo_(dwinfo), ranges_shndx_(0), ranges_buffer_(NULL),
340      ranges_buffer_end_(NULL), owns_ranges_buffer_(false),
341      ranges_reloc_mapper_(NULL), reloc_type_(0), output_section_offset_(0)
342  { }
343
344  ~Dwarf_ranges_table()
345  {
346    if (this->owns_ranges_buffer_ && this->ranges_buffer_ != NULL)
347      delete[] this->ranges_buffer_;
348    if (this->ranges_reloc_mapper_ != NULL)
349      delete this->ranges_reloc_mapper_;
350  }
351
352  // Read the ranges table from an object file.
353  bool
354  read_ranges_table(Relobj* object,
355		    const unsigned char* symtab,
356		    off_t symtab_size,
357		    unsigned int ranges_shndx);
358
359  // Read the range table from an object file.
360  Dwarf_range_list*
361  read_range_list(Relobj* object,
362		  const unsigned char* symtab,
363		  off_t symtab_size,
364		  unsigned int address_size,
365		  unsigned int ranges_shndx,
366		  off_t ranges_offset);
367
368  // Look for a relocation at offset OFF in the range table,
369  // and return the section index and offset of the target.
370  unsigned int
371  lookup_reloc(off_t off, off_t* target_off);
372
373 private:
374  // The Dwarf_info_reader, for reading data.
375  Dwarf_info_reader* dwinfo_;
376  // The section index of the ranges table.
377  unsigned int ranges_shndx_;
378  // The buffer containing the .debug_ranges section.
379  const unsigned char* ranges_buffer_;
380  const unsigned char* ranges_buffer_end_;
381  // True if this object owns the buffer and needs to delete it.
382  bool owns_ranges_buffer_;
383  // Relocation mapper for the .debug_ranges section.
384  Elf_reloc_mapper* ranges_reloc_mapper_;
385  // Type of the relocation section (SHT_REL or SHT_RELA).
386  unsigned int reloc_type_;
387  // For incremental update links, this will hold the offset of the
388  // input section within the output section.  Offsets read from
389  // relocated data will be relative to the output section, and need
390  // to be corrected before reading data from the input section.
391  uint64_t output_section_offset_;
392};
393
394// This class is used to read the pubnames and pubtypes tables from the
395// .debug_pubnames and .debug_pubtypes sections of the object file.
396
397class Dwarf_pubnames_table
398{
399 public:
400  Dwarf_pubnames_table(Dwarf_info_reader* dwinfo, bool is_pubtypes)
401    : dwinfo_(dwinfo), buffer_(NULL), buffer_end_(NULL), owns_buffer_(false),
402      offset_size_(0), pinfo_(NULL), end_of_table_(NULL),
403      is_pubtypes_(is_pubtypes), is_gnu_style_(false),
404      unit_length_(0), cu_offset_(0)
405  { }
406
407  ~Dwarf_pubnames_table()
408  {
409    if (this->owns_buffer_ && this->buffer_ != NULL)
410      delete[] this->buffer_;
411  }
412
413  // Read the pubnames section from the object file, using the symbol
414  // table for relocating it.
415  bool
416  read_section(Relobj* object, const unsigned char* symbol_table,
417               off_t symtab_size);
418
419  // Read the header for the set at OFFSET.
420  bool
421  read_header(off_t offset);
422
423  // Return the offset to the cu within the info or types section.
424  off_t
425  cu_offset()
426  { return this->cu_offset_; }
427
428  // Return the size of this subsection of the table.  The unit length
429  // doesn't include the size of its own field.
430  off_t
431  subsection_size()
432  { return this->unit_length_; }
433
434  // Read the next name from the set.  If the pubname table is gnu-style,
435  // FLAG_BYTE is set to the high-byte of a gdb_index version 7 cu_index.
436  const char*
437  next_name(uint8_t* flag_byte);
438
439 private:
440  // The Dwarf_info_reader, for reading data.
441  Dwarf_info_reader* dwinfo_;
442  // The buffer containing the .debug_ranges section.
443  const unsigned char* buffer_;
444  const unsigned char* buffer_end_;
445  // True if this object owns the buffer and needs to delete it.
446  bool owns_buffer_;
447  // The size of a DWARF offset for the current set.
448  unsigned int offset_size_;
449  // The current position within the buffer.
450  const unsigned char* pinfo_;
451  // The end of the current pubnames table.
452  const unsigned char* end_of_table_;
453  // TRUE if this is a .debug_pubtypes section.
454  bool is_pubtypes_;
455  // Gnu-style pubnames table. This style has an extra flag byte between the
456  // offset and the name, and is used for generating version 7 of gdb-index.
457  bool is_gnu_style_;
458  // Fields read from the header.
459  uint64_t unit_length_;
460  off_t cu_offset_;
461
462  // Track relocations for this table so we can find the CUs that
463  // correspond to the subsections.
464  Elf_reloc_mapper* reloc_mapper_;
465  // Type of the relocation section (SHT_REL or SHT_RELA).
466  unsigned int reloc_type_;
467};
468
469// This class represents a DWARF Debug Info Entry (DIE).
470
471class Dwarf_die
472{
473 public:
474  // An attribute value.
475  struct Attribute_value
476  {
477    unsigned int attr;
478    unsigned int form;
479    union
480    {
481      int64_t intval;
482      uint64_t uintval;
483      const char* stringval;
484      const unsigned char* blockval;
485      off_t refval;
486    } val;
487    union
488    {
489      // Section index for reference forms.
490      unsigned int shndx;
491      // Block length for block forms.
492      unsigned int blocklen;
493      // Attribute offset for DW_FORM_strp.
494      unsigned int attr_off;
495    } aux;
496  };
497
498  // A list of attribute values.
499  typedef std::vector<Attribute_value> Attributes;
500
501  Dwarf_die(Dwarf_info_reader* dwinfo,
502	    off_t die_offset,
503	    Dwarf_die* parent);
504
505  // Return the DWARF tag for this DIE.
506  unsigned int
507  tag() const
508  {
509    if (this->abbrev_code_ == NULL)
510      return 0;
511    return this->abbrev_code_->tag;
512  }
513
514  // Return true if this DIE has children.
515  bool
516  has_children() const
517  {
518    gold_assert(this->abbrev_code_ != NULL);
519    return this->abbrev_code_->has_children;
520  }
521
522  // Return true if this DIE has a sibling attribute.
523  bool
524  has_sibling_attribute() const
525  {
526    gold_assert(this->abbrev_code_ != NULL);
527    return this->abbrev_code_->has_sibling_attribute;
528  }
529
530  // Return the value of attribute ATTR.
531  const Attribute_value*
532  attribute(unsigned int attr);
533
534  // Return the value of the DW_AT_name attribute.
535  const char*
536  name()
537  {
538    if (this->name_ == NULL)
539      this->set_name();
540    return this->name_;
541  }
542
543  // Return the value of the DW_AT_linkage_name
544  // or DW_AT_MIPS_linkage_name attribute.
545  const char*
546  linkage_name()
547  {
548    if (this->linkage_name_ == NULL)
549      this->set_linkage_name();
550    return this->linkage_name_;
551  }
552
553  // Return the value of the DW_AT_specification attribute.
554  off_t
555  specification()
556  {
557    if (!this->attributes_read_)
558      this->read_attributes();
559    return this->specification_;
560  }
561
562  // Return the value of the DW_AT_abstract_origin attribute.
563  off_t
564  abstract_origin()
565  {
566    if (!this->attributes_read_)
567      this->read_attributes();
568    return this->abstract_origin_;
569  }
570
571  // Return the value of attribute ATTR as a string.
572  const char*
573  string_attribute(unsigned int attr);
574
575  // Return the value of attribute ATTR as an integer.
576  int64_t
577  int_attribute(unsigned int attr);
578
579  // Return the value of attribute ATTR as an unsigned integer.
580  uint64_t
581  uint_attribute(unsigned int attr);
582
583  // Return the value of attribute ATTR as a reference.
584  off_t
585  ref_attribute(unsigned int attr, unsigned int* shndx);
586
587  // Return the value of attribute ATTR as a address.
588  off_t
589  address_attribute(unsigned int attr, unsigned int* shndx);
590
591  // Return the value of attribute ATTR as a flag.
592  bool
593  flag_attribute(unsigned int attr)
594  { return this->int_attribute(attr) != 0; }
595
596  // Return true if this DIE is a declaration.
597  bool
598  is_declaration()
599  { return this->flag_attribute(elfcpp::DW_AT_declaration); }
600
601  // Return the parent of this DIE.
602  Dwarf_die*
603  parent() const
604  { return this->parent_; }
605
606  // Return the offset of this DIE.
607  off_t
608  offset() const
609  { return this->die_offset_; }
610
611  // Return the offset of this DIE's first child.
612  off_t
613  child_offset();
614
615  // Set the offset of this DIE's next sibling.
616  void
617  set_sibling_offset(off_t sibling_offset)
618  { this->sibling_offset_ = sibling_offset; }
619
620  // Return the offset of this DIE's next sibling.
621  off_t
622  sibling_offset();
623
624 private:
625  typedef Dwarf_abbrev_table::Abbrev_code Abbrev_code;
626
627  // Read all the attributes of the DIE.
628  bool
629  read_attributes();
630
631  // Set the name of the DIE if present.
632  void
633  set_name();
634
635  // Set the linkage name if present.
636  void
637  set_linkage_name();
638
639  // Skip all the attributes of the DIE and return the offset
640  // of the next DIE.
641  off_t
642  skip_attributes();
643
644  // The Dwarf_info_reader, for reading attributes.
645  Dwarf_info_reader* dwinfo_;
646  // The parent of this DIE.
647  Dwarf_die* parent_;
648  // Offset of this DIE within its compilation unit.
649  off_t die_offset_;
650  // Offset of the first attribute, relative to the beginning of the DIE.
651  off_t attr_offset_;
652  // Offset of the first child, relative to the compilation unit.
653  off_t child_offset_;
654  // Offset of the next sibling, relative to the compilation unit.
655  off_t sibling_offset_;
656  // The abbreviation table entry.
657  const Abbrev_code* abbrev_code_;
658  // The list of attributes.
659  Attributes attributes_;
660  // True if the attributes have been read.
661  bool attributes_read_;
662  // The following fields hold common attributes to avoid a linear
663  // search through the attribute list.
664  // The DIE name (DW_AT_name).
665  const char* name_;
666  // Offset of the name in the string table (for DW_FORM_strp).
667  off_t name_off_;
668  // The linkage name (DW_AT_linkage_name or DW_AT_MIPS_linkage_name).
669  const char* linkage_name_;
670  // Offset of the linkage name in the string table (for DW_FORM_strp).
671  off_t linkage_name_off_;
672  // Section index of the string table (for DW_FORM_strp).
673  unsigned int string_shndx_;
674  // The value of a DW_AT_specification attribute.
675  off_t specification_;
676  // The value of a DW_AT_abstract_origin attribute.
677  off_t abstract_origin_;
678};
679
680// This class is used to read the debug info from the .debug_info
681// or .debug_types sections.  This is a base class that implements
682// the generic parsing of the compilation unit header and DIE
683// structure.  The parse() method parses the entire section, and
684// calls the various visit_xxx() methods for each header.  Clients
685// should derive a new class from this one and implement the
686// visit_compilation_unit() and visit_type_unit() functions.
687
688class Dwarf_info_reader
689{
690 public:
691  Dwarf_info_reader(bool is_type_unit,
692		    Relobj* object,
693		    const unsigned char* symtab,
694		    off_t symtab_size,
695		    unsigned int shndx,
696		    unsigned int reloc_shndx,
697		    unsigned int reloc_type)
698    : is_type_unit_(is_type_unit), object_(object), symtab_(symtab),
699      symtab_size_(symtab_size), shndx_(shndx), reloc_shndx_(reloc_shndx),
700      reloc_type_(reloc_type), abbrev_shndx_(0), string_shndx_(0),
701      buffer_(NULL), buffer_end_(NULL), cu_offset_(0), cu_length_(0),
702      offset_size_(0), address_size_(0), cu_version_(0),
703      abbrev_table_(), ranges_table_(this),
704      reloc_mapper_(NULL), string_buffer_(NULL), string_buffer_end_(NULL),
705      owns_string_buffer_(false), string_output_section_offset_(0)
706  { }
707
708  virtual
709  ~Dwarf_info_reader()
710  {
711    if (this->reloc_mapper_ != NULL)
712      delete this->reloc_mapper_;
713    if (this->owns_string_buffer_ && this->string_buffer_ != NULL)
714      delete[] this->string_buffer_;
715  }
716
717  // Begin parsing the debug info.  This calls visit_compilation_unit()
718  // or visit_type_unit() for each compilation or type unit found in the
719  // section, and visit_die() for each top-level DIE.
720  void
721  parse();
722
723  // Return the abbrev code entry for a CODE.
724  const Dwarf_abbrev_table::Abbrev_code*
725  get_abbrev(unsigned int code)
726  { return this->abbrev_table_.get_abbrev(code); }
727
728  // Return a pointer to the DWARF info buffer at OFFSET.
729  const unsigned char*
730  buffer_at_offset(off_t offset) const
731  {
732    const unsigned char* p = this->buffer_ + this->cu_offset_ + offset;
733    if (this->check_buffer(p + 1))
734      return p;
735    return NULL;
736  }
737
738  // Read a possibly unaligned integer of SIZE.
739  template <int valsize>
740  inline typename elfcpp::Valtype_base<valsize>::Valtype
741  read_from_pointer(const unsigned char* source);
742
743  // Read a possibly unaligned integer of SIZE.  Update SOURCE after read.
744  template <int valsize>
745  inline typename elfcpp::Valtype_base<valsize>::Valtype
746  read_from_pointer(const unsigned char** source);
747
748  // Look for a relocation at offset ATTR_OFF in the dwarf info,
749  // and return the section index and offset of the target.
750  unsigned int
751  lookup_reloc(off_t attr_off, off_t* target_off);
752
753  // Return a string from the DWARF string table.
754  const char*
755  get_string(off_t str_off, unsigned int string_shndx);
756
757  // Return the size of a DWARF offset.
758  unsigned int
759  offset_size() const
760  { return this->offset_size_; }
761
762  // Return the size of an address.
763  unsigned int
764  address_size() const
765  { return this->address_size_; }
766
767  // Set the section index of the .debug_abbrev section.
768  // We use this if there are no relocations for the .debug_info section.
769  // If not set, the code parse() routine will search for the section by name.
770  void
771  set_abbrev_shndx(unsigned int abbrev_shndx)
772  { this->abbrev_shndx_ = abbrev_shndx; }
773
774  // Return a pointer to the object file's ELF symbol table.
775  const unsigned char*
776  symtab() const
777  { return this->symtab_; }
778
779  // Return the size of the object file's ELF symbol table.
780  off_t
781  symtab_size() const
782  { return this->symtab_size_; }
783
784  // Return the offset of the current compilation unit.
785  off_t
786  cu_offset() const
787  { return this->cu_offset_; }
788
789 protected:
790  // Begin parsing the debug info.  This calls visit_compilation_unit()
791  // or visit_type_unit() for each compilation or type unit found in the
792  // section, and visit_die() for each top-level DIE.
793  template<bool big_endian>
794  void
795  do_parse();
796
797  // The following methods are hooks that are meant to be implemented
798  // by a derived class.  A default, do-nothing, implementation of
799  // each is provided for this base class.
800
801  // Visit a compilation unit.
802  virtual void
803  visit_compilation_unit(off_t cu_offset, off_t cu_length, Dwarf_die* root_die);
804
805  // Visit a type unit.
806  virtual void
807  visit_type_unit(off_t tu_offset, off_t tu_length, off_t type_offset,
808		  uint64_t signature, Dwarf_die* root_die);
809
810  // Read the range table.
811  Dwarf_range_list*
812  read_range_list(unsigned int ranges_shndx, off_t ranges_offset)
813  {
814    return this->ranges_table_.read_range_list(this->object_,
815					       this->symtab_,
816					       this->symtab_size_,
817					       this->address_size_,
818					       ranges_shndx,
819					       ranges_offset);
820  }
821
822  // Return the object.
823  Relobj*
824  object() const
825  { return this->object_; }
826
827  // Checkpoint the relocation tracker.
828  uint64_t
829  get_reloc_checkpoint() const
830  { return this->reloc_mapper_->checkpoint(); }
831
832  // Reset the relocation tracker to the CHECKPOINT.
833  void
834  reset_relocs(uint64_t checkpoint)
835  { this->reloc_mapper_->reset(checkpoint); }
836
837 private:
838  // Print a warning about a corrupt debug section.
839  void
840  warn_corrupt_debug_section() const;
841
842  // Check that P is within the bounds of the current section.
843  bool
844  check_buffer(const unsigned char* p) const
845  {
846    if (p > this->buffer_ + this->cu_offset_ + this->cu_length_)
847      {
848	this->warn_corrupt_debug_section();
849	return false;
850      }
851    return true;
852  }
853
854  // Read the DWARF string table.
855  bool
856  read_string_table(unsigned int string_shndx)
857  {
858    // If we've already read this string table, return immediately.
859    if (this->string_shndx_ > 0 && this->string_shndx_ == string_shndx)
860      return true;
861    if (string_shndx == 0 && this->string_shndx_ > 0)
862      return true;
863    return this->do_read_string_table(string_shndx);
864  }
865
866  bool
867  do_read_string_table(unsigned int string_shndx);
868
869  // True if this is a type unit; false for a compilation unit.
870  bool is_type_unit_;
871  // The object containing the .debug_info or .debug_types input section.
872  Relobj* object_;
873  // The ELF symbol table.
874  const unsigned char* symtab_;
875  // The size of the ELF symbol table.
876  off_t symtab_size_;
877  // Index of the .debug_info or .debug_types section.
878  unsigned int shndx_;
879  // Index of the relocation section.
880  unsigned int reloc_shndx_;
881  // Type of the relocation section (SHT_REL or SHT_RELA).
882  unsigned int reloc_type_;
883  // Index of the .debug_abbrev section (0 if not known).
884  unsigned int abbrev_shndx_;
885  // Index of the .debug_str section.
886  unsigned int string_shndx_;
887  // The buffer for the debug info.
888  const unsigned char* buffer_;
889  const unsigned char* buffer_end_;
890  // Offset of the current compilation unit.
891  off_t cu_offset_;
892  // Length of the current compilation unit.
893  off_t cu_length_;
894  // Size of a DWARF offset for the current compilation unit.
895  unsigned int offset_size_;
896  // Size of an address for the target architecture.
897  unsigned int address_size_;
898  // Compilation unit version number.
899  unsigned int cu_version_;
900  // Abbreviations table for current compilation unit.
901  Dwarf_abbrev_table abbrev_table_;
902  // Ranges table for the current compilation unit.
903  Dwarf_ranges_table ranges_table_;
904  // Relocation mapper for the section.
905  Elf_reloc_mapper* reloc_mapper_;
906  // The buffer for the debug string table.
907  const char* string_buffer_;
908  const char* string_buffer_end_;
909  // True if this object owns the buffer and needs to delete it.
910  bool owns_string_buffer_;
911  // For incremental update links, this will hold the offset of the
912  // input .debug_str section within the output section.  Offsets read
913  // from relocated data will be relative to the output section, and need
914  // to be corrected before reading data from the input section.
915  uint64_t string_output_section_offset_;
916};
917
918// We can't do better than to keep the offsets in a sorted vector.
919// Here, offset is the key, and file_num/line_num is the value.
struct Offset_to_lineno_entry
{
  // The offset; this is the sort key.
  off_t offset;
  // Which file-list to use (i.e. which .o file are we in).
  int header_num;
  // A pointer into files_.
  unsigned int file_num : sizeof(int) * CHAR_BIT - 1;
  // Set when this was the last entry for the current offset, meaning
  // it's the line that actually applies.
  unsigned int last_line_for_offset : 1;
  // The line number in the source file.  -1 to indicate end-of-function.
  int line_num;

  // Ordering: primarily by ascending offset.  Among entries with the
  // same offset, one whose LAST_LINE_FOR_OFFSET is set precedes one
  // whose flag is clear, so the line to report for an offset comes
  // first in its run (asking for 1 line then gives the best answer).
  // This is not a total ordering.
  bool operator<(const Offset_to_lineno_entry& that) const
  {
    if (this->offset == that.offset)
      {
        // Operands deliberately swapped: a set flag sorts first.
        return that.last_line_for_offset < this->last_line_for_offset;
      }
    return this->offset < that.offset;
  }
};
944
945// This class is used to read the line information from the debugging
946// section of an object file.
947
948class Dwarf_line_info
949{
950 public:
951  Dwarf_line_info()
952  { }
953
954  virtual
955  ~Dwarf_line_info()
956  { }
957
958  // Given a section number and an offset, returns the associated
959  // file and line-number, as a string: "file:lineno".  If unable
960  // to do the mapping, returns the empty string.  You must call
961  // read_line_mappings() before calling this function.  If
962  // 'other_lines' is non-NULL, fills that in with other line
963  // numbers assigned to the same offset.
964  std::string
965  addr2line(unsigned int shndx, off_t offset,
966            std::vector<std::string>* other_lines)
967  { return this->do_addr2line(shndx, offset, other_lines); }
968
969  // A helper function for a single addr2line lookup.  It also keeps a
970  // cache of the last CACHE_SIZE Dwarf_line_info objects it created;
971  // set to 0 not to cache at all.  The larger CACHE_SIZE is, the more
972  // chance this routine won't have to re-create a Dwarf_line_info
973  // object for its addr2line computation; such creations are slow.
974  // NOTE: Not thread-safe, so only call from one thread at a time.
975  static std::string
976  one_addr2line(Object* object, unsigned int shndx, off_t offset,
977                size_t cache_size, std::vector<std::string>* other_lines);
978
979  // This reclaims all the memory that one_addr2line may have cached.
980  // Use this when you know you will not be calling one_addr2line again.
981  static void
982  clear_addr2line_cache();
983
984 private:
985  virtual std::string
986  do_addr2line(unsigned int shndx, off_t offset,
987               std::vector<std::string>* other_lines) = 0;
988};
989
990template<int size, bool big_endian>
991class Sized_dwarf_line_info : public Dwarf_line_info
992{
993 public:
994  // Initializes a .debug_line reader for a given object file.
995  // If SHNDX is specified and non-negative, only read the debug
996  // information that pertains to the specified section.
997  Sized_dwarf_line_info(Object* object, unsigned int read_shndx = -1U);
998
999  virtual
1000  ~Sized_dwarf_line_info()
1001  {
1002    if (this->buffer_start_ != NULL)
1003      delete[] this->buffer_start_;
1004  }
1005
1006 private:
1007  std::string
1008  do_addr2line(unsigned int shndx, off_t offset,
1009               std::vector<std::string>* other_lines);
1010
1011  // Formats a file and line number to a string like "dirname/filename:lineno".
1012  std::string
1013  format_file_lineno(const Offset_to_lineno_entry& lineno) const;
1014
1015  // Start processing line info, and populates the offset_map_.
1016  // If SHNDX is non-negative, only store debug information that
1017  // pertains to the specified section.
1018  void
1019  read_line_mappings(unsigned int shndx);
1020
1021  // Reads the relocation section associated with .debug_line and
1022  // stores relocation information in reloc_map_.
1023  void
1024  read_relocs();
1025
1026  // Reads the DWARF2/3 header for this line info.  Each takes as input
1027  // a starting buffer position, and returns the ending position.
1028  const unsigned char*
1029  read_header_prolog(const unsigned char* lineptr);
1030
1031  const unsigned char*
1032  read_header_tables(const unsigned char* lineptr);
1033
1034  // Reads the DWARF2/3 line information.  If shndx is non-negative,
1035  // discard all line information that doesn't pertain to the given
1036  // section.
1037  const unsigned char*
1038  read_lines(const unsigned char* lineptr, unsigned int shndx);
1039
1040  // Process a single line info opcode at START using the state
1041  // machine at LSM.  Return true if we should define a line using the
1042  // current state of the line state machine.  Place the length of the
1043  // opcode in LEN.
1044  bool
1045  process_one_opcode(const unsigned char* start,
1046                     struct LineStateMachine* lsm, size_t* len);
1047
1048  // Some parts of processing differ depending on whether the input
1049  // was a .o file or not.
1050  bool input_is_relobj();
1051
1052  // If we saw anything amiss while parsing, we set this to false.
1053  // Then addr2line will always fail (rather than return possibly-
1054  // corrupt data).
1055  bool data_valid_;
1056
1057  // A DWARF2/3 line info header.  This is not the same size as in the
1058  // actual file, as the one in the file may have a 32 bit or 64 bit
1059  // lengths.
1060
1061  struct Dwarf_line_infoHeader
1062  {
1063    off_t total_length;
1064    int version;
1065    off_t prologue_length;
1066    int min_insn_length; // insn stands for instructin
1067    bool default_is_stmt; // stmt stands for statement
1068    signed char line_base;
1069    int line_range;
1070    unsigned char opcode_base;
1071    std::vector<unsigned char> std_opcode_lengths;
1072    int offset_size;
1073  } header_;
1074
1075  // buffer is the buffer for our line info, starting at exactly where
1076  // the line info to read is.
1077  const unsigned char* buffer_;
1078  const unsigned char* buffer_end_;
1079  // If the buffer was allocated temporarily, and therefore must be
1080  // deallocated in the dtor, this contains a pointer to the start
1081  // of the buffer.
1082  const unsigned char* buffer_start_;
1083
1084  // This has relocations that point into buffer.
1085  Sized_elf_reloc_mapper<size, big_endian>* reloc_mapper_;
1086  // The type of the reloc section in track_relocs_--SHT_REL or SHT_RELA.
1087  unsigned int track_relocs_type_;
1088
1089  // This is used to figure out what section to apply a relocation to.
1090  const unsigned char* symtab_buffer_;
1091  section_size_type symtab_buffer_size_;
1092
1093  // Holds the directories and files as we see them.  We have an array
1094  // of directory-lists, one for each .o file we're reading (usually
1095  // there will just be one, but there may be more if input is a .so).
1096  std::vector<std::vector<std::string> > directories_;
1097  // The first part is an index into directories_, the second the filename.
1098  std::vector<std::vector< std::pair<int, std::string> > > files_;
1099
1100  // An index into the current directories_ and files_ vectors.
1101  int current_header_index_;
1102
1103  // A sorted map from offset of the relocation target to the shndx
1104  // and addend for the relocation.
1105  typedef std::map<off_t, std::pair<unsigned int, off_t> >
1106  Reloc_map;
1107  Reloc_map reloc_map_;
1108
1109  // We have a vector of offset->lineno entries for every input section.
1110  typedef Unordered_map<unsigned int, std::vector<Offset_to_lineno_entry> >
1111  Lineno_map;
1112
1113  Lineno_map line_number_map_;
1114};
1115
1116} // End namespace gold.
1117
1118#endif // !defined(GOLD_DWARF_READER_H)
1119