1// merge.h -- handle section merging for gold  -*- C++ -*-
2
3// Copyright (C) 2006-2017 Free Software Foundation, Inc.
4// Written by Ian Lance Taylor <iant@google.com>.
5
6// This file is part of gold.
7
8// This program is free software; you can redistribute it and/or modify
9// it under the terms of the GNU General Public License as published by
10// the Free Software Foundation; either version 3 of the License, or
11// (at your option) any later version.
12
13// This program is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16// GNU General Public License for more details.
17
18// You should have received a copy of the GNU General Public License
19// along with this program; if not, write to the Free Software
20// Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
21// MA 02110-1301, USA.
22
23#ifndef GOLD_MERGE_H
24#define GOLD_MERGE_H
25
26#include <climits>
27#include <map>
28#include <vector>
29
30#include "stringpool.h"
31#include "output.h"
32
33namespace gold
34{
35
36// For each object with merge sections, we store an Object_merge_map.
37// This is used to map locations in input sections to a merged output
38// section.  The output section itself is not recorded here--it can be
39// found in the output_sections_ field of the Object.
40
41class Object_merge_map
42{
43 public:
44  Object_merge_map()
45    : section_merge_maps_()
46  { }
47
48  ~Object_merge_map();
49
50  // Add a mapping for MERGE_MAP, for the bytes from OFFSET to OFFSET
51  // + LENGTH in the input section SHNDX to OUTPUT_OFFSET in the
52  // output section.  An OUTPUT_OFFSET of -1 means that the bytes are
53  // discarded.  OUTPUT_OFFSET is relative to the start of the merged
54  // data in the output section.
55  void
56  add_mapping(const Output_section_data*, unsigned int shndx,
57              section_offset_type offset, section_size_type length,
58              section_offset_type output_offset);
59
60  // Get the output offset for an input address.  MERGE_MAP is the map
61  // we are looking for, or NULL if we don't care.  The input address
62  // is at offset OFFSET in section SHNDX.  This sets *OUTPUT_OFFSET
63  // to the offset in the output section; this will be -1 if the bytes
64  // are not being copied to the output.  This returns true if the
65  // mapping is known, false otherwise.  *OUTPUT_OFFSET is relative to
66  // the start of the merged data in the output section.
67  bool
68  get_output_offset(unsigned int shndx,
69		    section_offset_type offset,
70		    section_offset_type* output_offset);
71
72  const Output_section_data*
73  find_merge_section(unsigned int shndx) const;
74
75  // Initialize an mapping from input offsets to output addresses for
76  // section SHNDX.  STARTING_ADDRESS is the output address of the
77  // merged section.
78  template<int size>
79  void
80  initialize_input_to_output_map(
81      unsigned int shndx,
82      typename elfcpp::Elf_types<size>::Elf_Addr starting_address,
83      Unordered_map<section_offset_type,
84		    typename elfcpp::Elf_types<size>::Elf_Addr>*);
85
86  // Map input section offsets to a length and an output section
87  // offset.  An output section offset of -1 means that this part of
88  // the input section is being discarded.
89  struct Input_merge_entry
90  {
91    // The offset in the input section.
92    section_offset_type input_offset;
93    // The length.
94    section_size_type length;
95    // The offset in the output section.
96    section_offset_type output_offset;
97  };
98
99  // A list of entries for a particular input section.
100  struct Input_merge_map
101  {
102    void add_mapping(section_offset_type input_offset, section_size_type length,
103                     section_offset_type output_offset);
104
105    typedef std::vector<Input_merge_entry> Entries;
106
107    // We store these with the Relobj, and we look them up by input
108    // section.  It is possible to have two different merge maps
109    // associated with a single output section.  For example, this
110    // happens routinely with .rodata, when merged string constants
111    // and merged fixed size constants are both put into .rodata.  The
112    // output offset that we store is not the offset from the start of
113    // the output section; it is the offset from the start of the
114    // merged data in the output section.  That means that the caller
115    // is going to add the offset of the merged data within the output
116    // section, which means that the caller needs to know which set of
117    // merged data it found the entry in.  So it's not enough to find
118    // this data based on the input section and the output section; we
119    // also have to find it based on a set of merged data in the
120    // output section.  In order to verify that we are looking at the
121    // right data, we store a pointer to the Merge_map here, and we
122    // pass in a pointer when looking at the data.  If we are asked to
123    // look up information for a different Merge_map, we report that
124    // we don't have it, rather than trying a lookup and returning an
125    // answer which will receive the wrong offset.
126    const Output_section_data* output_data;
127    // The list of mappings.
128    Entries entries;
129    // Whether the ENTRIES field is sorted by input_offset.
130    bool sorted;
131
132    Input_merge_map()
133      : output_data(NULL), entries(), sorted(true)
134    { }
135  };
136
137  // Get or make the Input_merge_map to use for the section SHNDX
138  // with MERGE_MAP.
139  Input_merge_map*
140  get_or_make_input_merge_map(const Output_section_data* merge_map,
141                              unsigned int shndx);
142
143  private:
144  // A less-than comparison routine for Input_merge_entry.
145  struct Input_merge_compare
146  {
147    bool
148    operator()(const Input_merge_entry& i1, const Input_merge_entry& i2) const
149    { return i1.input_offset < i2.input_offset; }
150  };
151
152  // Map input section indices to merge maps.
153  typedef std::vector<std::pair<unsigned int, Input_merge_map*> >
154      Section_merge_maps;
155
156  // Return a pointer to the Input_merge_map to use for the input
157  // section SHNDX, or NULL.
158  const Input_merge_map*
159  get_input_merge_map(unsigned int shndx) const;
160
161  Input_merge_map *
162  get_input_merge_map(unsigned int shndx) {
163    return const_cast<Input_merge_map *>(static_cast<const Object_merge_map *>(
164                                             this)->get_input_merge_map(shndx));
165  }
166
167  Section_merge_maps section_merge_maps_;
168};
169
170// A general class for SHF_MERGE data, to hold functions shared by
171// fixed-size constant data and string data.
172
173class Output_merge_base : public Output_section_data
174{
175 public:
176  Output_merge_base(uint64_t entsize, uint64_t addralign)
177    : Output_section_data(addralign), entsize_(entsize),
178      keeps_input_sections_(false), first_relobj_(NULL), first_shndx_(-1),
179      input_sections_()
180  { }
181
182  // Return the entry size.
183  uint64_t
184  entsize() const
185  { return this->entsize_; }
186
187  // Whether this is a merge string section.  This is only true of
188  // Output_merge_string.
189  bool
190  is_string()
191  { return this->do_is_string(); }
192
193  // Whether this keeps input sections.
194  bool
195  keeps_input_sections() const
196  { return this->keeps_input_sections_; }
197
198  // Set the keeps-input-sections flag.  This is virtual so that sub-classes
199  // can perform additional checks.
200  void
201  set_keeps_input_sections()
202  { this->do_set_keeps_input_sections(); }
203
204  // Return the object of the first merged input section.  This used
205  // for script processing.  This is NULL if merge section is empty.
206  Relobj*
207  first_relobj() const
208  { return this->first_relobj_; }
209
210  // Return the section index of the first merged input section.  This
211  // is used for script processing.  This is valid only if merge section
212  // is not valid.
213  unsigned int
214  first_shndx() const
215  {
216    gold_assert(this->first_relobj_ != NULL);
217    return this->first_shndx_;
218  }
219
220  // Set of merged input sections.
221  typedef Unordered_set<Section_id, Section_id_hash> Input_sections;
222
223  // Beginning of merged input sections.
224  Input_sections::const_iterator
225  input_sections_begin() const
226  {
227    gold_assert(this->keeps_input_sections_);
228    return this->input_sections_.begin();
229  }
230
231  // Beginning of merged input sections.
232  Input_sections::const_iterator
233  input_sections_end() const
234  {
235    gold_assert(this->keeps_input_sections_);
236    return this->input_sections_.end();
237  }
238
239 protected:
240  // Return the output offset for an input offset.
241  bool
242  do_output_offset(const Relobj* object, unsigned int shndx,
243		   section_offset_type offset,
244		   section_offset_type* poutput) const;
245
246  // This may be overridden by the child class.
247  virtual bool
248  do_is_string()
249  { return false; }
250
251  // This may be overridden by the child class.
252  virtual void
253  do_set_keeps_input_sections()
254  { this->keeps_input_sections_ = true; }
255
256  // Record the merged input section for script processing.
257  void
258  record_input_section(Relobj* relobj, unsigned int shndx);
259
260 private:
261  // The entry size.  For fixed-size constants, this is the size of
262  // the constants.  For strings, this is the size of a character.
263  uint64_t entsize_;
264  // Whether we keep input sections.
265  bool keeps_input_sections_;
266  // Object of the first merged input section.  We use this for script
267  // processing.
268  Relobj* first_relobj_;
269  // Section index of the first merged input section.
270  unsigned int first_shndx_;
271  // Input sections.  We only keep them is keeps_input_sections_ is true.
272  Input_sections input_sections_;
273};
274
275// Handle SHF_MERGE sections with fixed-size constant data.
276
277class Output_merge_data : public Output_merge_base
278{
279 public:
280  Output_merge_data(uint64_t entsize, uint64_t addralign)
281    : Output_merge_base(entsize, addralign), p_(NULL), len_(0), alc_(0),
282      input_count_(0),
283      hashtable_(128, Merge_data_hash(this), Merge_data_eq(this))
284  { }
285
286 protected:
287  // Add an input section.
288  bool
289  do_add_input_section(Relobj* object, unsigned int shndx);
290
291  // Set the final data size.
292  void
293  set_final_data_size();
294
295  // Write the data to the file.
296  void
297  do_write(Output_file*);
298
299  // Write the data to a buffer.
300  void
301  do_write_to_buffer(unsigned char*);
302
303  // Write to a map file.
304  void
305  do_print_to_mapfile(Mapfile* mapfile) const
306  { mapfile->print_output_data(this, _("** merge constants")); }
307
308  // Print merge stats to stderr.
309  void
310  do_print_merge_stats(const char* section_name);
311
312  // Set keeps-input-sections flag.
313  void
314  do_set_keeps_input_sections()
315  {
316    gold_assert(this->input_count_ == 0);
317    Output_merge_base::do_set_keeps_input_sections();
318  }
319
320 private:
321  // We build a hash table of the fixed-size constants.  Each constant
322  // is stored as a pointer into the section data we are accumulating.
323
324  // A key in the hash table.  This is an offset in the section
325  // contents we are building.
326  typedef section_offset_type Merge_data_key;
327
328  // Compute the hash code.  To do this we need a pointer back to the
329  // object holding the data.
330  class Merge_data_hash
331  {
332   public:
333    Merge_data_hash(const Output_merge_data* pomd)
334      : pomd_(pomd)
335    { }
336
337    size_t
338    operator()(Merge_data_key) const;
339
340   private:
341    const Output_merge_data* pomd_;
342  };
343
344  friend class Merge_data_hash;
345
346  // Compare two entries in the hash table for equality.  To do this
347  // we need a pointer back to the object holding the data.  Note that
348  // we now have a pointer to the object stored in two places in the
349  // hash table.  Fixing this would require specializing the hash
350  // table, which would be hard to do portably.
351  class Merge_data_eq
352  {
353   public:
354    Merge_data_eq(const Output_merge_data* pomd)
355      : pomd_(pomd)
356    { }
357
358    bool
359    operator()(Merge_data_key k1, Merge_data_key k2) const;
360
361   private:
362    const Output_merge_data* pomd_;
363  };
364
365  friend class Merge_data_eq;
366
367  // The type of the hash table.
368  typedef Unordered_set<Merge_data_key, Merge_data_hash, Merge_data_eq>
369    Merge_data_hashtable;
370
371  // Given a hash table key, which is just an offset into the section
372  // data, return a pointer to the corresponding constant.
373  const unsigned char*
374  constant(Merge_data_key k) const
375  {
376    gold_assert(k >= 0 && k < static_cast<section_offset_type>(this->len_));
377    return this->p_ + k;
378  }
379
380  // Add a constant to the output.
381  void
382  add_constant(const unsigned char*);
383
384  // The accumulated data.
385  unsigned char* p_;
386  // The length of the accumulated data.
387  section_size_type len_;
388  // The size of the allocated buffer.
389  section_size_type alc_;
390  // The number of entries seen in input files.
391  size_t input_count_;
392  // The hash table.
393  Merge_data_hashtable hashtable_;
394};
395
396// Handle SHF_MERGE sections with string data.  This is a template
397// based on the type of the characters in the string.
398
399template<typename Char_type>
400class Output_merge_string : public Output_merge_base
401{
402 public:
403  Output_merge_string(uint64_t addralign)
404    : Output_merge_base(sizeof(Char_type), addralign), stringpool_(addralign),
405      merged_strings_lists_(), input_count_(0), input_size_(0)
406  {
407    this->stringpool_.set_no_zero_null();
408  }
409
410 protected:
411  // Add an input section.
412  bool
413  do_add_input_section(Relobj* object, unsigned int shndx);
414
415  // Do all the final processing after the input sections are read in.
416  // Returns the final data size.
417  section_size_type
418  finalize_merged_data();
419
420  // Set the final data size.
421  void
422  set_final_data_size();
423
424  // Write the data to the file.
425  void
426  do_write(Output_file*);
427
428  // Write the data to a buffer.
429  void
430  do_write_to_buffer(unsigned char*);
431
432  // Write to a map file.
433  void
434  do_print_to_mapfile(Mapfile* mapfile) const
435  { mapfile->print_output_data(this, _("** merge strings")); }
436
437  // Print merge stats to stderr.
438  void
439  do_print_merge_stats(const char* section_name);
440
441  // Writes the stringpool to a buffer.
442  void
443  stringpool_to_buffer(unsigned char* buffer, section_size_type buffer_size)
444  { this->stringpool_.write_to_buffer(buffer, buffer_size); }
445
446  // Clears all the data in the stringpool, to save on memory.
447  void
448  clear_stringpool()
449  { this->stringpool_.clear(); }
450
451  // Whether this is a merge string section.
452  virtual bool
453  do_is_string()
454  { return true; }
455
456  // Set keeps-input-sections flag.
457  void
458  do_set_keeps_input_sections()
459  {
460    gold_assert(this->input_count_ == 0);
461    Output_merge_base::do_set_keeps_input_sections();
462  }
463
464 private:
465  // The name of the string type, for stats.
466  const char*
467  string_name();
468
469  // As we see input sections, we build a mapping from object, section
470  // index and offset to strings.
471  struct Merged_string
472  {
473    // The offset in the input section.
474    section_offset_type offset;
475    // The key in the Stringpool.
476    Stringpool::Key stringpool_key;
477
478    Merged_string(section_offset_type offseta, Stringpool::Key stringpool_keya)
479      : offset(offseta), stringpool_key(stringpool_keya)
480    { }
481  };
482
483  typedef std::vector<Merged_string> Merged_strings;
484
485  struct Merged_strings_list
486  {
487    // The input object where the strings were found.
488    Relobj* object;
489    // The input section in the input object.
490    unsigned int shndx;
491    // The list of merged strings.
492    Merged_strings merged_strings;
493
494    Merged_strings_list(Relobj* objecta, unsigned int shndxa)
495      : object(objecta), shndx(shndxa), merged_strings()
496    { }
497  };
498
499  typedef std::vector<Merged_strings_list*> Merged_strings_lists;
500
501  // As we see the strings, we add them to a Stringpool.
502  Stringpool_template<Char_type> stringpool_;
503  // Map from a location in an input object to an entry in the
504  // Stringpool.
505  Merged_strings_lists merged_strings_lists_;
506  // The number of entries seen in input files.
507  size_t input_count_;
508  // The total size of input sections.
509  size_t input_size_;
510};
511
512} // End namespace gold.
513
514#endif // !defined(GOLD_MERGE_H)
515