1// fileread.h -- read files for gold   -*- C++ -*-
2
3// Copyright (C) 2006-2017 Free Software Foundation, Inc.
4// Written by Ian Lance Taylor <iant@google.com>.
5
6// This file is part of gold.
7
8// This program is free software; you can redistribute it and/or modify
9// it under the terms of the GNU General Public License as published by
10// the Free Software Foundation; either version 3 of the License, or
11// (at your option) any later version.
12
13// This program is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16// GNU General Public License for more details.
17
18// You should have received a copy of the GNU General Public License
19// along with this program; if not, write to the Free Software
20// Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
21// MA 02110-1301, USA.
22
23// Classes used to read data from binary input files.
24
25#ifndef GOLD_FILEREAD_H
26#define GOLD_FILEREAD_H
27
28#include <list>
29#include <map>
30#include <string>
31#include <vector>
32
33#include "token.h"
34
35namespace gold
36{
37
// Since not all systems support stat.st_mtim and struct timespec,
// we define our own structure and fill in the nanoseconds if we can.
40
struct Timespec
{
  // Whole seconds of the timestamp.
  time_t seconds;
  // Sub-second part of the timestamp, in nanoseconds.
  int nanoseconds;

  // Build a zero timestamp.
  Timespec()
    : seconds(0),
      nanoseconds(0)
  { }

  // Build a timestamp from explicit seconds and nanoseconds values.
  Timespec(time_t a_seconds, int a_nanoseconds)
    : seconds(a_seconds),
      nanoseconds(a_nanoseconds)
  { }
};
54
55// Get the last modified time of an unopened file.  Returns false if the
56// file does not exist.
57
58bool
59get_mtime(const char* filename, Timespec* mtime);
60
// Classes which this header only names through pointers or
// references, so forward declarations are sufficient here.
class Dirsearch;
class File_view;
class Input_file_argument;
class Position_dependent_options;
65
66// File_read manages a file descriptor and mappings for a file we are
67// reading.
68
69class File_read
70{
71 public:
72  File_read()
73    : name_(), descriptor_(-1), is_descriptor_opened_(false), object_count_(0),
74      size_(0), token_(false), views_(), saved_views_(), mapped_bytes_(0),
75      released_(true), whole_file_view_(NULL)
76  { }
77
78  ~File_read();
79
80  // Open a file.
81  bool
82  open(const Task*, const std::string& name);
83
84  // Pretend to open the file, but provide the file contents.  No
85  // actual file system activity will occur.  This is used for
86  // testing.
87  bool
88  open(const Task*, const std::string& name, const unsigned char* contents,
89       off_t size);
90
91  // Return the file name.
92  const std::string&
93  filename() const
94  { return this->name_; }
95
96  // Add an object associated with a file.
97  void
98  add_object()
99  { ++this->object_count_; }
100
101  // Remove an object associated with a file.
102  void
103  remove_object()
104  { --this->object_count_; }
105
106  // Lock the file for exclusive access within a particular Task::run
107  // execution.  This routine may only be called when the workqueue
108  // lock is held.
109  void
110  lock(const Task* t);
111
112  // Unlock the file.
113  void
114  unlock(const Task* t);
115
116  // Test whether the object is locked.
117  bool
118  is_locked() const;
119
120  // Return the token, so that the task can be queued.
121  Task_token*
122  token()
123  { return &this->token_; }
124
125  // Release the file.  This indicates that we aren't going to do
126  // anything further with it until it is unlocked.  This is used
127  // because a Task which locks the file never calls either lock or
128  // unlock; it just locks the token.  The basic rule is that a Task
129  // which locks a file via the Task::locks interface must explicitly
130  // call release() when it is done.  This is not necessary for code
131  // which calls unlock() on the file.
132  void
133  release();
134
135  // Return the size of the file.
136  off_t
137  filesize() const
138  { return this->size_; }
139
140  // Return a view into the file starting at file offset START for
141  // SIZE bytes.  OFFSET is the offset into the input file for the
142  // file we are reading; this is zero for a normal object file,
143  // non-zero for an object file in an archive.  ALIGNED is true if
144  // the data must be naturally aligned (i.e., aligned to the size
145  // of a target word); this only matters when OFFSET is not zero.
146  // The pointer will remain valid until the File_read is unlocked.
147  // It is an error if we can not read enough data from the file.
148  // The CACHE parameter is a hint as to whether it will be useful
149  // to cache this data for later accesses--i.e., later calls to
150  // get_view, read, or get_lasting_view which retrieve the same
151  // data.
152  const unsigned char*
153  get_view(off_t offset, off_t start, section_size_type size, bool aligned,
154	   bool cache);
155
156  // Read data from the file into the buffer P starting at file offset
157  // START for SIZE bytes.
158  void
159  read(off_t start, section_size_type size, void* p);
160
161  // Return a lasting view into the file starting at file offset START
162  // for SIZE bytes.  This is allocated with new, and the caller is
163  // responsible for deleting it when done.  The data associated with
164  // this view will remain valid until the view is deleted.  It is an
165  // error if we can not read enough data from the file.  The OFFSET,
166  // ALIGNED and CACHE parameters are as in get_view.
167  File_view*
168  get_lasting_view(off_t offset, off_t start, section_size_type size,
169		   bool aligned, bool cache);
170
171  // Mark all views as no longer cached.
172  void
173  clear_view_cache_marks();
174
175  // Discard all uncached views.  This is normally done by release(),
176  // but not for objects in archives.  FIXME: This is a complicated
177  // interface, and it would be nice to have something more automatic.
178  void
179  clear_uncached_views()
180  { this->clear_views(CLEAR_VIEWS_ARCHIVE); }
181
182  // A struct used to do a multiple read.
183  struct Read_multiple_entry
184  {
185    // The file offset of the data to read.
186    off_t file_offset;
187    // The amount of data to read.
188    section_size_type size;
189    // The buffer where the data should be placed.
190    unsigned char* buffer;
191
192    Read_multiple_entry(off_t o, section_size_type s, unsigned char* b)
193      : file_offset(o), size(s), buffer(b)
194    { }
195  };
196
197  typedef std::vector<Read_multiple_entry> Read_multiple;
198
199  // Read a bunch of data from the file into various different
200  // locations.  The vector must be sorted by ascending file_offset.
201  // BASE is a base offset to be added to all the offsets in the
202  // vector.
203  void
204  read_multiple(off_t base, const Read_multiple&);
205
206  // Dump statistical information to stderr.
207  static void
208  print_stats();
209
210  // Return the open file descriptor (for plugins).
211  int
212  descriptor()
213  {
214    this->reopen_descriptor();
215    return this->descriptor_;
216  }
217
218  // Return the file last modification time.  Calls gold_fatal if the stat
219  // system call failed.
220  Timespec
221  get_mtime();
222
223 private:
224  // Control for what views to clear.
225  enum Clear_views_mode
226  {
227    // Clear uncached views not used by an archive.
228    CLEAR_VIEWS_NORMAL,
229    // Clear all uncached views (including in an archive).
230    CLEAR_VIEWS_ARCHIVE,
231    // Clear all views (i.e., we're destroying the file).
232    CLEAR_VIEWS_ALL
233  };
234
235  // This class may not be copied.
236  File_read(const File_read&);
237  File_read& operator=(const File_read&);
238
239  // Total bytes mapped into memory during the link if --stats.
240  static unsigned long long total_mapped_bytes;
241
242  // Current number of bytes mapped into memory during the link if
243  // --stats.
244  static unsigned long long current_mapped_bytes;
245
246  // High water mark of bytes mapped into memory during the link if
247  // --stats.
248  static unsigned long long maximum_mapped_bytes;
249
250  // A view into the file.
251  class View
252  {
253   public:
254    // Specifies how to dispose the data on destruction of the view.
255    enum Data_ownership
256    {
257      // Data owned by File object - nothing done in destructor.
258      DATA_NOT_OWNED,
259      // Data allocated with new[] and owned by this object - should
260      // use delete[].
261      DATA_ALLOCATED_ARRAY,
262      // Data mmapped and owned by this object - should munmap.
263      DATA_MMAPPED
264    };
265
266    View(off_t start, section_size_type size, const unsigned char* data,
267	 unsigned int byteshift, bool cache, Data_ownership data_ownership)
268      : start_(start), size_(size), data_(data), lock_count_(0),
269	byteshift_(byteshift), cache_(cache), data_ownership_(data_ownership),
270	accessed_(true)
271    { }
272
273    ~View();
274
275    off_t
276    start() const
277    { return this->start_; }
278
279    section_size_type
280    size() const
281    { return this->size_; }
282
283    const unsigned char*
284    data() const
285    { return this->data_; }
286
287    void
288    lock();
289
290    void
291    unlock();
292
293    bool
294    is_locked();
295
296    unsigned int
297    byteshift() const
298    { return this->byteshift_; }
299
300    void
301    set_cache()
302    { this->cache_ = true; }
303
304    void
305    clear_cache()
306    { this->cache_ = false; }
307
308    bool
309    should_cache() const
310    { return this->cache_; }
311
312    void
313    set_accessed()
314    { this->accessed_ = true; }
315
316    void
317    clear_accessed()
318    { this->accessed_= false; }
319
320    bool
321    accessed() const
322    { return this->accessed_; }
323
324    // Returns TRUE if this view contains permanent data -- e.g., data that
325    // was supplied by the owner of the File object.
326    bool
327    is_permanent_view() const
328    { return this->data_ownership_ == DATA_NOT_OWNED; }
329
330   private:
331    View(const View&);
332    View& operator=(const View&);
333
334    // The file offset of the start of the view.
335    off_t start_;
336    // The size of the view.
337    section_size_type size_;
338    // A pointer to the actual bytes.
339    const unsigned char* data_;
340    // The number of locks on this view.
341    int lock_count_;
342    // The number of bytes that the view is shifted relative to the
343    // underlying file.  This is used to align data.  This is normally
344    // zero, except possibly for an object in an archive.
345    unsigned int byteshift_;
346    // Whether the view is cached.
347    bool cache_;
348    // Whether the view is mapped into memory.  If not, data_ points
349    // to memory allocated using new[].
350    Data_ownership data_ownership_;
351    // Whether the view has been accessed recently.
352    bool accessed_;
353  };
354
355  friend class View;
356  friend class File_view;
357
358  // The type of a mapping from page start and byte shift to views.
359  typedef std::map<std::pair<off_t, unsigned int>, View*> Views;
360
361  // A simple list of Views.
362  typedef std::list<View*> Saved_views;
363
364  // Open the descriptor if necessary.
365  void
366  reopen_descriptor();
367
368  // Find a view into the file.
369  View*
370  find_view(off_t start, section_size_type size, unsigned int byteshift,
371	    View** vshifted) const;
372
373  // Read data from the file into a buffer.
374  void
375  do_read(off_t start, section_size_type size, void* p);
376
377  // Add a view.
378  void
379  add_view(View*);
380
381  // Make a view into the file.
382  View*
383  make_view(off_t start, section_size_type size, unsigned int byteshift,
384	    bool cache);
385
386  // Find or make a view into the file.
387  View*
388  find_or_make_view(off_t offset, off_t start, section_size_type size,
389		    bool aligned, bool cache);
390
391  // Clear the file views.
392  void
393  clear_views(Clear_views_mode);
394
395  // The size of a file page for buffering data.
396  static const off_t page_size = 8192;
397
398  // Given a file offset, return the page offset.
399  static off_t
400  page_offset(off_t file_offset)
401  { return file_offset & ~ (page_size - 1); }
402
403  // Given a file size, return the size to read integral pages.
404  static off_t
405  pages(off_t file_size)
406  { return (file_size + (page_size - 1)) & ~ (page_size - 1); }
407
408  // The maximum number of entries we will pass to ::readv.
409  static const size_t max_readv_entries = 128;
410
411  // Use readv to read data.
412  void
413  do_readv(off_t base, const Read_multiple&, size_t start, size_t count);
414
415  // File name.
416  std::string name_;
417  // File descriptor.
418  int descriptor_;
419  // Whether we have regained the descriptor after releasing the file.
420  bool is_descriptor_opened_;
421  // The number of objects associated with this file.  This will be
422  // more than 1 in the case of an archive.
423  int object_count_;
424  // File size.
425  off_t size_;
426  // A token used to lock the file.
427  Task_token token_;
428  // Buffered views into the file.
429  Views views_;
430  // List of views which were locked but had to be removed from views_
431  // because they were not large enough.
432  Saved_views saved_views_;
433  // Total amount of space mapped into memory.  This is only changed
434  // while the file is locked.  When we unlock the file, we transfer
435  // the total to total_mapped_bytes, and reset this to zero.
436  size_t mapped_bytes_;
437  // Whether the file was released.
438  bool released_;
439  // A view containing the whole file.  May be NULL if we mmap only
440  // the relevant parts of the file.  Not NULL if:
441  // - Flag --mmap_whole_files is set (default on 64-bit hosts).
442  // - The contents was specified in the constructor.  Used only for
443  //   testing purposes).
444  View* whole_file_view_;
445};
446
// A view of file data that persists even when the file is unlocked.
// Callers should destroy these when no longer required.  These are
// obtained from File_read::get_lasting_view.  They may only be
// destroyed when the underlying File_read is locked.
451
452class File_view
453{
454 public:
455  // This may only be called when the underlying File_read is locked.
456  ~File_view();
457
458  // Return a pointer to the data associated with this view.
459  const unsigned char*
460  data() const
461  { return this->data_; }
462
463 private:
464  File_view(const File_view&);
465  File_view& operator=(const File_view&);
466
467  friend class File_read;
468
469  // Callers have to get these via File_read::get_lasting_view.
470  File_view(File_read& file, File_read::View* view, const unsigned char* data)
471    : file_(file), view_(view), data_(data)
472  { }
473
474  File_read& file_;
475  File_read::View* view_;
476  const unsigned char* data_;
477};
478
479// All the information we hold for a single input file.  This can be
480// an object file, a shared library, or an archive.
481
482class Input_file
483{
484 public:
485  enum Format
486  {
487    FORMAT_NONE,
488    FORMAT_ELF,
489    FORMAT_BINARY
490  };
491
492  Input_file(const Input_file_argument* input_argument)
493    : input_argument_(input_argument), found_name_(), file_(),
494      is_in_sysroot_(false), format_(FORMAT_NONE)
495  { }
496
497  // Create an input file given just a filename.
498  Input_file(const char* name);
499
500  // Create an input file with the contents already provided.  This is
501  // only used for testing.  With this path, don't call the open
502  // method.
503  Input_file(const Task*, const char* name, const unsigned char* contents,
504	     off_t size);
505
506  // Return the command line argument.
507  const Input_file_argument*
508  input_file_argument() const
509  { return this->input_argument_; }
510
511  // Return whether this is a file that we will search for in the list
512  // of directories.
513  bool
514  will_search_for() const;
515
516  // Open the file.  If the open fails, this will report an error and
517  // return false.  If there is a search, it starts at directory
518  // *PINDEX.  *PINDEX should be initialized to zero.  It may be
519  // restarted to find the next file with a matching name by
520  // incrementing the result and calling this again.
521  bool
522  open(const Dirsearch&, const Task*, int* pindex);
523
524  // Return the name given by the user.  For -lc this will return "c".
525  const char*
526  name() const;
527
528  // Return the file name.  For -lc this will return something like
529  // "/usr/lib/libc.so".
530  const std::string&
531  filename() const
532  { return this->file_.filename(); }
533
534  // Return the name under which we found the file, corresponding to
535  // the command line.  For -lc this will return something like
536  // "libc.so".
537  const std::string&
538  found_name() const
539  { return this->found_name_; }
540
541  // Return the position dependent options.
542  const Position_dependent_options&
543  options() const;
544
545  // Return the file.
546  File_read&
547  file()
548  { return this->file_; }
549
550  const File_read&
551  file() const
552  { return this->file_; }
553
554  // Whether we found the file in a directory in the system root.
555  bool
556  is_in_sysroot() const
557  { return this->is_in_sysroot_; }
558
559  // Whether this file is in a system directory.
560  bool
561  is_in_system_directory() const;
562
563  // Return whether this file is to be read only for its symbols.
564  bool
565  just_symbols() const;
566
567  // Return the format of the unconverted input file.
568  Format
569  format() const
570  { return this->format_; }
571
572  // Try to find a file in the extra search dirs.  Returns true on success.
573  static bool
574  try_extra_search_path(int* pindex,
575			const Input_file_argument* input_argument,
576			std::string filename, std::string* found_name,
577			std::string* namep);
578
579  // Find the actual file.
580  static bool
581  find_file(const Dirsearch& dirpath, int* pindex,
582	    const Input_file_argument* input_argument,
583	    bool* is_in_sysroot,
584	    std::string* found_name, std::string* namep);
585
586 private:
587  Input_file(const Input_file&);
588  Input_file& operator=(const Input_file&);
589
590  // Open a binary file.
591  bool
592  open_binary(const Task* task, const std::string& name);
593
594  // The argument from the command line.
595  const Input_file_argument* input_argument_;
596  // The name under which we opened the file.  This is like the name
597  // on the command line, but -lc turns into libc.so (or whatever).
598  // It only includes the full path if the path was on the command
599  // line.
600  std::string found_name_;
601  // The file after we open it.
602  File_read file_;
603  // Whether we found the file in a directory in the system root.
604  bool is_in_sysroot_;
605  // Format of unconverted input file.
606  Format format_;
607};
608
609} // end namespace gold
610
611#endif // !defined(GOLD_FILEREAD_H)
612