1/* strings -- print the strings of printable characters in files
2   Copyright (C) 1993-2017 Free Software Foundation, Inc.
3
4   This program is free software; you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation; either version 3, or (at your option)
7   any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program; if not, write to the Free Software
16   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17   02110-1301, USA.  */
18
19/* Usage: strings [options] file...
20
21   Options:
22   --all
23   -a
24   -		Scan each file in its entirety.
25
26   --data
27   -d		Scan only the initialized data section(s) of object files.
28
29   --print-file-name
30   -f		Print the name of the file before each string.
31
32   --bytes=min-len
33   -n min-len
34   -min-len	Print graphic char sequences, MIN-LEN or more bytes long,
35		that are followed by a NUL or a newline.  Default is 4.
36
37   --radix={o,x,d}
38   -t {o,x,d}	Print the offset within the file before each string,
39		in octal/hex/decimal.
40
41  --include-all-whitespace
  -w		By default tab and space are the only whitespace included in graphic
		char sequences.  This option considers all of isspace() valid.
44
45   -o		Like -to.  (Some other implementations have -o like -to,
46		others like -td.  We chose one arbitrarily.)
47
48   --encoding={s,S,b,l,B,L}
49   -e {s,S,b,l,B,L}
50		Select character encoding: 7-bit-character, 8-bit-character,
51		bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
52		littleendian 32-bit.
53
54   --target=BFDNAME
55   -T {bfdname}
56		Specify a non-default object file format.
57
58  --output-separator=sep_string
59  -s sep_string	String used to separate parsed strings in output.
60		Default is newline.
61
62   --help
63   -h		Print the usage message on the standard output.
64
65   --version
66   -V
67   -v		Print the program version number.
68
69   Written by Richard Stallman <rms@gnu.ai.mit.edu>
70   and David MacKenzie <djm@gnu.ai.mit.edu>.  */
71
#include "sysdep.h"
#include <limits.h>
#include "bfd.h"
#include "getopt.h"
#include "libiberty.h"
#include "safe-ctype.h"
#include "bucomm.h"
78
/* Nonzero if C counts as part of a printable string.  C must lie in
   the range 0..255 and additionally be a tab, satisfy ISPRINT, be
   above 127 while the selected encoding is 'S', or satisfy ISSPACE
   while include_all_whitespace is set.

   C is evaluated several times, so the argument must not have side
   effects.  The macro reads the file-scope variables `encoding' and
   `include_all_whitespace'.  */
#define STRING_ISGRAPHIC(c) \
      (   (c) >= 0 \
       && (c) <= 255 \
       && ((c) == '\t' || ISPRINT (c) || (encoding == 'S' && (c) > 127) \
           || (include_all_whitespace == TRUE && ISSPACE (c))) \
      )
85
86#ifndef errno
87extern int errno;
88#endif
89
/* The BFD section flags that identify an initialized data section.
   A section is scanned only if all three flags are set.  */
#define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)

/* Radix for printing addresses (must be 8, 10 or 16).  Assigned only
   by the -o and -t options, and consulted only when print_addresses
   is TRUE.  */
static int address_radix;

/* Minimum length of sequence of graphic chars to trigger output.
   Defaults to 4; changed by -n, --bytes and -<number>.  */
static int string_min;

/* Whether or not we include all whitespace as a graphic char.
   Set by -w.  */
static bfd_boolean include_all_whitespace;

/* TRUE means print address within file for each string.
   Set by -o and -t.  */
static bfd_boolean print_addresses;

/* TRUE means print filename for each string.  Set by -f.  */
static bfd_boolean print_filenames;

/* TRUE means for object files scan only the data section.
   The default comes from DEFAULT_STRINGS_ALL; -a and a lone `-'
   argument clear it, -d sets it.  */
static bfd_boolean datasection_only;

/* TRUE if we found an initialized data section in the current file.
   Reset before each object file is scanned.  */
static bfd_boolean got_a_section;

/* The BFD object file format given with -T, or NULL if none was
   given.  */
static char *target;

/* The character encoding format: one of s, S, b, l, B or L
   (default 's'), and the number of bytes per character that it
   implies (1, 2 or 4).  */
static char encoding;
static int encoding_bytes;

/* Output string used to separate parsed strings, or NULL to
   separate them with a newline.  */
static char *output_separator;
123
/* Long option table for getopt_long.  Each entry maps a long option
   onto the short option character handled in main(), so the
   `has_arg' field must agree with the short option string passed to
   getopt_long there.

   --include-all-whitespace is a plain flag, like its short form -w:
   declaring it with required_argument would make getopt_long consume
   the following command line argument (typically a file name) as its
   value.  */
static struct option long_options[] =
{
  {"all", no_argument, NULL, 'a'},
  {"data", no_argument, NULL, 'd'},
  {"print-file-name", no_argument, NULL, 'f'},
  {"bytes", required_argument, NULL, 'n'},
  {"radix", required_argument, NULL, 't'},
  {"include-all-whitespace", no_argument, NULL, 'w'},
  {"encoding", required_argument, NULL, 'e'},
  {"target", required_argument, NULL, 'T'},
  {"output-separator", required_argument, NULL, 's'},
  {"help", no_argument, NULL, 'h'},
  {"version", no_argument, NULL, 'v'},
  {NULL, 0, NULL, 0}
};
139
/* Records the size of a named file so that we
   do not repeatedly run bfd_stat() on it.
   An instance is handed to strings_a_section through the opaque
   argument of bfd_map_over_sections.  */

typedef struct
{
  /* Name of the file, used when printing its strings.  */
  const char *  filename;
  /* Size of the file; 0 means that it has not been determined yet.  */
  bfd_size_type filesize;
} filename_and_size_t;
148
/* Forward declarations of functions that are used before they are
   defined in this file.  usage() never returns.  */
static bfd_boolean strings_file (char *);
static void print_strings (const char *, FILE *, file_ptr, int, int, char *);
static void usage (FILE *, int) ATTRIBUTE_NORETURN;

int main (int, char **);
154
155int
156main (int argc, char **argv)
157{
158  int optc;
159  int exit_status = 0;
160  bfd_boolean files_given = FALSE;
161  char *s;
162  int numeric_opt = 0;
163
164#if defined (HAVE_SETLOCALE)
165  setlocale (LC_ALL, "");
166#endif
167  bindtextdomain (PACKAGE, LOCALEDIR);
168  textdomain (PACKAGE);
169
170  program_name = argv[0];
171  xmalloc_set_program_name (program_name);
172  bfd_set_error_program_name (program_name);
173
174  expandargv (&argc, &argv);
175
176  string_min = 4;
177  include_all_whitespace = FALSE;
178  print_addresses = FALSE;
179  print_filenames = FALSE;
180  if (DEFAULT_STRINGS_ALL)
181    datasection_only = FALSE;
182  else
183    datasection_only = TRUE;
184  target = NULL;
185  encoding = 's';
186  output_separator = NULL;
187
188  while ((optc = getopt_long (argc, argv, "adfhHn:wot:e:T:s:Vv0123456789",
189			      long_options, (int *) 0)) != EOF)
190    {
191      switch (optc)
192	{
193	case 'a':
194	  datasection_only = FALSE;
195	  break;
196
197	case 'd':
198	  datasection_only = TRUE;
199	  break;
200
201	case 'f':
202	  print_filenames = TRUE;
203	  break;
204
205	case 'H':
206	case 'h':
207	  usage (stdout, 0);
208
209	case 'n':
210	  string_min = (int) strtoul (optarg, &s, 0);
211	  if (s != NULL && *s != 0)
212	    fatal (_("invalid integer argument %s"), optarg);
213	  break;
214
215	case 'w':
216	  include_all_whitespace = TRUE;
217	  break;
218
219	case 'o':
220	  print_addresses = TRUE;
221	  address_radix = 8;
222	  break;
223
224	case 't':
225	  print_addresses = TRUE;
226	  if (optarg[1] != '\0')
227	    usage (stderr, 1);
228	  switch (optarg[0])
229	    {
230	    case 'o':
231	      address_radix = 8;
232	      break;
233
234	    case 'd':
235	      address_radix = 10;
236	      break;
237
238	    case 'x':
239	      address_radix = 16;
240	      break;
241
242	    default:
243	      usage (stderr, 1);
244	    }
245	  break;
246
247	case 'T':
248	  target = optarg;
249	  break;
250
251	case 'e':
252	  if (optarg[1] != '\0')
253	    usage (stderr, 1);
254	  encoding = optarg[0];
255	  break;
256
257	case 's':
258	  output_separator = optarg;
259          break;
260
261	case 'V':
262	case 'v':
263	  print_version ("strings");
264	  break;
265
266	case '?':
267	  usage (stderr, 1);
268
269	default:
270	  numeric_opt = optind;
271	  break;
272	}
273    }
274
275  if (numeric_opt != 0)
276    {
277      string_min = (int) strtoul (argv[numeric_opt - 1] + 1, &s, 0);
278      if (s != NULL && *s != 0)
279	fatal (_("invalid integer argument %s"), argv[numeric_opt - 1] + 1);
280    }
281  if (string_min < 1)
282    fatal (_("invalid minimum string length %d"), string_min);
283
284  switch (encoding)
285    {
286    case 'S':
287    case 's':
288      encoding_bytes = 1;
289      break;
290    case 'b':
291    case 'l':
292      encoding_bytes = 2;
293      break;
294    case 'B':
295    case 'L':
296      encoding_bytes = 4;
297      break;
298    default:
299      usage (stderr, 1);
300    }
301
302  bfd_init ();
303  set_default_bfd_target ();
304
305  if (optind >= argc)
306    {
307      datasection_only = FALSE;
308      SET_BINARY (fileno (stdin));
309      print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL);
310      files_given = TRUE;
311    }
312  else
313    {
314      for (; optind < argc; ++optind)
315	{
316	  if (strcmp (argv[optind], "-") == 0)
317	    datasection_only = FALSE;
318	  else
319	    {
320	      files_given = TRUE;
321	      exit_status |= strings_file (argv[optind]) == FALSE;
322	    }
323	}
324    }
325
326  if (!files_given)
327    usage (stderr, 1);
328
329  return (exit_status);
330}
331
332/* Scan section SECT of the file ABFD, whose printable name is in
333   ARG->filename and whose size might be in ARG->filesize.  If it
334   contains initialized data set `got_a_section' and print the
335   strings in it.
336
337   FIXME: We ought to be able to return error codes/messages for
338   certain conditions.  */
339
340static void
341strings_a_section (bfd *abfd, asection *sect, void *arg)
342{
343  filename_and_size_t * filename_and_sizep;
344  bfd_size_type *filesizep;
345  bfd_size_type sectsize;
346  void *mem;
347
348  if ((sect->flags & DATA_FLAGS) != DATA_FLAGS)
349    return;
350
351  sectsize = bfd_get_section_size (sect);
352
353  if (sectsize <= 0)
354    return;
355
356  /* Get the size of the file.  This might have been cached for us.  */
357  filename_and_sizep = (filename_and_size_t *) arg;
358  filesizep = & filename_and_sizep->filesize;
359
360  if (*filesizep == 0)
361    {
362      struct stat st;
363
364      if (bfd_stat (abfd, &st))
365	return;
366
367      /* Cache the result so that we do not repeatedly stat this file.  */
368      *filesizep = st.st_size;
369    }
370
371  /* Compare the size of the section against the size of the file.
372     If the section is bigger then the file must be corrupt and
373     we should not try dumping it.  */
374  if (sectsize >= *filesizep)
375    return;
376
377  mem = xmalloc (sectsize);
378
379  if (bfd_get_section_contents (abfd, sect, mem, (file_ptr) 0, sectsize))
380    {
381      got_a_section = TRUE;
382
383      print_strings (filename_and_sizep->filename, NULL, sect->filepos,
384		     0, sectsize, (char *) mem);
385    }
386
387  free (mem);
388}
389
390/* Scan all of the sections in FILE, and print the strings
391   in the initialized data section(s).
392
393   Return TRUE if successful,
394   FALSE if not (such as if FILE is not an object file).  */
395
396static bfd_boolean
397strings_object_file (const char *file)
398{
399  filename_and_size_t filename_and_size;
400  bfd *abfd;
401
402  abfd = bfd_openr (file, target);
403
404  if (abfd == NULL)
405    /* Treat the file as a non-object file.  */
406    return FALSE;
407
408  /* This call is mainly for its side effect of reading in the sections.
409     We follow the traditional behavior of `strings' in that we don't
410     complain if we don't recognize a file to be an object file.  */
411  if (!bfd_check_format (abfd, bfd_object))
412    {
413      bfd_close (abfd);
414      return FALSE;
415    }
416
417  got_a_section = FALSE;
418  filename_and_size.filename = file;
419  filename_and_size.filesize = 0;
420  bfd_map_over_sections (abfd, strings_a_section, & filename_and_size);
421
422  if (!bfd_close (abfd))
423    {
424      bfd_nonfatal (file);
425      return FALSE;
426    }
427
428  return got_a_section;
429}
430
431/* Print the strings in FILE.  Return TRUE if ok, FALSE if an error occurs.  */
432
433static bfd_boolean
434strings_file (char *file)
435{
436  struct stat st;
437
438  /* get_file_size does not support non-S_ISREG files.  */
439
440  if (stat (file, &st) < 0)
441    {
442      if (errno == ENOENT)
443	non_fatal (_("'%s': No such file"), file);
444      else
445	non_fatal (_("Warning: could not locate '%s'.  reason: %s"),
446		   file, strerror (errno));
447      return FALSE;
448    }
449
450  /* If we weren't told to scan the whole file,
451     try to open it as an object file and only look at
452     initialized data sections.  If that fails, fall back to the
453     whole file.  */
454  if (!datasection_only || !strings_object_file (file))
455    {
456      FILE *stream;
457
458      stream = fopen (file, FOPEN_RB);
459      if (stream == NULL)
460	{
461	  fprintf (stderr, "%s: ", program_name);
462	  perror (file);
463	  return FALSE;
464	}
465
466      print_strings (file, stream, (file_ptr) 0, 0, 0, (char *) 0);
467
468      if (fclose (stream) == EOF)
469	{
470	  fprintf (stderr, "%s: ", program_name);
471	  perror (file);
472	  return FALSE;
473	}
474    }
475
476  return TRUE;
477}
478
479/* Read the next character, return EOF if none available.
480   Assume that STREAM is positioned so that the next byte read
481   is at address ADDRESS in the file.
482
483   If STREAM is NULL, do not read from it.
484   The caller can supply a buffer of characters
485   to be processed before the data in STREAM.
486   MAGIC is the address of the buffer and
487   MAGICCOUNT is how many characters are in it.  */
488
489static long
490get_char (FILE *stream, file_ptr *address, int *magiccount, char **magic)
491{
492  int c, i;
493  long r = 0;
494
495  for (i = 0; i < encoding_bytes; i++)
496    {
497      if (*magiccount)
498	{
499	  (*magiccount)--;
500	  c = *(*magic)++;
501	}
502      else
503	{
504	  if (stream == NULL)
505	    return EOF;
506
507	  /* Only use getc_unlocked if we found a declaration for it.
508	     Otherwise, libc is not thread safe by default, and we
509	     should not use it.  */
510
511#if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
512	  c = getc_unlocked (stream);
513#else
514	  c = getc (stream);
515#endif
516	  if (c == EOF)
517	    return EOF;
518	}
519
520      (*address)++;
521      r = (r << 8) | (c & 0xff);
522    }
523
524  switch (encoding)
525    {
526    default:
527      break;
528    case 'l':
529      r = ((r & 0xff) << 8) | ((r & 0xff00) >> 8);
530      break;
531    case 'L':
532      r = (((r & 0xff) << 24) | ((r & 0xff00) << 8)
533	   | ((r & 0xff0000) >> 8) | ((r & 0xff000000) >> 24));
534      break;
535    }
536
537  return r;
538}
539
/* Find the strings in file FILENAME, read from STREAM.
   Assume that STREAM is positioned so that the next byte read
   is at address ADDRESS in the file.
   Stop reading at address STOP_POINT in the file, if nonzero.

   If STREAM is NULL, do not read from it.
   The caller can supply a buffer of characters
   to be processed before the data in STREAM.
   MAGIC is the address of the buffer and
   MAGICCOUNT is how many characters are in it.
   Those characters come at address ADDRESS and the data in STREAM follow.

   Each string found is written to stdout, preceded by FILENAME if
   `print_filenames' is set and by its starting address if
   `print_addresses' is set, and followed by `output_separator' or,
   if that is NULL, by a newline.

   NOTE(review): MAGICCOUNT is an int, but strings_a_section passes a
   bfd_size_type section size for it; a size above INT_MAX would not
   survive that conversion -- confirm whether such sections can reach
   this function.  */

static void
print_strings (const char *filename, FILE *stream, file_ptr address,
	       int stop_point, int magiccount, char *magic)
{
  /* Holds the first `string_min' characters of a candidate string
     plus a terminating NUL.  Characters beyond those are written to
     stdout as they are read.  */
  char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));

  while (1)
    {
      file_ptr start;
      int i;
      long c;

      /* See if the next `string_min' chars are all graphic chars.  */
    tryline:
      if (stop_point && address >= stop_point)
	break;
      /* Remember where this candidate begins, for the address column.  */
      start = address;
      for (i = 0; i < string_min; i++)
	{
	  c = get_char (stream, &address, &magiccount, &magic);
	  if (c == EOF)
	    {
	      /* Input ended before the candidate grew long enough.  */
	      free (buf);
	      return;
	    }
	  if (! STRING_ISGRAPHIC (c))
	    /* Found a non-graphic.  Try again starting with next char.  */
	    goto tryline;
	  buf[i] = c;
	}

      /* We found a run of `string_min' graphic characters.  Print up
	 to the next non-graphic character.  */

      if (print_filenames)
	printf ("%s: ", filename);
      /* Print START in the requested radix.  In each case the
	 preprocessor conditionals choose, at build time, between a
	 `long long' format used when file_ptr is wider than long, a
	 fallback for hosts without a 64-bit long, and the plain `long'
	 format.  */
      if (print_addresses)
	switch (address_radix)
	  {
	  case 8:
#ifdef HAVE_LONG_LONG
	    if (sizeof (start) > sizeof (long))
	      {
# ifndef __MSVCRT__
	        printf ("%7llo ", (unsigned long long) start);
# else
	        printf ("%7I64o ", (unsigned long long) start);
# endif
	      }
	    else
#elif !BFD_HOST_64BIT_LONG
	    if (start != (unsigned long) start)
	      printf ("++%7lo ", (unsigned long) start);
	    else
#endif
	      printf ("%7lo ", (unsigned long) start);
	    break;

	  case 10:
#ifdef HAVE_LONG_LONG
	    if (sizeof (start) > sizeof (long))
	      {
# ifndef __MSVCRT__
	        printf ("%7lld ", (unsigned long long) start);
# else
	        printf ("%7I64d ", (unsigned long long) start);
# endif
	      }
	    else
#elif !BFD_HOST_64BIT_LONG
	    if (start != (unsigned long) start)
	      printf ("++%7ld ", (unsigned long) start);
	    else
#endif
	      printf ("%7ld ", (long) start);
	    break;

	  case 16:
#ifdef HAVE_LONG_LONG
	    if (sizeof (start) > sizeof (long))
	      {
# ifndef __MSVCRT__
	        printf ("%7llx ", (unsigned long long) start);
# else
	        printf ("%7I64x ", (unsigned long long) start);
# endif
	      }
	    else
#elif !BFD_HOST_64BIT_LONG
	    if (start != (unsigned long) start)
	      printf ("%lx%8.8lx ", (unsigned long) (start >> 32),
		      (unsigned long) (start & 0xffffffff));
	    else
#endif
	      printf ("%7lx ", (unsigned long) start);
	    break;
	  }

      /* The loop above ran to completion, so I equals `string_min'.  */
      buf[i] = '\0';
      fputs (buf, stdout);

      /* Copy the rest of the string straight to stdout.  */
      while (1)
	{
	  c = get_char (stream, &address, &magiccount, &magic);
	  if (c == EOF)
	    break;
	  if (! STRING_ISGRAPHIC (c))
	    break;
	  putchar (c);
	}

      if (output_separator)
        fputs (output_separator, stdout);
      else
        putchar ('\n');
    }
  free (buf);
}
670
671static void
672usage (FILE *stream, int status)
673{
674  fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
675  fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
676  fprintf (stream, _(" The options are:\n"));
677
678  if (DEFAULT_STRINGS_ALL)
679    fprintf (stream, _("\
680  -a - --all                Scan the entire file, not just the data section [default]\n\
681  -d --data                 Only scan the data sections in the file\n"));
682  else
683    fprintf (stream, _("\
684  -a - --all                Scan the entire file, not just the data section\n\
685  -d --data                 Only scan the data sections in the file [default]\n"));
686
687  fprintf (stream, _("\
688  -f --print-file-name      Print the name of the file before each string\n\
689  -n --bytes=[number]       Locate & print any NUL-terminated sequence of at\n\
690  -<number>                   least [number] characters (default 4).\n\
691  -t --radix={o,d,x}        Print the location of the string in base 8, 10 or 16\n\
692  -w --include-all-whitespace Include all whitespace as valid string characters\n\
693  -o                        An alias for --radix=o\n\
694  -T --target=<BFDNAME>     Specify the binary file format\n\
695  -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
696                            s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
697  -s --output-separator=<string> String used to separate strings in output.\n\
698  @<file>                   Read options from <file>\n\
699  -h --help                 Display this information\n\
700  -v -V --version           Print the program's version number\n"));
701  list_supported_targets (program_name, stream);
702  if (REPORT_BUGS_TO[0] && status == 0)
703    fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
704  exit (status);
705}
706