1/* printf - format and print data
2   Copyright (C) 1990-2010 Free Software Foundation, Inc.
3
4   This program is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation, either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
16
17/* Usage: printf format [argument...]
18
19   A front end to the printf function that lets it be used from the shell.
20
21   Backslash escapes:
22
23   \" = double quote
24   \\ = backslash
25   \a = alert (bell)
26   \b = backspace
27   \c = produce no further output
28   \e = escape
29   \f = form feed
30   \n = new line
31   \r = carriage return
32   \t = horizontal tab
33   \v = vertical tab
34   \ooo = octal number (ooo is 1 to 3 digits)
   \xhh = hexadecimal number (hh is 1 to 2 digits)
36   \uhhhh = 16-bit Unicode character (hhhh is 4 digits)
37   \Uhhhhhhhh = 32-bit Unicode character (hhhhhhhh is 8 digits)
38
39   Additional directive:
40
41   %b = print an argument string, interpreting backslash escapes,
42     except that octal escapes are of the form \0 or \0ooo.
43
44   The `format' argument is re-used as many times as necessary
45   to convert all of the given arguments.
46
47   David MacKenzie <djm@gnu.ai.mit.edu> */
48
49#include <config.h>
50#include <stdio.h>
51#include <sys/types.h>
52
53#include "system.h"
54#include "c-strtod.h"
55#include "error.h"
56#include "quote.h"
57#include "unicodeio.h"
58#include "xprintf.h"
59
/* Official program name, i.e. without any installation prefix such
   as `g'.  */
#define PROGRAM_NAME "printf"

#define AUTHORS proper_name ("David MacKenzie")

/* Helpers for parsing numeric escapes.  Each macro may evaluate its
   argument more than once, so pass only side-effect-free expressions.

   isodigit (C) is nonzero if C is one of the characters '0'..'7'.
   hextobin (C) maps 'a'..'f' and 'A'..'F' to 10..15, and any other
   character to C - '0'.
   octtobin (C) is C - '0'.  */
#define isodigit(c) ('0' <= (c) && (c) <= '7')
#define hextobin(c) ('A' <= (c) && (c) <= 'F' ? (c) - 'A' + 10 : \
                     'a' <= (c) && (c) <= 'f' ? (c) - 'a' + 10 : (c) - '0')
#define octtobin(c) ((c) - '0')
69
70/* The value to return to the calling program.  */
71static int exit_status;
72
73/* True if the POSIXLY_CORRECT environment variable is set.  */
74static bool posixly_correct;
75
76/* This message appears in N_() here rather than just in _() below because
77   the sole use would have been in a #define.  */
78static char const *const cfcc_msg =
79 N_("warning: %s: character(s) following character constant have been ignored");
80
81void
82usage (int status)
83{
84  if (status != EXIT_SUCCESS)
85    fprintf (stderr, _("Try `%s --help' for more information.\n"),
86             program_name);
87  else
88    {
89      printf (_("\
90Usage: %s FORMAT [ARGUMENT]...\n\
91  or:  %s OPTION\n\
92"),
93              program_name, program_name);
94      fputs (_("\
95Print ARGUMENT(s) according to FORMAT, or execute according to OPTION:\n\
96\n\
97"), stdout);
98      fputs (HELP_OPTION_DESCRIPTION, stdout);
99      fputs (VERSION_OPTION_DESCRIPTION, stdout);
100      fputs (_("\
101\n\
102FORMAT controls the output as in C printf.  Interpreted sequences are:\n\
103\n\
104  \\\"      double quote\n\
105"), stdout);
106      fputs (_("\
107  \\\\      backslash\n\
108  \\a      alert (BEL)\n\
109  \\b      backspace\n\
110  \\c      produce no further output\n\
111  \\e      escape\n\
112  \\f      form feed\n\
113  \\n      new line\n\
114  \\r      carriage return\n\
115  \\t      horizontal tab\n\
116  \\v      vertical tab\n\
117"), stdout);
118      fputs (_("\
119  \\NNN    byte with octal value NNN (1 to 3 digits)\n\
120  \\xHH    byte with hexadecimal value HH (1 to 2 digits)\n\
121  \\uHHHH  Unicode (ISO/IEC 10646) character with hex value HHHH (4 digits)\n\
122  \\UHHHHHHHH  Unicode character with hex value HHHHHHHH (8 digits)\n\
123"), stdout);
124      fputs (_("\
125  %%      a single %\n\
126  %b      ARGUMENT as a string with `\\' escapes interpreted,\n\
127          except that octal escapes are of the form \\0 or \\0NNN\n\
128\n\
129and all C format specifications ending with one of diouxXfeEgGcs, with\n\
130ARGUMENTs converted to proper type first.  Variable widths are handled.\n\
131"), stdout);
132      printf (USAGE_BUILTIN_WARNING, PROGRAM_NAME);
133      emit_ancillary_info ();
134    }
135  exit (status);
136}
137
138static void
139verify_numeric (const char *s, const char *end)
140{
141  if (errno)
142    {
143      error (0, errno, "%s", s);
144      exit_status = EXIT_FAILURE;
145    }
146  else if (*end)
147    {
148      if (s == end)
149        error (0, 0, _("%s: expected a numeric value"), s);
150      else
151        error (0, 0, _("%s: value not completely converted"), s);
152      exit_status = EXIT_FAILURE;
153    }
154}
155
156#define STRTOX(TYPE, FUNC_NAME, LIB_FUNC_EXPR)				 \
157static TYPE								 \
158FUNC_NAME (char const *s)						 \
159{									 \
160  char *end;								 \
161  TYPE val;								 \
162                                                                         \
163  if (*s == '\"' || *s == '\'')						 \
164    {									 \
165      unsigned char ch = *++s;						 \
166      val = ch;								 \
167      /* If POSIXLY_CORRECT is not set, then give a warning that there	 \
168         are characters following the character constant and that GNU	 \
169         printf is ignoring those characters.  If POSIXLY_CORRECT *is*	 \
170         set, then don't give the warning.  */				 \
171      if (*++s != 0 && !posixly_correct)				 \
172        error (0, 0, _(cfcc_msg), s);					 \
173    }									 \
174  else									 \
175    {									 \
176      errno = 0;							 \
177      val = (LIB_FUNC_EXPR);						 \
178      verify_numeric (s, end);						 \
179    }									 \
180  return val;								 \
181}									 \
182
183STRTOX (intmax_t,    vstrtoimax, strtoimax (s, &end, 0))
184STRTOX (uintmax_t,   vstrtoumax, strtoumax (s, &end, 0))
185STRTOX (long double, vstrtold,   c_strtold (s, &end))
186
/* Write to stdout the character denoted by the single-letter escape
   \C.  The exception is 'c', which terminates the program with
   EXIT_SUCCESS instead of writing anything.  A letter with no special
   meaning is written unchanged.  */

static void
print_esc_char (char c)
{
  int out;			/* Character to emit for this escape. */

  switch (c)
    {
    case 'c':			/* Cancel the rest of the output. */
      exit (EXIT_SUCCESS);

    case 'a':			/* Alert. */
      out = '\a';
      break;
    case 'b':			/* Backspace. */
      out = '\b';
      break;
    case 'e':			/* Escape. */
      out = '\x1B';
      break;
    case 'f':			/* Form feed. */
      out = '\f';
      break;
    case 'n':			/* New line. */
      out = '\n';
      break;
    case 'r':			/* Carriage return. */
      out = '\r';
      break;
    case 't':			/* Horizontal tab. */
      out = '\t';
      break;
    case 'v':			/* Vertical tab. */
      out = '\v';
      break;
    default:			/* Anything else stands for itself. */
      out = c;
      break;
    }

  putchar (out);
}
226
/* Output the backslash escape that begins at ESCSTART (which points
   at the backslash) and return how many characters of the escape
   follow the backslash.
   With OCTAL_0 true, octal escapes have the form \0ooo (o being an
   octal digit); with OCTAL_0 false they have the form \ooo.
   A \x escape without digits, an incomplete \u or \U escape, and a
   rejected universal character name are fatal errors.  */

static int
print_esc (const char *escstart, bool octal_0)
{
  const char *cursor = escstart + 1;	/* Character being examined. */

  if (*cursor == 'x')
    {
      /* \xhh: one or two hexadecimal digits are required.  */
      int byte = 0;
      int ndigits = 0;

      cursor++;
      while (ndigits < 2 && isxdigit (to_uchar (*cursor)))
        {
          byte = byte * 16 + hextobin (*cursor);
          cursor++;
          ndigits++;
        }
      if (ndigits == 0)
        error (EXIT_FAILURE, 0, _("missing hexadecimal number in escape"));
      putchar (byte);
    }
  else if (isodigit (*cursor))
    {
      /* \0ooo when OCTAL_0 is set and the first digit is '0', else
         \ooo.  Accepting \ooo even with OCTAL_0 set is an undocumented
         extension to POSIX, compatible with Bash 2.05b.  */
      int byte = 0;
      int ndigits = 0;

      if (octal_0 && *cursor == '0')
        cursor++;
      while (ndigits < 3 && isodigit (*cursor))
        {
          byte = byte * 8 + octtobin (*cursor);
          cursor++;
          ndigits++;
        }
      putchar (byte);
    }
  else if (*cursor != '\0' && strchr ("\"\\abcefnrtv", *cursor) != NULL)
    {
      /* Single-character escape.  */
      print_esc_char (*cursor);
      cursor++;
    }
  else if (*cursor == 'u' || *cursor == 'U')
    {
      /* \uhhhh or \Uhhhhhhhh: exactly 4 or 8 hexadecimal digits.  */
      char esc_char = *cursor;
      int width = (esc_char == 'u' ? 4 : 8);
      int remaining;
      unsigned int code = 0;

      cursor++;
      for (remaining = width; remaining > 0; remaining--)
        {
          if (! isxdigit (to_uchar (*cursor)))
            error (EXIT_FAILURE, 0, _("missing hexadecimal number in escape"));
          code = code * 16 + hextobin (*cursor);
          cursor++;
        }

      /* A universal character name shall not specify a character short
         identifier in the range 00000000 through 00000020, 0000007F
         through 0000009F, or 0000D800 through 0000DFFF inclusive.  A
         universal character name shall not designate a character in
         the required character set.  */
      if ((code <= 0x9f && code != 0x24 && code != 0x40 && code != 0x60)
          || (0xd800 <= code && code <= 0xdfff))
        error (EXIT_FAILURE, 0, _("invalid universal character name \\%c%0*x"),
               esc_char, width, code);

      print_unicode_char (stdout, code, 0);
    }
  else
    {
      /* Not a recognized escape: emit the backslash and, if there is
         one, the character after it.  */
      putchar ('\\');
      if (*cursor != '\0')
        {
          putchar (*cursor);
          cursor++;
        }
    }

  return cursor - escstart - 1;
}
303
/* Write STR to stdout, expanding backslash escapes with print_esc
   in its OCTAL_0 mode.  */

static void
print_esc_string (const char *str)
{
  while (*str != '\0')
    {
      if (*str == '\\')
        /* Skip the escape body; the backslash itself is stepped over
           by the increment below.  */
        str += print_esc (str, true);
      else
        putchar (*str);
      str++;
    }
}
315
/* Format and print ARGUMENT according to one printf conversion
   specification.  START points at the directive and LENGTH is the
   number of its leading characters to keep; LENGTH covers neither a
   length modifier nor the conversion specifier, which is passed
   separately as CONVERSION.  When HAVE_FIELD_WIDTH is true,
   FIELD_WIDTH is the value for a '*' field width; when HAVE_PRECISION
   is true, PRECISION is the value for a '*' precision.  */

static void
print_direc (const char *start, size_t length, char conversion,
             bool have_field_width, int field_width,
             bool have_precision, int precision,
             char const *argument)
{
  /* With a zero length the source pointer is never read from, so any
     valid pointer will do as the default.  */
  char const *modifier = start;
  size_t modifier_len = 0;
  char *directive;	/* Null-terminated copy of the % directive. */
  char *tail;		/* Write position within DIRECTIVE. */

  /* Choose the length modifier that matches the type the argument is
     converted to below.  */
  switch (conversion)
    {
    case 'd': case 'i': case 'o': case 'u': case 'x': case 'X':
      /* Use PRIdMAX without its final conversion letter; sizeof also
         counts the terminating NUL, hence the 2.  */
      modifier = PRIdMAX;
      modifier_len = sizeof PRIdMAX - 2;
      break;

    case 'a': case 'e': case 'f': case 'g':
    case 'A': case 'E': case 'F': case 'G':
      modifier = "L";
      modifier_len = 1;
      break;

    default:
      break;
    }

  /* Assemble: leading part + length modifier + conversion + NUL.  */
  directive = xmalloc (length + modifier_len + 2);
  tail = mempcpy (directive, start, length);
  tail = mempcpy (tail, modifier, modifier_len);
  tail[0] = conversion;
  tail[1] = '\0';

  switch (conversion)
    {
    case 'd':
    case 'i':
      {
        intmax_t value = vstrtoimax (argument);
        if (have_field_width && have_precision)
          xprintf (directive, field_width, precision, value);
        else if (have_field_width)
          xprintf (directive, field_width, value);
        else if (have_precision)
          xprintf (directive, precision, value);
        else
          xprintf (directive, value);
      }
      break;

    case 'o':
    case 'u':
    case 'x':
    case 'X':
      {
        uintmax_t value = vstrtoumax (argument);
        if (have_field_width && have_precision)
          xprintf (directive, field_width, precision, value);
        else if (have_field_width)
          xprintf (directive, field_width, value);
        else if (have_precision)
          xprintf (directive, precision, value);
        else
          xprintf (directive, value);
      }
      break;

    case 'a':
    case 'A':
    case 'e':
    case 'E':
    case 'f':
    case 'F':
    case 'g':
    case 'G':
      {
        long double value = vstrtold (argument);
        if (have_field_width && have_precision)
          xprintf (directive, field_width, precision, value);
        else if (have_field_width)
          xprintf (directive, field_width, value);
        else if (have_precision)
          xprintf (directive, precision, value);
        else
          xprintf (directive, value);
      }
      break;

    case 'c':
      /* Only a field width is passed along for %c.  */
      if (have_field_width)
        xprintf (directive, field_width, *argument);
      else
        xprintf (directive, *argument);
      break;

    case 's':
      if (have_field_width && have_precision)
        xprintf (directive, field_width, precision, argument);
      else if (have_field_width)
        xprintf (directive, field_width, argument);
      else if (have_precision)
        xprintf (directive, precision, argument);
      else
        xprintf (directive, argument);
      break;
    }

  free (directive);
}
466
/* Make one pass over FORMAT, printing its text and taking the
   arguments for its `%' directives from ARGV, which has ARGC elements.
   A directive for which no argument is left gets an empty string, and
   a '*' width or precision with no argument left gets 0.
   Return the number of elements of ARGV that were consumed.  */

static int
print_formatted (const char *format, int argc, char **argv)
{
  int const initial_argc = argc;	/* To compute the return value.  */
  const char *f;			/* Pointer into `format'.  */
  int field_width = 0;			/* Arg to first '*'.  */
  int precision = 0;			/* Arg to second '*'.  */
  char ok[UCHAR_MAX + 1];	/* ok['x'] is true if %x is allowed.  */

  for (f = format; *f; ++f)
    {
      const char *direc_start;	/* Start of % directive.  */
      size_t direc_length;	/* Length of % directive.  */
      bool have_field_width;	/* True if FIELD_WIDTH is valid.  */
      bool have_precision;	/* True if PRECISION is valid.  */
      bool scanning_flags;	/* True while flag characters continue.  */
      char const *argument;	/* Operand for the conversion.  */

      /* Backslash escape in the format itself.  */
      if (*f == '\\')
        {
          f += print_esc (f, false);
          continue;
        }

      /* Ordinary character.  */
      if (*f != '%')
        {
          putchar (*f);
          continue;
        }

      /* From here on we are looking at a `%' directive.  */
      direc_start = f++;
      direc_length = 1;
      have_field_width = false;
      have_precision = false;

      if (*f == '%')
        {
          putchar ('%');
          continue;
        }

      if (*f == 'b')
        {
          /* FIXME: Field width and precision are not supported
             for %b, even though POSIX requires it.  */
          if (argc > 0)
            {
              print_esc_string (*argv);
              ++argv;
              --argc;
            }
          continue;
        }

      /* Start out allowing every supported conversion; the flags and
         the precision seen below rule some of them out.  */
      memset (ok, 0, sizeof ok);
      ok['a'] = ok['A'] = ok['c'] = ok['d'] = ok['e'] = ok['E'] =
        ok['f'] = ok['F'] = ok['g'] = ok['G'] = ok['i'] = ok['o'] =
        ok['s'] = ok['u'] = ok['x'] = ok['X'] = 1;

      /* Flag characters.  */
      scanning_flags = true;
      while (scanning_flags)
        {
          switch (*f)
            {
#if (__GLIBC__ == 2 && 2 <= __GLIBC_MINOR__) || 3 <= __GLIBC__
            case 'I':
#endif
            case '\'':
              ok['a'] = ok['A'] = ok['c'] = ok['e'] = ok['E'] =
                ok['o'] = ok['s'] = ok['x'] = ok['X'] = 0;
              break;
            case '-': case '+': case ' ':
              break;
            case '#':
              ok['c'] = ok['d'] = ok['i'] = ok['s'] = ok['u'] = 0;
              break;
            case '0':
              ok['c'] = ok['s'] = 0;
              break;
            default:
              scanning_flags = false;
              break;
            }

          /* Step over the character only if it was a flag.  */
          if (scanning_flags)
            {
              ++f;
              ++direc_length;
            }
        }

      /* Field width: either `*' (taken from ARGV) or digits.  */
      if (*f == '*')
        {
          ++f;
          ++direc_length;
          if (argc <= 0)
            field_width = 0;
          else
            {
              intmax_t width = vstrtoimax (*argv);
              if (width < INT_MIN || INT_MAX < width)
                error (EXIT_FAILURE, 0, _("invalid field width: %s"),
                       *argv);
              else
                field_width = width;
              ++argv;
              --argc;
            }
          have_field_width = true;
        }
      else
        {
          while (ISDIGIT (*f))
            {
              ++f;
              ++direc_length;
            }
        }

      /* Precision: `.' followed by `*' (taken from ARGV) or digits.  */
      if (*f == '.')
        {
          ++f;
          ++direc_length;
          ok['c'] = 0;
          if (*f == '*')
            {
              ++f;
              ++direc_length;
              if (argc <= 0)
                precision = 0;
              else
                {
                  intmax_t prec = vstrtoimax (*argv);
                  if (INT_MAX < prec)
                    error (EXIT_FAILURE, 0, _("invalid precision: %s"),
                           *argv);
                  else if (prec < 0)
                    {
                      /* A negative precision is taken as if the
                         precision were omitted, so -1 is safe
                         here even if prec < INT_MIN.  */
                      precision = -1;
                    }
                  else
                    precision = prec;
                  ++argv;
                  --argc;
                }
              have_precision = true;
            }
          else
            {
              while (ISDIGIT (*f))
                {
                  ++f;
                  ++direc_length;
                }
            }
        }

      /* Skip any length modifiers; they are not counted in
         direc_length, and print_direc supplies its own.  */
      while (*f != '\0' && strchr ("lLhjtz", *f) != NULL)
        ++f;

      /* The conversion specifier must still be allowed.  */
      {
        unsigned char conversion = *f;
        if (! ok[conversion])
          error (EXIT_FAILURE, 0,
                 _("%.*s: invalid conversion specification"),
                 (int) (f + 1 - direc_start), direc_start);
      }

      /* Take the next operand, or an empty string if none is left.  */
      argument = "";
      if (argc > 0)
        {
          argument = *argv++;
          argc--;
        }

      print_direc (direc_start, direc_length, *f,
                   have_field_width, field_width,
                   have_precision, precision,
                   argument);
    }

  return initial_argc - argc;
}
631
632int
633main (int argc, char **argv)
634{
635  char *format;
636  int args_used;
637
638  initialize_main (&argc, &argv);
639  set_program_name (argv[0]);
640  setlocale (LC_ALL, "");
641  bindtextdomain (PACKAGE, LOCALEDIR);
642  textdomain (PACKAGE);
643
644  atexit (close_stdout);
645
646  exit_status = EXIT_SUCCESS;
647
648  posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
649
650  /* We directly parse options, rather than use parse_long_options, in
651     order to avoid accepting abbreviations.  */
652  if (argc == 2)
653    {
654      if (STREQ (argv[1], "--help"))
655        usage (EXIT_SUCCESS);
656
657      if (STREQ (argv[1], "--version"))
658        {
659          version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version, AUTHORS,
660                       (char *) NULL);
661          exit (EXIT_SUCCESS);
662        }
663    }
664
665  /* The above handles --help and --version.
666     Since there is no other invocation of getopt, handle `--' here.  */
667  if (1 < argc && STREQ (argv[1], "--"))
668    {
669      --argc;
670      ++argv;
671    }
672
673  if (argc <= 1)
674    {
675      error (0, 0, _("missing operand"));
676      usage (EXIT_FAILURE);
677    }
678
679  format = argv[1];
680  argc -= 2;
681  argv += 2;
682
683  do
684    {
685      args_used = print_formatted (format, argc, argv);
686      argc -= args_used;
687      argv += args_used;
688    }
689  while (args_used > 0 && argc > 0);
690
691  if (argc > 0)
692    error (0, 0,
693           _("warning: ignoring excess arguments, starting with %s"),
694           quote (argv[0]));
695
696  exit (exit_status);
697}
698