1/* Various utility functions.
2   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
3   2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
4
5This file is part of GNU Wget.
6
7GNU Wget is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3 of the License, or
10(at your option) any later version.
11
12GNU Wget is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with Wget.  If not, see <http://www.gnu.org/licenses/>.
19
20Additional permission under GNU GPL version 3 section 7
21
22If you modify this program, or any covered work, by linking or
23combining it with the OpenSSL project's OpenSSL library (or a
24modified version of that library), containing parts covered by the
25terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26grants you additional permission to convey the resulting work.
27Corresponding Source for a non-source form of such a combination
28shall include the source code for the parts of OpenSSL used as well
29as that of the covered work.  */
30
31#include "wget.h"
32
33#include <stdio.h>
34#include <stdlib.h>
35#include <string.h>
36#include <time.h>
37#ifdef HAVE_SYS_TIME_H
38# include <sys/time.h>
39#endif
40#ifdef HAVE_UNISTD_H
41# include <unistd.h>
42#endif
43#ifdef HAVE_MMAP
44# include <sys/mman.h>
45#endif
46#ifdef HAVE_PROCESS_H
47# include <process.h>  /* getpid() */
48#endif
49#ifdef HAVE_UTIME_H
50# include <utime.h>
51#endif
52#ifdef HAVE_SYS_UTIME_H
53# include <sys/utime.h>
54#endif
55#include <errno.h>
56#include <fcntl.h>
57#include <assert.h>
58#include <stdarg.h>
59#include <locale.h>
60
61/* For TIOCGWINSZ and friends: */
62#ifdef HAVE_SYS_IOCTL_H
63# include <sys/ioctl.h>
64#endif
65#ifdef HAVE_TERMIOS_H
66# include <termios.h>
67#endif
68
69/* Needed for Unix version of run_with_timeout. */
70#include <signal.h>
71#include <setjmp.h>
72
73#ifndef HAVE_SIGSETJMP
74/* If sigsetjmp is a macro, configure won't pick it up. */
75# ifdef sigsetjmp
76#  define HAVE_SIGSETJMP
77# endif
78#endif
79
80#if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK
81# define USE_SIGNAL_TIMEOUT
82#endif
83
84#include "utils.h"
85#include "hash.h"
86
87#ifdef __VMS
88#include "vms.h"
89#endif /* def __VMS */
90
91#ifdef TESTING
92#include "test.h"
93#endif
94
95static void
96memfatal (const char *context, long attempted_size)
97{
98  /* Make sure we don't try to store part of the log line, and thus
99     call malloc.  */
100  log_set_save_context (false);
101
102  /* We have different log outputs in different situations:
103     1) output without bytes information
104     2) output with bytes information  */
105  if (attempted_size == UNKNOWN_ATTEMPTED_SIZE)
106    {
107      logprintf (LOG_ALWAYS,
108                 _("%s: %s: Failed to allocate enough memory; memory exhausted.\n"),
109                 exec_name, context);
110    }
111  else
112    {
113      logprintf (LOG_ALWAYS,
114                 _("%s: %s: Failed to allocate %ld bytes; memory exhausted.\n"),
115                 exec_name, context, attempted_size);
116    }
117
118  exit (1);
119}
120
121/* Character property table for (re-)escaping VMS ODS5 extended file
122   names.  Note that this table ignores Unicode.
123
124   ODS2 valid characters: 0-9 A-Z a-z $ - _ ~
125
126   ODS5 Invalid characters:
127      C0 control codes (0x00 to 0x1F inclusive)
128      Asterisk (*)
129      Question mark (?)
130
131   ODS5 Invalid characters only in VMS V7.2 (which no one runs, right?):
132      Double quotation marks (")
133      Backslash (\)
134      Colon (:)
135      Left angle bracket (<)
136      Right angle bracket (>)
137      Slash (/)
138      Vertical bar (|)
139
140   Characters escaped by "^":
141      SP  !  #  %  &  '  (  )  +  ,  .  ;  =  @  [  ]  ^  `  {  }  ~
142
143   Either "^_" or "^ " is accepted as a space.  Period (.) is a special
144   case.  Note that un-escaped < and > can also confuse a directory
145   spec.
146
147   Characters put out as ^xx:
148      7F (DEL)
149      80-9F (C1 control characters)
150      A0 (nonbreaking space)
151      FF (Latin small letter y diaeresis)
152
153   Other cases:
154      Unicode: "^Uxxxx", where "xxxx" is four hex digits.
155
156    Property table values:
157      Normal escape:    1
158      Space:            2
159      Dot:              4
160      Hex-hex escape:   8
161      ODS2 normal:     16
162      ODS2 lower case: 32
163      Hex digit:       64
164*/
165
166unsigned char char_prop[ 256] = {
167
168/* NUL SOH STX ETX EOT ENQ ACK BEL   BS  HT  LF  VT  FF  CR  SO  SI */
169    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
170
171/* DLE DC1 DC2 DC3 DC4 NAK SYN ETB  CAN  EM SUB ESC  FS  GS  RS  US */
172    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
173
174/*  SP  !   "   #   $   %   &   '    (   )   *   +   ,   -   .   /  */
175    2,  1,  0,  1, 16,  1,  1,  1,   1,  1,  0,  1,  1, 16,  4,  0,
176
177/*  0   1   2   3   4   5   6   7    8   9   :   ;   <   =   >   ?  */
178   80, 80, 80, 80, 80, 80, 80, 80,  80, 80,  0,  1,  1,  1,  1,  1,
179
180/*  @   A   B   C   D   E   F   G    H   I   J   K   L   M   N   O  */
181    1, 80, 80, 80, 80, 80, 80, 16,  16, 16, 16, 16, 16, 16, 16, 16,
182
183/*  P   Q   R   S   T   U   V   W    X   Y   Z   [   \   ]   ^   _  */
184   16, 16, 16, 16, 16, 16, 16, 16,  16, 16, 16,  1,  0,  1,  1, 16,
185
186/*  `   a   b   c   d   e   f   g    h   i   j   k   l   m   n   o  */
187    1, 96, 96, 96, 96, 96, 96, 32,  32, 32, 32, 32, 32, 32, 32, 32,
188
189/*  p   q   r   s   t   u   v   w    x   y   z   {   |   }   ~  DEL */
190   32, 32, 32, 32, 32, 32, 32, 32,  32, 32, 32,  1,  0,  1, 17,  8,
191
192    8,  8,  8,  8,  8,  8,  8,  8,   8,  8,  8,  8,  8,  8,  8,  8,
193    8,  8,  8,  8,  8,  8,  8,  8,   8,  8,  8,  8,  8,  8,  8,  8,
194    8,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
195    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
196    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
197    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
198    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
199    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  8
200};
201
202/* Utility function: like xstrdup(), but also lowercases S.  */
203
204char *
205xstrdup_lower (const char *s)
206{
207  char *copy = xstrdup (s);
208  char *p = copy;
209  for (; *p; p++)
210    *p = c_tolower (*p);
211  return copy;
212}
213
214/* Copy the string formed by two pointers (one on the beginning, other
215   on the char after the last char) to a new, malloc-ed location.
216   0-terminate it.  */
217char *
218strdupdelim (const char *beg, const char *end)
219{
220  char *res = xmalloc (end - beg + 1);
221  memcpy (res, beg, end - beg);
222  res[end - beg] = '\0';
223  return res;
224}
225
226/* Parse a string containing comma-separated elements, and return a
227   vector of char pointers with the elements.  Spaces following the
228   commas are ignored.  */
229char **
230sepstring (const char *s)
231{
232  char **res;
233  const char *p;
234  int i = 0;
235
236  if (!s || !*s)
237    return NULL;
238  res = NULL;
239  p = s;
240  while (*s)
241    {
242      if (*s == ',')
243        {
244          res = xrealloc (res, (i + 2) * sizeof (char *));
245          res[i] = strdupdelim (p, s);
246          res[++i] = NULL;
247          ++s;
248          /* Skip the blanks following the ','.  */
249          while (c_isspace (*s))
250            ++s;
251          p = s;
252        }
253      else
254        ++s;
255    }
256  res = xrealloc (res, (i + 2) * sizeof (char *));
257  res[i] = strdupdelim (p, s);
258  res[i + 1] = NULL;
259  return res;
260}
261
262/* Like sprintf, but prints into a string of sufficient size freshly
263   allocated with malloc, which is returned.  If unable to print due
264   to invalid format, returns NULL.  Inability to allocate needed
265   memory results in abort, as with xmalloc.  This is in spirit
266   similar to the GNU/BSD extension asprintf, but somewhat easier to
267   use.
268
269   Internally the function either calls vasprintf or loops around
270   vsnprintf until the correct size is found.  Since Wget also ships a
271   fallback implementation of vsnprintf, this should be portable.  */
272
273/* Constant is using for limits memory allocation for text buffer.
274   Applicable in situation when: vasprintf is not available in the system
275   and vsnprintf return -1 when long line is truncated (in old versions of
276   glibc and in other system where C99 doesn`t support) */
277
278#define FMT_MAX_LENGTH 1048576
279
280char *
281aprintf (const char *fmt, ...)
282{
283#if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC
284  /* Use vasprintf. */
285  int ret;
286  va_list args;
287  char *str;
288  va_start (args, fmt);
289  ret = vasprintf (&str, fmt, args);
290  va_end (args);
291  if (ret < 0 && errno == ENOMEM)
292    memfatal ("aprintf", UNKNOWN_ATTEMPTED_SIZE);  /* for consistency
293                                                      with xmalloc/xrealloc */
294  else if (ret < 0)
295    return NULL;
296  return str;
297#else  /* not HAVE_VASPRINTF */
298
299  /* vasprintf is unavailable.  snprintf into a small buffer and
300     resize it as necessary. */
301  int size = 32;
302  char *str = xmalloc (size);
303
304  /* #### This code will infloop and eventually abort in xrealloc if
305     passed a FMT that causes snprintf to consistently return -1.  */
306
307  while (1)
308    {
309      int n;
310      va_list args;
311
312      va_start (args, fmt);
313      n = vsnprintf (str, size, fmt, args);
314      va_end (args);
315
316      /* If the printing worked, return the string. */
317      if (n > -1 && n < size)
318        return str;
319
320      /* Else try again with a larger buffer. */
321      if (n > -1)               /* C99 */
322        size = n + 1;           /* precisely what is needed */
323      else if (size >= FMT_MAX_LENGTH)  /* We have a huge buffer, */
324        {                               /* maybe we have some wrong
325                                           format string? */
326          logprintf (LOG_ALWAYS,
327                     _("%s: aprintf: text buffer is too big (%ld bytes), "
328                       "aborting.\n"),
329                     exec_name, size);  /* printout a log message */
330          abort ();                     /* and abort... */
331        }
332      else
333        {
334          /* else, we continue to grow our
335           * buffer: Twice the old size. */
336          size <<= 1;
337        }
338      str = xrealloc (str, size);
339    }
340#endif /* not HAVE_VASPRINTF */
341}
342
343/* Concatenate the NULL-terminated list of string arguments into
344   freshly allocated space.  */
345
346char *
347concat_strings (const char *str0, ...)
348{
349  va_list args;
350  int saved_lengths[5];         /* inspired by Apache's apr_pstrcat */
351  char *ret, *p;
352
353  const char *next_str;
354  int total_length = 0;
355  size_t argcount;
356
357  /* Calculate the length of and allocate the resulting string. */
358
359  argcount = 0;
360  va_start (args, str0);
361  for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
362    {
363      int len = strlen (next_str);
364      if (argcount < countof (saved_lengths))
365        saved_lengths[argcount++] = len;
366      total_length += len;
367    }
368  va_end (args);
369  p = ret = xmalloc (total_length + 1);
370
371  /* Copy the strings into the allocated space. */
372
373  argcount = 0;
374  va_start (args, str0);
375  for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
376    {
377      int len;
378      if (argcount < countof (saved_lengths))
379        len = saved_lengths[argcount++];
380      else
381        len = strlen (next_str);
382      memcpy (p, next_str, len);
383      p += len;
384    }
385  va_end (args);
386  *p = '\0';
387
388  return ret;
389}
390
391/* Format the provided time according to the specified format.  The
392   format is a string with format elements supported by strftime.  */
393
394static char *
395fmttime (time_t t, const char *fmt)
396{
397  static char output[32];
398  struct tm *tm = localtime(&t);
399  if (!tm)
400    abort ();
401  if (!strftime(output, sizeof(output), fmt, tm))
402    abort ();
403  return output;
404}
405
406/* Return pointer to a static char[] buffer in which zero-terminated
407   string-representation of TM (in form hh:mm:ss) is printed.
408
409   If TM is NULL, the current time will be used.  */
410
411char *
412time_str (time_t t)
413{
414  return fmttime(t, "%H:%M:%S");
415}
416
417/* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
418
419char *
420datetime_str (time_t t)
421{
422  return fmttime(t, "%Y-%m-%d %H:%M:%S");
423}
424
425/* The Windows versions of the following two functions are defined in
426   mswindows.c. On MSDOS this function should never be called. */
427
428#ifdef __VMS
429
430void
431fork_to_background (void)
432{
433  return;
434}
435
436#else /* def __VMS */
437
438#if !defined(WINDOWS) && !defined(MSDOS)
439void
440fork_to_background (void)
441{
442  pid_t pid;
443  /* Whether we arrange our own version of opt.lfilename here.  */
444  bool logfile_changed = false;
445
446  if (!opt.lfilename && (!opt.quiet || opt.server_response))
447    {
448      /* We must create the file immediately to avoid either a race
449         condition (which arises from using unique_name and failing to
450         use fopen_excl) or lying to the user about the log file name
451         (which arises from using unique_name, printing the name, and
452         using fopen_excl later on.)  */
453      FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, false, &opt.lfilename);
454      if (new_log_fp)
455        {
456          logfile_changed = true;
457          fclose (new_log_fp);
458        }
459    }
460  pid = fork ();
461  if (pid < 0)
462    {
463      /* parent, error */
464      perror ("fork");
465      exit (1);
466    }
467  else if (pid != 0)
468    {
469      /* parent, no error */
470      printf (_("Continuing in background, pid %d.\n"), (int) pid);
471      if (logfile_changed)
472        printf (_("Output will be written to %s.\n"), quote (opt.lfilename));
473      exit (0);                 /* #### should we use _exit()? */
474    }
475
476  /* child: give up the privileges and keep running. */
477  setsid ();
478  freopen ("/dev/null", "r", stdin);
479  freopen ("/dev/null", "w", stdout);
480  freopen ("/dev/null", "w", stderr);
481}
482#endif /* !WINDOWS && !MSDOS */
483
484#endif /* def __VMS [else] */
485
486
487/* "Touch" FILE, i.e. make its mtime ("modified time") equal the time
488   specified with TM.  The atime ("access time") is set to the current
489   time.  */
490
491void
492touch (const char *file, time_t tm)
493{
494#ifdef HAVE_STRUCT_UTIMBUF
495  struct utimbuf times;
496#else
497  struct {
498    time_t actime;
499    time_t modtime;
500  } times;
501#endif
502  times.modtime = tm;
503  times.actime = time (NULL);
504  if (utime (file, &times) == -1)
505    logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
506}
507
508/* Checks if FILE is a symbolic link, and removes it if it is.  Does
509   nothing under MS-Windows.  */
510int
511remove_link (const char *file)
512{
513  int err = 0;
514  struct_stat st;
515
516  if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
517    {
518      DEBUGP (("Unlinking %s (symlink).\n", file));
519      err = unlink (file);
520      if (err != 0)
521        logprintf (LOG_VERBOSE, _("Failed to unlink symlink %s: %s\n"),
522                   quote (file), strerror (errno));
523    }
524  return err;
525}
526
527/* Does FILENAME exist?  This is quite a lousy implementation, since
528   it supplies no error codes -- only a yes-or-no answer.  Thus it
529   will return that a file does not exist if, e.g., the directory is
530   unreadable.  I don't mind it too much currently, though.  The
531   proper way should, of course, be to have a third, error state,
532   other than true/false, but that would introduce uncalled-for
533   additional complexity to the callers.  */
534bool
535file_exists_p (const char *filename)
536{
537#ifdef HAVE_ACCESS
538  return access (filename, F_OK) >= 0;
539#else
540  struct_stat buf;
541  return stat (filename, &buf) >= 0;
542#endif
543}
544
545/* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
546   Returns 0 on error.  */
547bool
548file_non_directory_p (const char *path)
549{
550  struct_stat buf;
551  /* Use lstat() rather than stat() so that symbolic links pointing to
552     directories can be identified correctly.  */
553  if (lstat (path, &buf) != 0)
554    return false;
555  return S_ISDIR (buf.st_mode) ? false : true;
556}
557
558/* Return the size of file named by FILENAME, or -1 if it cannot be
559   opened or seeked into. */
560wgint
561file_size (const char *filename)
562{
563#if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO)
564  wgint size;
565  /* We use fseek rather than stat to determine the file size because
566     that way we can also verify that the file is readable without
567     explicitly checking for permissions.  Inspired by the POST patch
568     by Arnaud Wylie.  */
569  FILE *fp = fopen (filename, "rb");
570  if (!fp)
571    return -1;
572  fseeko (fp, 0, SEEK_END);
573  size = ftello (fp);
574  fclose (fp);
575  return size;
576#else
577  struct_stat st;
578  if (stat (filename, &st) < 0)
579    return -1;
580  return st.st_size;
581#endif
582}
583
584/* 2005-02-19 SMS.
585   If no UNIQ_SEP is defined (as on VMS), have unique_name() return the
586   original name.  With the VMS file systems' versioning, everything
587   should be fine, and appending ".NN" just causes trouble.
588*/
589
590#ifdef UNIQ_SEP
591
592/* stat file names named PREFIX.1, PREFIX.2, etc., until one that
593   doesn't exist is found.  Return a freshly allocated copy of the
594   unused file name.  */
595
596static char *
597unique_name_1 (const char *prefix)
598{
599  int count = 1;
600  int plen = strlen (prefix);
601  char *template = (char *)alloca (plen + 1 + 24);
602  char *template_tail = template + plen;
603
604  memcpy (template, prefix, plen);
605  *template_tail++ = UNIQ_SEP;
606
607  do
608    number_to_string (template_tail, count++);
609  while (file_exists_p (template));
610
611  return xstrdup (template);
612}
613
614/* Return a unique file name, based on FILE.
615
616   More precisely, if FILE doesn't exist, it is returned unmodified.
617   If not, FILE.1 is tried, then FILE.2, etc.  The first FILE.<number>
618   file name that doesn't exist is returned.
619
620   2005-02-19 SMS.  "." is now UNIQ_SEP, and may be different.
621
622   The resulting file is not created, only verified that it didn't
623   exist at the point in time when the function was called.
624   Therefore, where security matters, don't rely that the file created
625   by this function exists until you open it with O_EXCL or
626   equivalent.
627
628   If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
629   string.  Otherwise, it may return FILE if the file doesn't exist
630   (and therefore doesn't need changing).  */
631
632char *
633unique_name (const char *file, bool allow_passthrough)
634{
635  /* If the FILE itself doesn't exist, return it without
636     modification. */
637  if (!file_exists_p (file))
638    return allow_passthrough ? (char *)file : xstrdup (file);
639
640  /* Otherwise, find a numeric suffix that results in unused file name
641     and return it.  */
642  return unique_name_1 (file);
643}
644
645#else /* def UNIQ_SEP */
646
647/* Dummy unique_name() for VMS.  Return the original name as easily as
648   possible.
649*/
650char *
651unique_name (const char *file, bool allow_passthrough)
652{
653  /* Return the FILE itself, without modification, irregardful. */
654  return allow_passthrough ? (char *)file : xstrdup (file);
655}
656
657#endif /* def UNIQ_SEP [else] */
658
659/* Create a file based on NAME, except without overwriting an existing
660   file with that name.  Providing O_EXCL is correctly implemented,
661   this function does not have the race condition associated with
662   opening the file returned by unique_name.  */
663
664FILE *
665unique_create (const char *name, bool binary, char **opened_name)
666{
667  /* unique file name, based on NAME */
668  char *uname = unique_name (name, false);
669  FILE *fp;
670  while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST)
671    {
672      xfree (uname);
673      uname = unique_name (name, false);
674    }
675  if (opened_name && fp != NULL)
676    {
677      if (fp)
678        *opened_name = uname;
679      else
680        {
681          *opened_name = NULL;
682          xfree (uname);
683        }
684    }
685  else
686    xfree (uname);
687  return fp;
688}
689
690/* Open the file for writing, with the addition that the file is
691   opened "exclusively".  This means that, if the file already exists,
692   this function will *fail* and errno will be set to EEXIST.  If
693   BINARY is set, the file will be opened in binary mode, equivalent
694   to fopen's "wb".
695
696   If opening the file fails for any reason, including the file having
697   previously existed, this function returns NULL and sets errno
698   appropriately.  */
699
700FILE *
701fopen_excl (const char *fname, int binary)
702{
703  int fd;
704#ifdef O_EXCL
705
706/* 2005-04-14 SMS.
707   VMS lacks O_BINARY, but makes up for it in weird and wonderful ways.
708   It also has file versions which obviate all the O_EXCL effort.
709   O_TRUNC (something of a misnomer) requests a new version.
710*/
711# ifdef __VMS
712/* Common open() optional arguments:
713   sequential access only, access callback function.
714*/
715#  define OPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id
716
717  int open_id;
718  int flags = O_WRONLY | O_CREAT | O_TRUNC;
719
720  if (binary > 1)
721    {
722      open_id = 11;
723      fd = open( fname,                 /* File name. */
724       flags,                           /* Flags. */
725       0777,                            /* Mode for default protection. */
726       "ctx=bin,stm",                   /* Binary, stream access. */
727       "rfm=stmlf",                     /* Stream_LF. */
728       OPEN_OPT_ARGS);                  /* Access callback. */
729    }
730  else if (binary)
731    {
732      open_id = 12;
733      fd = open( fname,                 /* File name. */
734       flags,                           /* Flags. */
735       0777,                            /* Mode for default protection. */
736       "ctx=bin,stm",                   /* Binary, stream access. */
737       "rfm=fix",                       /* Fixed-length, */
738       "mrs=512",                       /* 512-byte records. */
739       OPEN_OPT_ARGS);                  /* Access callback. */
740    }
741  else
742    {
743      open_id = 13;
744      fd = open( fname,                 /* File name. */
745       flags,                           /* Flags. */
746       0777,                            /* Mode for default protection.
747*/
748       "rfm=stmlf",                     /* Stream_LF. */
749       OPEN_OPT_ARGS);                  /* Access callback. */
750    }
751# else /* def __VMS */
752  int flags = O_WRONLY | O_CREAT | O_EXCL;
753# ifdef O_BINARY
754  if (binary)
755    flags |= O_BINARY;
756# endif
757  fd = open (fname, flags, 0666);
758# endif /* def __VMS [else] */
759
760  if (fd < 0)
761    return NULL;
762  return fdopen (fd, binary ? "wb" : "w");
763#else  /* not O_EXCL */
764  /* Manually check whether the file exists.  This is prone to race
765     conditions, but systems without O_EXCL haven't deserved
766     better.  */
767  if (file_exists_p (fname))
768    {
769      errno = EEXIST;
770      return NULL;
771    }
772  return fopen (fname, binary ? "wb" : "w");
773#endif /* not O_EXCL */
774}
775
776/* Create DIRECTORY.  If some of the pathname components of DIRECTORY
777   are missing, create them first.  In case any mkdir() call fails,
778   return its error status.  Returns 0 on successful completion.
779
780   The behaviour of this function should be identical to the behaviour
781   of `mkdir -p' on systems where mkdir supports the `-p' option.  */
782int
783make_directory (const char *directory)
784{
785  int i, ret, quit = 0;
786  char *dir;
787
788  /* Make a copy of dir, to be able to write to it.  Otherwise, the
789     function is unsafe if called with a read-only char *argument.  */
790  STRDUP_ALLOCA (dir, directory);
791
792  /* If the first character of dir is '/', skip it (and thus enable
793     creation of absolute-pathname directories.  */
794  for (i = (*dir == '/'); 1; ++i)
795    {
796      for (; dir[i] && dir[i] != '/'; i++)
797        ;
798      if (!dir[i])
799        quit = 1;
800      dir[i] = '\0';
801      /* Check whether the directory already exists.  Allow creation of
802         of intermediate directories to fail, as the initial path components
803         are not necessarily directories!  */
804      if (!file_exists_p (dir))
805        ret = mkdir (dir, 0777);
806      else
807        ret = 0;
808      if (quit)
809        break;
810      else
811        dir[i] = '/';
812    }
813  return ret;
814}
815
816/* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
817   should be a file name.
818
819   file_merge("/foo/bar", "baz")  => "/foo/baz"
820   file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
821   file_merge("foo", "bar")       => "bar"
822
823   In other words, it's a simpler and gentler version of uri_merge.  */
824
825char *
826file_merge (const char *base, const char *file)
827{
828  char *result;
829  const char *cut = (const char *)strrchr (base, '/');
830
831  if (!cut)
832    return xstrdup (file);
833
834  result = xmalloc (cut - base + 1 + strlen (file) + 1);
835  memcpy (result, base, cut - base);
836  result[cut - base] = '/';
837  strcpy (result + (cut - base) + 1, file);
838
839  return result;
840}
841
842/* Like fnmatch, but performs a case-insensitive match.  */
843
844int
845fnmatch_nocase (const char *pattern, const char *string, int flags)
846{
847#ifdef FNM_CASEFOLD
848  /* The FNM_CASEFOLD flag started as a GNU extension, but it is now
849     also present on *BSD platforms, and possibly elsewhere.  */
850  return fnmatch (pattern, string, flags | FNM_CASEFOLD);
851#else
852  /* Turn PATTERN and STRING to lower case and call fnmatch on them. */
853  char *patcopy = (char *) alloca (strlen (pattern) + 1);
854  char *strcopy = (char *) alloca (strlen (string) + 1);
855  char *p;
856  for (p = patcopy; *pattern; pattern++, p++)
857    *p = c_tolower (*pattern);
858  *p = '\0';
859  for (p = strcopy; *string; string++, p++)
860    *p = c_tolower (*string);
861  *p = '\0';
862  return fnmatch (patcopy, strcopy, flags);
863#endif
864}
865
866static bool in_acclist (const char *const *, const char *, bool);
867
868/* Determine whether a file is acceptable to be followed, according to
869   lists of patterns to accept/reject.  */
870bool
871acceptable (const char *s)
872{
873  int l = strlen (s);
874
875  while (l && s[l] != '/')
876    --l;
877  if (s[l] == '/')
878    s += (l + 1);
879  if (opt.accepts)
880    {
881      if (opt.rejects)
882        return (in_acclist ((const char *const *)opt.accepts, s, true)
883                && !in_acclist ((const char *const *)opt.rejects, s, true));
884      else
885        return in_acclist ((const char *const *)opt.accepts, s, true);
886    }
887  else if (opt.rejects)
888    return !in_acclist ((const char *const *)opt.rejects, s, true);
889  return true;
890}
891
892/* Check if D2 is a subdirectory of D1.  E.g. if D1 is `/something', subdir_p()
893   will return true if and only if D2 begins with `/something/' or is exactly
894   '/something'.  */
895bool
896subdir_p (const char *d1, const char *d2)
897{
898  if (*d1 == '\0')
899    return true;
900  if (!opt.ignore_case)
901    for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2)
902      ;
903  else
904    for (; *d1 && *d2 && (c_tolower (*d1) == c_tolower (*d2)); ++d1, ++d2)
905      ;
906
907  return *d1 == '\0' && (*d2 == '\0' || *d2 == '/');
908}
909
910/* Iterate through DIRLIST (which must be NULL-terminated), and return the
911   first element that matches DIR, through wildcards or front comparison (as
912   appropriate).  */
913static bool
914dir_matches_p (char **dirlist, const char *dir)
915{
916  char **x;
917  int (*matcher) (const char *, const char *, int)
918    = opt.ignore_case ? fnmatch_nocase : fnmatch;
919
920  for (x = dirlist; *x; x++)
921    {
922      /* Remove leading '/' */
923      char *p = *x + (**x == '/');
924      if (has_wildcards_p (p))
925        {
926          if (matcher (p, dir, FNM_PATHNAME) == 0)
927            break;
928        }
929      else
930        {
931          if (subdir_p (p, dir))
932            break;
933        }
934    }
935
936  return *x ? true : false;
937}
938
939/* Returns whether DIRECTORY is acceptable for download, wrt the
940   include/exclude lists.
941
942   The leading `/' is ignored in paths; relative and absolute paths
943   may be freely intermixed.  */
944
945bool
946accdir (const char *directory)
947{
948  /* Remove starting '/'.  */
949  if (*directory == '/')
950    ++directory;
951  if (opt.includes)
952    {
953      if (!dir_matches_p (opt.includes, directory))
954        return false;
955    }
956  if (opt.excludes)
957    {
958      if (dir_matches_p (opt.excludes, directory))
959        return false;
960    }
961  return true;
962}
963
964/* Return true if STRING ends with TAIL.  For instance:
965
966   match_tail ("abc", "bc", false)  -> 1
967   match_tail ("abc", "ab", false)  -> 0
968   match_tail ("abc", "abc", false) -> 1
969
970   If FOLD_CASE is true, the comparison will be case-insensitive.  */
971
972bool
973match_tail (const char *string, const char *tail, bool fold_case)
974{
975  int i, j;
976
977  /* We want this to be fast, so we code two loops, one with
978     case-folding, one without. */
979
980  if (!fold_case)
981    {
982      for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
983        if (string[i] != tail[j])
984          break;
985    }
986  else
987    {
988      for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
989        if (c_tolower (string[i]) != c_tolower (tail[j]))
990          break;
991    }
992
993  /* If the tail was exhausted, the match was succesful.  */
994  if (j == -1)
995    return true;
996  else
997    return false;
998}
999
1000/* Checks whether string S matches each element of ACCEPTS.  A list
1001   element are matched either with fnmatch() or match_tail(),
1002   according to whether the element contains wildcards or not.
1003
1004   If the BACKWARD is false, don't do backward comparison -- just compare
1005   them normally.  */
1006static bool
1007in_acclist (const char *const *accepts, const char *s, bool backward)
1008{
1009  for (; *accepts; accepts++)
1010    {
1011      if (has_wildcards_p (*accepts))
1012        {
1013          int res = opt.ignore_case
1014            ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0);
1015          /* fnmatch returns 0 if the pattern *does* match the string.  */
1016          if (res == 0)
1017            return true;
1018        }
1019      else
1020        {
1021          if (backward)
1022            {
1023              if (match_tail (s, *accepts, opt.ignore_case))
1024                return true;
1025            }
1026          else
1027            {
1028              int cmp = opt.ignore_case
1029                ? strcasecmp (s, *accepts) : strcmp (s, *accepts);
1030              if (cmp == 0)
1031                return true;
1032            }
1033        }
1034    }
1035  return false;
1036}
1037
1038/* Return the location of STR's suffix (file extension).  Examples:
1039   suffix ("foo.bar")       -> "bar"
1040   suffix ("foo.bar.baz")   -> "baz"
1041   suffix ("/foo/bar")      -> NULL
1042   suffix ("/foo.bar/baz")  -> NULL  */
1043char *
1044suffix (const char *str)
1045{
1046  int i;
1047
1048  for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
1049    ;
1050
1051  if (str[i++] == '.')
1052    return (char *)str + i;
1053  else
1054    return NULL;
1055}
1056
1057/* Return true if S contains globbing wildcards (`*', `?', `[' or
1058   `]').  */
1059
1060bool
1061has_wildcards_p (const char *s)
1062{
1063  for (; *s; s++)
1064    if (*s == '*' || *s == '?' || *s == '[' || *s == ']')
1065      return true;
1066  return false;
1067}
1068
1069/* Return true if FNAME ends with a typical HTML suffix.  The
1070   following (case-insensitive) suffixes are presumed to be HTML
1071   files:
1072
1073     html
1074     htm
1075     ?html (`?' matches one character)
1076
1077   #### CAVEAT.  This is not necessarily a good indication that FNAME
1078   refers to a file that contains HTML!  */
1079bool
1080has_html_suffix_p (const char *fname)
1081{
1082  char *suf;
1083
1084  if ((suf = suffix (fname)) == NULL)
1085    return false;
1086  if (!strcasecmp (suf, "html"))
1087    return true;
1088  if (!strcasecmp (suf, "htm"))
1089    return true;
1090  if (suf[0] && !strcasecmp (suf + 1, "html"))
1091    return true;
1092  return false;
1093}
1094
1095/* Read a line from FP and return the pointer to freshly allocated
1096   storage.  The storage space is obtained through malloc() and should
1097   be freed with free() when it is no longer needed.
1098
1099   The length of the line is not limited, except by available memory.
1100   The newline character at the end of line is retained.  The line is
1101   terminated with a zero character.
1102
1103   After end-of-file is encountered without anything being read, NULL
1104   is returned.  NULL is also returned on error.  To distinguish
1105   between these two cases, use the stdio function ferror().  */
1106
1107char *
1108read_whole_line (FILE *fp)
1109{
1110  int length = 0;
1111  int bufsize = 82;
1112  char *line = xmalloc (bufsize);
1113
1114  while (fgets (line + length, bufsize - length, fp))
1115    {
1116      length += strlen (line + length);
1117      if (length == 0)
1118        /* Possible for example when reading from a binary file where
1119           a line begins with \0.  */
1120        continue;
1121
1122      if (line[length - 1] == '\n')
1123        break;
1124
1125      /* fgets() guarantees to read the whole line, or to use up the
1126         space we've given it.  We can double the buffer
1127         unconditionally.  */
1128      bufsize <<= 1;
1129      line = xrealloc (line, bufsize);
1130    }
1131  if (length == 0 || ferror (fp))
1132    {
1133      xfree (line);
1134      return NULL;
1135    }
1136  if (length + 1 < bufsize)
1137    /* Relieve the memory from our exponential greediness.  We say
1138       `length + 1' because the terminating \0 is not included in
1139       LENGTH.  We don't need to zero-terminate the string ourselves,
1140       though, because fgets() does that.  */
1141    line = xrealloc (line, length + 1);
1142  return line;
1143}
1144
1145/* Read FILE into memory.  A pointer to `struct file_memory' are
1146   returned; use struct element `content' to access file contents, and
1147   the element `length' to know the file length.  `content' is *not*
1148   zero-terminated, and you should *not* read or write beyond the [0,
1149   length) range of characters.
1150
1151   After you are done with the file contents, call read_file_free to
1152   release the memory.
1153
1154   Depending on the operating system and the type of file that is
1155   being read, read_file() either mmap's the file into memory, or
1156   reads the file into the core using read().
1157
1158   If file is named "-", fileno(stdin) is used for reading instead.
1159   If you want to read from a real file named "-", use "./-" instead.  */
1160
1161struct file_memory *
1162read_file (const char *file)
1163{
1164  int fd;
1165  struct file_memory *fm;
1166  long size;
1167  bool inhibit_close = false;
1168
1169  /* Some magic in the finest tradition of Perl and its kin: if FILE
1170     is "-", just use stdin.  */
1171  if (HYPHENP (file))
1172    {
1173      fd = fileno (stdin);
1174      inhibit_close = true;
1175      /* Note that we don't inhibit mmap() in this case.  If stdin is
1176         redirected from a regular file, mmap() will still work.  */
1177    }
1178  else
1179    fd = open (file, O_RDONLY);
1180  if (fd < 0)
1181    return NULL;
1182  fm = xnew (struct file_memory);
1183
1184#ifdef HAVE_MMAP
1185  {
1186    struct_fstat buf;
1187    if (fstat (fd, &buf) < 0)
1188      goto mmap_lose;
1189    fm->length = buf.st_size;
1190    /* NOTE: As far as I know, the callers of this function never
1191       modify the file text.  Relying on this would enable us to
1192       specify PROT_READ and MAP_SHARED for a marginal gain in
1193       efficiency, but at some cost to generality.  */
1194    fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
1195                        MAP_PRIVATE, fd, 0);
1196    if (fm->content == (char *)MAP_FAILED)
1197      goto mmap_lose;
1198    if (!inhibit_close)
1199      close (fd);
1200
1201    fm->mmap_p = 1;
1202    return fm;
1203  }
1204
1205 mmap_lose:
1206  /* The most common reason why mmap() fails is that FD does not point
1207     to a plain file.  However, it's also possible that mmap() doesn't
1208     work for a particular type of file.  Therefore, whenever mmap()
1209     fails, we just fall back to the regular method.  */
1210#endif /* HAVE_MMAP */
1211
1212  fm->length = 0;
1213  size = 512;                   /* number of bytes fm->contents can
1214                                   hold at any given time. */
1215  fm->content = xmalloc (size);
1216  while (1)
1217    {
1218      wgint nread;
1219      if (fm->length > size / 2)
1220        {
1221          /* #### I'm not sure whether the whole exponential-growth
1222             thing makes sense with kernel read.  On Linux at least,
1223             read() refuses to read more than 4K from a file at a
1224             single chunk anyway.  But other Unixes might optimize it
1225             better, and it doesn't *hurt* anything, so I'm leaving
1226             it.  */
1227
1228          /* Normally, we grow SIZE exponentially to make the number
1229             of calls to read() and realloc() logarithmic in relation
1230             to file size.  However, read() can read an amount of data
1231             smaller than requested, and it would be unreasonable to
1232             double SIZE every time *something* was read.  Therefore,
1233             we double SIZE only when the length exceeds half of the
1234             entire allocated size.  */
1235          size <<= 1;
1236          fm->content = xrealloc (fm->content, size);
1237        }
1238      nread = read (fd, fm->content + fm->length, size - fm->length);
1239      if (nread > 0)
1240        /* Successful read. */
1241        fm->length += nread;
1242      else if (nread < 0)
1243        /* Error. */
1244        goto lose;
1245      else
1246        /* EOF */
1247        break;
1248    }
1249  if (!inhibit_close)
1250    close (fd);
1251  if (size > fm->length && fm->length != 0)
1252    /* Due to exponential growth of fm->content, the allocated region
1253       might be much larger than what is actually needed.  */
1254    fm->content = xrealloc (fm->content, fm->length);
1255  fm->mmap_p = 0;
1256  return fm;
1257
1258 lose:
1259  if (!inhibit_close)
1260    close (fd);
1261  xfree (fm->content);
1262  xfree (fm);
1263  return NULL;
1264}
1265
1266/* Release the resources held by FM.  Specifically, this calls
1267   munmap() or xfree() on fm->content, depending whether mmap or
1268   malloc/read were used to read in the file.  It also frees the
1269   memory needed to hold the FM structure itself.  */
1270
1271void
1272read_file_free (struct file_memory *fm)
1273{
1274#ifdef HAVE_MMAP
1275  if (fm->mmap_p)
1276    {
1277      munmap (fm->content, fm->length);
1278    }
1279  else
1280#endif
1281    {
1282      xfree (fm->content);
1283    }
1284  xfree (fm);
1285}
1286
1287/* Free the pointers in a NULL-terminated vector of pointers, then
1288   free the pointer itself.  */
1289void
1290free_vec (char **vec)
1291{
1292  if (vec)
1293    {
1294      char **p = vec;
1295      while (*p)
1296        xfree (*p++);
1297      xfree (vec);
1298    }
1299}
1300
1301/* Append vector V2 to vector V1.  The function frees V2 and
1302   reallocates V1 (thus you may not use the contents of neither
1303   pointer after the call).  If V1 is NULL, V2 is returned.  */
1304char **
1305merge_vecs (char **v1, char **v2)
1306{
1307  int i, j;
1308
1309  if (!v1)
1310    return v2;
1311  if (!v2)
1312    return v1;
1313  if (!*v2)
1314    {
1315      /* To avoid j == 0 */
1316      xfree (v2);
1317      return v1;
1318    }
1319  /* Count v1.  */
1320  for (i = 0; v1[i]; i++)
1321    ;
1322  /* Count v2.  */
1323  for (j = 0; v2[j]; j++)
1324    ;
1325  /* Reallocate v1.  */
1326  v1 = xrealloc (v1, (i + j + 1) * sizeof (char **));
1327  memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
1328  xfree (v2);
1329  return v1;
1330}
1331
1332/* Append a freshly allocated copy of STR to VEC.  If VEC is NULL, it
1333   is allocated as needed.  Return the new value of the vector. */
1334
1335char **
1336vec_append (char **vec, const char *str)
1337{
1338  int cnt;                      /* count of vector elements, including
1339                                   the one we're about to append */
1340  if (vec != NULL)
1341    {
1342      for (cnt = 0; vec[cnt]; cnt++)
1343        ;
1344      ++cnt;
1345    }
1346  else
1347    cnt = 1;
1348  /* Reallocate the array to fit the new element and the NULL. */
1349  vec = xrealloc (vec, (cnt + 1) * sizeof (char *));
1350  /* Append a copy of STR to the vector. */
1351  vec[cnt - 1] = xstrdup (str);
1352  vec[cnt] = NULL;
1353  return vec;
1354}
1355
1356/* Sometimes it's useful to create "sets" of strings, i.e. special
1357   hash tables where you want to store strings as keys and merely
1358   query for their existence.  Here is a set of utility routines that
1359   makes that transparent.  */
1360
1361void
1362string_set_add (struct hash_table *ht, const char *s)
1363{
1364  /* First check whether the set element already exists.  If it does,
1365     do nothing so that we don't have to free() the old element and
1366     then strdup() a new one.  */
1367  if (hash_table_contains (ht, s))
1368    return;
1369
1370  /* We use "1" as value.  It provides us a useful and clear arbitrary
1371     value, and it consumes no memory -- the pointers to the same
1372     string "1" will be shared by all the key-value pairs in all `set'
1373     hash tables.  */
1374  hash_table_put (ht, xstrdup (s), "1");
1375}
1376
1377/* Synonym for hash_table_contains... */
1378
1379int
1380string_set_contains (struct hash_table *ht, const char *s)
1381{
1382  return hash_table_contains (ht, s);
1383}
1384
1385/* Convert the specified string set to array.  ARRAY should be large
1386   enough to hold hash_table_count(ht) char pointers.  */
1387
1388void string_set_to_array (struct hash_table *ht, char **array)
1389{
1390  hash_table_iterator iter;
1391  for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1392    *array++ = iter.key;
1393}
1394
1395/* Free the string set.  This frees both the storage allocated for
1396   keys and the actual hash table.  (hash_table_destroy would only
1397   destroy the hash table.)  */
1398
1399void
1400string_set_free (struct hash_table *ht)
1401{
1402  hash_table_iterator iter;
1403  for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1404    xfree (iter.key);
1405  hash_table_destroy (ht);
1406}
1407
1408/* Utility function: simply call xfree() on all keys and values of HT.  */
1409
1410void
1411free_keys_and_values (struct hash_table *ht)
1412{
1413  hash_table_iterator iter;
1414  for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1415    {
1416      xfree (iter.key);
1417      xfree (iter.value);
1418    }
1419}
1420
1421/* Get digit grouping data for thousand separors by calling
1422   localeconv().  The data includes separator string and grouping info
1423   and is cached after the first call to the function.
1424
1425   In locales that don't set a thousand separator (such as the "C"
1426   locale), this forces it to be ",".  We are now only showing
1427   thousand separators in one place, so this shouldn't be a problem in
1428   practice.  */
1429
1430static void
1431get_grouping_data (const char **sep, const char **grouping)
1432{
1433  static const char *cached_sep;
1434  static const char *cached_grouping;
1435  static bool initialized;
1436  if (!initialized)
1437    {
1438      /* Get the grouping info from the locale. */
1439      struct lconv *lconv = localeconv ();
1440      cached_sep = lconv->thousands_sep;
1441      cached_grouping = lconv->grouping;
1442#if ! USE_NLS_PROGRESS_BAR
1443      /* We can't count column widths, so ensure that the separator
1444       * is single-byte only (let check below determine what byte). */
1445      if (strlen(cached_sep) > 1)
1446        cached_sep = "";
1447#endif
1448      if (!*cached_sep)
1449        {
1450          /* Many locales (such as "C" or "hr_HR") don't specify
1451             grouping, which we still want to use it for legibility.
1452             In those locales set the sep char to ',', unless that
1453             character is used for decimal point, in which case set it
1454             to ".".  */
1455          if (*lconv->decimal_point != ',')
1456            cached_sep = ",";
1457          else
1458            cached_sep = ".";
1459          cached_grouping = "\x03";
1460        }
1461      initialized = true;
1462    }
1463  *sep = cached_sep;
1464  *grouping = cached_grouping;
1465}
1466
1467/* Return a printed representation of N with thousand separators.
1468   This should respect locale settings, with the exception of the "C"
1469   locale which mandates no separator, but we use one anyway.
1470
1471   Unfortunately, we cannot use %'d (in fact it would be %'j) to get
1472   the separators because it's too non-portable, and it's hard to test
1473   for this feature at configure time.  Besides, it wouldn't display
1474   separators in the "C" locale, still used by many Unix users.  */
1475
1476const char *
1477with_thousand_seps (wgint n)
1478{
1479  static char outbuf[48];
1480  char *p = outbuf + sizeof outbuf;
1481
1482  /* Info received from locale */
1483  const char *grouping, *sep;
1484  int seplen;
1485
1486  /* State information */
1487  int i = 0, groupsize;
1488  const char *atgroup;
1489
1490  bool negative = n < 0;
1491
1492  /* Initialize grouping data. */
1493  get_grouping_data (&sep, &grouping);
1494  seplen = strlen (sep);
1495  atgroup = grouping;
1496  groupsize = *atgroup++;
1497
1498  /* This would overflow on WGINT_MIN, but printing negative numbers
1499     is not an important goal of this fuinction.  */
1500  if (negative)
1501    n = -n;
1502
1503  /* Write the number into the buffer, backwards, inserting the
1504     separators as necessary.  */
1505  *--p = '\0';
1506  while (1)
1507    {
1508      *--p = n % 10 + '0';
1509      n /= 10;
1510      if (n == 0)
1511        break;
1512      /* Prepend SEP to every groupsize'd digit and get new groupsize.  */
1513      if (++i == groupsize)
1514        {
1515          if (seplen == 1)
1516            *--p = *sep;
1517          else
1518            memcpy (p -= seplen, sep, seplen);
1519          i = 0;
1520          if (*atgroup)
1521            groupsize = *atgroup++;
1522        }
1523    }
1524  if (negative)
1525    *--p = '-';
1526
1527  return p;
1528}
1529
1530/* N, a byte quantity, is converted to a human-readable abberviated
1531   form a la sizes printed by `ls -lh'.  The result is written to a
1532   static buffer, a pointer to which is returned.
1533
1534   Unlike `with_thousand_seps', this approximates to the nearest unit.
1535   Quoting GNU libit: "Most people visually process strings of 3-4
1536   digits effectively, but longer strings of digits are more prone to
1537   misinterpretation.  Hence, converting to an abbreviated form
1538   usually improves readability."
1539
1540   This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
1541   original computer-related meaning of "powers of 1024".  We don't
1542   use the "*bibyte" names invented in 1998, and seldom used in
1543   practice.  Wikipedia's entry on "binary prefix" discusses this in
1544   some detail.  */
1545
1546char *
1547human_readable (HR_NUMTYPE n)
1548{
1549  /* These suffixes are compatible with those of GNU `ls -lh'. */
1550  static char powers[] =
1551    {
1552      'K',                      /* kilobyte, 2^10 bytes */
1553      'M',                      /* megabyte, 2^20 bytes */
1554      'G',                      /* gigabyte, 2^30 bytes */
1555      'T',                      /* terabyte, 2^40 bytes */
1556      'P',                      /* petabyte, 2^50 bytes */
1557      'E',                      /* exabyte,  2^60 bytes */
1558    };
1559  static char buf[8];
1560  size_t i;
1561
1562  /* If the quantity is smaller than 1K, just print it. */
1563  if (n < 1024)
1564    {
1565      snprintf (buf, sizeof (buf), "%d", (int) n);
1566      return buf;
1567    }
1568
1569  /* Loop over powers, dividing N with 1024 in each iteration.  This
1570     works unchanged for all sizes of wgint, while still avoiding
1571     non-portable `long double' arithmetic.  */
1572  for (i = 0; i < countof (powers); i++)
1573    {
1574      /* At each iteration N is greater than the *subsequent* power.
1575         That way N/1024.0 produces a decimal number in the units of
1576         *this* power.  */
1577      if ((n / 1024) < 1024 || i == countof (powers) - 1)
1578        {
1579          double val = n / 1024.0;
1580          /* Print values smaller than 10 with one decimal digits, and
1581             others without any decimals.  */
1582          snprintf (buf, sizeof (buf), "%.*f%c",
1583                    val < 10 ? 1 : 0, val, powers[i]);
1584          return buf;
1585        }
1586      n /= 1024;
1587    }
1588  return NULL;                  /* unreached */
1589}
1590
1591/* Count the digits in the provided number.  Used to allocate space
1592   when printing numbers.  */
1593
1594int
1595numdigit (wgint number)
1596{
1597  int cnt = 1;
1598  if (number < 0)
1599    ++cnt;                      /* accomodate '-' */
1600  while ((number /= 10) != 0)
1601    ++cnt;
1602  return cnt;
1603}
1604
1605#define PR(mask) *p++ = n / (mask) + '0'
1606
1607/* DIGITS_<D> is used to print a D-digit number and should be called
1608   with mask==10^(D-1).  It prints n/mask (the first digit), reducing
1609   n to n%mask (the remaining digits), and calling DIGITS_<D-1>.
1610   Recursively this continues until DIGITS_1 is invoked.  */
1611
1612#define DIGITS_1(mask) PR (mask)
1613#define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10)
1614#define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10)
1615#define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10)
1616#define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10)
1617#define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10)
1618#define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10)
1619#define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10)
1620#define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10)
1621#define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10)
1622
1623/* DIGITS_<11-20> are only used on machines with 64-bit wgints. */
1624
1625#define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10)
1626#define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10)
1627#define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10)
1628#define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10)
1629#define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10)
1630#define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10)
1631#define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10)
1632#define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
1633#define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)
1634
1635/* Shorthand for casting to wgint. */
1636#define W wgint
1637
1638/* Print NUMBER to BUFFER in base 10.  This is equivalent to
1639   `sprintf(buffer, "%lld", (long long) number)', only typically much
1640   faster and portable to machines without long long.
1641
1642   The speedup may make a difference in programs that frequently
1643   convert numbers to strings.  Some implementations of sprintf,
1644   particularly the one in some versions of GNU libc, have been known
1645   to be quite slow when converting integers to strings.
1646
1647   Return the pointer to the location where the terminating zero was
1648   printed.  (Equivalent to calling buffer+strlen(buffer) after the
1649   function is done.)
1650
1651   BUFFER should be large enough to accept as many bytes as you expect
1652   the number to take up.  On machines with 64-bit wgints the maximum
1653   needed size is 24 bytes.  That includes the digits needed for the
1654   largest 64-bit number, the `-' sign in case it's negative, and the
1655   terminating '\0'.  */
1656
1657char *
1658number_to_string (char *buffer, wgint number)
1659{
1660  char *p = buffer;
1661  wgint n = number;
1662
1663  int last_digit_char = 0;
1664
1665#if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8)
1666  /* We are running in a very strange environment.  Leave the correct
1667     printing to sprintf.  */
1668  p += sprintf (buf, "%j", (intmax_t) (n));
1669#else  /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1670
1671  if (n < 0)
1672    {
1673      if (n < -WGINT_MAX)
1674        {
1675          /* n = -n would overflow because -n would evaluate to a
1676             wgint value larger than WGINT_MAX.  Need to make n
1677             smaller and handle the last digit separately.  */
1678          int last_digit = n % 10;
1679          /* The sign of n%10 is implementation-defined. */
1680          if (last_digit < 0)
1681            last_digit_char = '0' - last_digit;
1682          else
1683            last_digit_char = '0' + last_digit;
1684          /* After n is made smaller, -n will not overflow. */
1685          n /= 10;
1686        }
1687
1688      *p++ = '-';
1689      n = -n;
1690    }
1691
1692  /* Use the DIGITS_ macro appropriate for N's number of digits.  That
1693     way printing any N is fully open-coded without a loop or jump.
1694     (Also see description of DIGITS_*.)  */
1695
1696  if      (n < 10)                       DIGITS_1 (1);
1697  else if (n < 100)                      DIGITS_2 (10);
1698  else if (n < 1000)                     DIGITS_3 (100);
1699  else if (n < 10000)                    DIGITS_4 (1000);
1700  else if (n < 100000)                   DIGITS_5 (10000);
1701  else if (n < 1000000)                  DIGITS_6 (100000);
1702  else if (n < 10000000)                 DIGITS_7 (1000000);
1703  else if (n < 100000000)                DIGITS_8 (10000000);
1704  else if (n < 1000000000)               DIGITS_9 (100000000);
1705#if SIZEOF_WGINT == 4
1706  /* wgint is 32 bits wide: no number has more than 10 digits. */
1707  else                                   DIGITS_10 (1000000000);
1708#else
1709  /* wgint is 64 bits wide: handle numbers with 9-19 decimal digits.
1710     Constants are constructed by compile-time multiplication to avoid
1711     dealing with different notations for 64-bit constants
1712     (nL/nLL/nI64, depending on the compiler and architecture).  */
1713  else if (n < 10*(W)1000000000)         DIGITS_10 (1000000000);
1714  else if (n < 100*(W)1000000000)        DIGITS_11 (10*(W)1000000000);
1715  else if (n < 1000*(W)1000000000)       DIGITS_12 (100*(W)1000000000);
1716  else if (n < 10000*(W)1000000000)      DIGITS_13 (1000*(W)1000000000);
1717  else if (n < 100000*(W)1000000000)     DIGITS_14 (10000*(W)1000000000);
1718  else if (n < 1000000*(W)1000000000)    DIGITS_15 (100000*(W)1000000000);
1719  else if (n < 10000000*(W)1000000000)   DIGITS_16 (1000000*(W)1000000000);
1720  else if (n < 100000000*(W)1000000000)  DIGITS_17 (10000000*(W)1000000000);
1721  else if (n < 1000000000*(W)1000000000) DIGITS_18 (100000000*(W)1000000000);
1722  else                                   DIGITS_19 (1000000000*(W)1000000000);
1723#endif
1724
1725  if (last_digit_char)
1726    *p++ = last_digit_char;
1727
1728  *p = '\0';
1729#endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1730
1731  return p;
1732}
1733
1734#undef PR
1735#undef W
1736#undef SPRINTF_WGINT
1737#undef DIGITS_1
1738#undef DIGITS_2
1739#undef DIGITS_3
1740#undef DIGITS_4
1741#undef DIGITS_5
1742#undef DIGITS_6
1743#undef DIGITS_7
1744#undef DIGITS_8
1745#undef DIGITS_9
1746#undef DIGITS_10
1747#undef DIGITS_11
1748#undef DIGITS_12
1749#undef DIGITS_13
1750#undef DIGITS_14
1751#undef DIGITS_15
1752#undef DIGITS_16
1753#undef DIGITS_17
1754#undef DIGITS_18
1755#undef DIGITS_19
1756
1757#define RING_SIZE 3
1758
1759/* Print NUMBER to a statically allocated string and return a pointer
1760   to the printed representation.
1761
1762   This function is intended to be used in conjunction with printf.
1763   It is hard to portably print wgint values:
1764    a) you cannot use printf("%ld", number) because wgint can be long
1765       long on 32-bit machines with LFS.
1766    b) you cannot use printf("%lld", number) because NUMBER could be
1767       long on 32-bit machines without LFS, or on 64-bit machines,
1768       which do not require LFS.  Also, Windows doesn't support %lld.
1769    c) you cannot use printf("%j", (int_max_t) number) because not all
1770       versions of printf support "%j", the most notable being the one
1771       on Windows.
1772    d) you cannot #define WGINT_FMT to the appropriate format and use
1773       printf(WGINT_FMT, number) because that would break translations
1774       for user-visible messages, such as printf("Downloaded: %d
1775       bytes\n", number).
1776
1777   What you should use instead is printf("%s", number_to_static_string
1778   (number)).
1779
1780   CAVEAT: since the function returns pointers to static data, you
1781   must be careful to copy its result before calling it again.
1782   However, to make it more useful with printf, the function maintains
1783   an internal ring of static buffers to return.  That way things like
1784   printf("%s %s", number_to_static_string (num1),
1785   number_to_static_string (num2)) work as expected.  Three buffers
1786   are currently used, which means that "%s %s %s" will work, but "%s
1787   %s %s %s" won't.  If you need to print more than three wgints,
1788   bump the RING_SIZE (or rethink your message.)  */
1789
1790char *
1791number_to_static_string (wgint number)
1792{
1793  static char ring[RING_SIZE][24];
1794  static int ringpos;
1795  char *buf = ring[ringpos];
1796  number_to_string (buf, number);
1797  ringpos = (ringpos + 1) % RING_SIZE;
1798  return buf;
1799}
1800
1801/* Determine the width of the terminal we're running on.  If that's
1802   not possible, return 0.  */
1803
1804int
1805determine_screen_width (void)
1806{
1807  /* If there's a way to get the terminal size using POSIX
1808     tcgetattr(), somebody please tell me.  */
1809#ifdef TIOCGWINSZ
1810  int fd;
1811  struct winsize wsz;
1812
1813  if (opt.lfilename != NULL)
1814    return 0;
1815
1816  fd = fileno (stderr);
1817  if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
1818    return 0;                   /* most likely ENOTTY */
1819
1820  return wsz.ws_col;
1821#elif defined(WINDOWS)
1822  CONSOLE_SCREEN_BUFFER_INFO csbi;
1823  if (!GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &csbi))
1824    return 0;
1825  return csbi.dwSize.X;
1826#else  /* neither TIOCGWINSZ nor WINDOWS */
1827  return 0;
1828#endif /* neither TIOCGWINSZ nor WINDOWS */
1829}
1830
1831/* Whether the rnd system (either rand or [dl]rand48) has been
1832   seeded.  */
1833static int rnd_seeded;
1834
1835/* Return a random number between 0 and MAX-1, inclusive.
1836
1837   If the system does not support lrand48 and MAX is greater than the
1838   value of RAND_MAX+1 on the system, the returned value will be in
1839   the range [0, RAND_MAX].  This may be fixed in a future release.
1840   The random number generator is seeded automatically the first time
1841   it is called.
1842
1843   This uses lrand48 where available, rand elsewhere.  DO NOT use it
1844   for cryptography.  It is only meant to be used in situations where
1845   quality of the random numbers returned doesn't really matter.  */
1846
1847int
1848random_number (int max)
1849{
1850#ifdef HAVE_DRAND48
1851  if (!rnd_seeded)
1852    {
1853      srand48 ((long) time (NULL) ^ (long) getpid ());
1854      rnd_seeded = 1;
1855    }
1856  return lrand48 () % max;
1857#else  /* not HAVE_DRAND48 */
1858
1859  double bounded;
1860  int rnd;
1861  if (!rnd_seeded)
1862    {
1863      srand ((unsigned) time (NULL) ^ (unsigned) getpid ());
1864      rnd_seeded = 1;
1865    }
1866  rnd = rand ();
1867
1868  /* Like rand() % max, but uses the high-order bits for better
1869     randomness on architectures where rand() is implemented using a
1870     simple congruential generator.  */
1871
1872  bounded = (double) max * rnd / (RAND_MAX + 1.0);
1873  return (int) bounded;
1874
1875#endif /* not HAVE_DRAND48 */
1876}
1877
1878/* Return a random uniformly distributed floating point number in the
1879   [0, 1) range.  Uses drand48 where available, and a really lame
1880   kludge elsewhere.  */
1881
1882double
1883random_float (void)
1884{
1885#ifdef HAVE_DRAND48
1886  if (!rnd_seeded)
1887    {
1888      srand48 ((long) time (NULL) ^ (long) getpid ());
1889      rnd_seeded = 1;
1890    }
1891  return drand48 ();
1892#else  /* not HAVE_DRAND48 */
1893  return (  random_number (10000) / 10000.0
1894          + random_number (10000) / (10000.0 * 10000.0)
1895          + random_number (10000) / (10000.0 * 10000.0 * 10000.0)
1896          + random_number (10000) / (10000.0 * 10000.0 * 10000.0 * 10000.0));
1897#endif /* not HAVE_DRAND48 */
1898}
1899
1900/* Implementation of run_with_timeout, a generic timeout-forcing
1901   routine for systems with Unix-like signal handling.  */
1902
1903#ifdef USE_SIGNAL_TIMEOUT
1904# ifdef HAVE_SIGSETJMP
1905#  define SETJMP(env) sigsetjmp (env, 1)
1906
1907static sigjmp_buf run_with_timeout_env;
1908
1909static void
1910abort_run_with_timeout (int sig)
1911{
1912  assert (sig == SIGALRM);
1913  siglongjmp (run_with_timeout_env, -1);
1914}
1915# else /* not HAVE_SIGSETJMP */
1916#  define SETJMP(env) setjmp (env)
1917
1918static jmp_buf run_with_timeout_env;
1919
1920static void
1921abort_run_with_timeout (int sig)
1922{
1923  assert (sig == SIGALRM);
1924  /* We don't have siglongjmp to preserve the set of blocked signals;
1925     if we longjumped out of the handler at this point, SIGALRM would
1926     remain blocked.  We must unblock it manually. */
1927  int mask = siggetmask ();
1928  mask &= ~sigmask (SIGALRM);
1929  sigsetmask (mask);
1930
1931  /* Now it's safe to longjump. */
1932  longjmp (run_with_timeout_env, -1);
1933}
1934# endif /* not HAVE_SIGSETJMP */
1935
1936/* Arrange for SIGALRM to be delivered in TIMEOUT seconds.  This uses
1937   setitimer where available, alarm otherwise.
1938
1939   TIMEOUT should be non-zero.  If the timeout value is so small that
1940   it would be rounded to zero, it is rounded to the least legal value
1941   instead (1us for setitimer, 1s for alarm).  That ensures that
1942   SIGALRM will be delivered in all cases.  */
1943
1944static void
1945alarm_set (double timeout)
1946{
1947#ifdef ITIMER_REAL
1948  /* Use the modern itimer interface. */
1949  struct itimerval itv;
1950  xzero (itv);
1951  itv.it_value.tv_sec = (long) timeout;
1952  itv.it_value.tv_usec = 1000000 * (timeout - (long)timeout);
1953  if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0)
1954    /* Ensure that we wait for at least the minimum interval.
1955       Specifying zero would mean "wait forever".  */
1956    itv.it_value.tv_usec = 1;
1957  setitimer (ITIMER_REAL, &itv, NULL);
1958#else  /* not ITIMER_REAL */
1959  /* Use the old alarm() interface. */
1960  int secs = (int) timeout;
1961  if (secs == 0)
1962    /* Round TIMEOUTs smaller than 1 to 1, not to zero.  This is
1963       because alarm(0) means "never deliver the alarm", i.e. "wait
1964       forever", which is not what someone who specifies a 0.5s
1965       timeout would expect.  */
1966    secs = 1;
1967  alarm (secs);
1968#endif /* not ITIMER_REAL */
1969}
1970
1971/* Cancel the alarm set with alarm_set. */
1972
1973static void
1974alarm_cancel (void)
1975{
1976#ifdef ITIMER_REAL
1977  struct itimerval disable;
1978  xzero (disable);
1979  setitimer (ITIMER_REAL, &disable, NULL);
1980#else  /* not ITIMER_REAL */
1981  alarm (0);
1982#endif /* not ITIMER_REAL */
1983}
1984
1985/* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
1986   seconds.  Returns true if the function was interrupted with a
1987   timeout, false otherwise.
1988
1989   This works by setting up SIGALRM to be delivered in TIMEOUT seconds
1990   using setitimer() or alarm().  The timeout is enforced by
1991   longjumping out of the SIGALRM handler.  This has several
1992   advantages compared to the traditional approach of relying on
1993   signals causing system calls to exit with EINTR:
1994
1995     * The callback function is *forcibly* interrupted after the
1996       timeout expires, (almost) regardless of what it was doing and
1997       whether it was in a syscall.  For example, a calculation that
1998       takes a long time is interrupted as reliably as an IO
1999       operation.
2000
2001     * It works with both SYSV and BSD signals because it doesn't
2002       depend on the default setting of SA_RESTART.
2003
2004     * It doesn't require special handler setup beyond a simple call
2005       to signal().  (It does use sigsetjmp/siglongjmp, but they're
2006       optional.)
2007
2008   The only downside is that, if FUN allocates internal resources that
2009   are normally freed prior to exit from the functions, they will be
2010   lost in case of timeout.  */
2011
2012bool
2013run_with_timeout (double timeout, void (*fun) (void *), void *arg)
2014{
2015  int saved_errno;
2016
2017  if (timeout == 0)
2018    {
2019      fun (arg);
2020      return false;
2021    }
2022
2023  signal (SIGALRM, abort_run_with_timeout);
2024  if (SETJMP (run_with_timeout_env) != 0)
2025    {
2026      /* Longjumped out of FUN with a timeout. */
2027      signal (SIGALRM, SIG_DFL);
2028      return true;
2029    }
2030  alarm_set (timeout);
2031  fun (arg);
2032
2033  /* Preserve errno in case alarm() or signal() modifies it. */
2034  saved_errno = errno;
2035  alarm_cancel ();
2036  signal (SIGALRM, SIG_DFL);
2037  errno = saved_errno;
2038
2039  return false;
2040}
2041
2042#else  /* not USE_SIGNAL_TIMEOUT */
2043
2044#ifndef WINDOWS
2045/* A stub version of run_with_timeout that just calls FUN(ARG).  Don't
2046   define it under Windows, because Windows has its own version of
2047   run_with_timeout that uses threads.  */
2048
2049bool
2050run_with_timeout (double timeout, void (*fun) (void *), void *arg)
2051{
2052  fun (arg);
2053  return false;
2054}
2055#endif /* not WINDOWS */
2056#endif /* not USE_SIGNAL_TIMEOUT */
2057
2058#ifndef WINDOWS
2059
2060/* Sleep the specified amount of seconds.  On machines without
2061   nanosleep(), this may sleep shorter if interrupted by signals.  */
2062
2063void
2064xsleep (double seconds)
2065{
2066#ifdef HAVE_NANOSLEEP
2067  /* nanosleep is the preferred interface because it offers high
2068     accuracy and, more importantly, because it allows us to reliably
2069     restart receiving a signal such as SIGWINCH.  (There was an
2070     actual Debian bug report about --limit-rate malfunctioning while
2071     the terminal was being resized.)  */
2072  struct timespec sleep, remaining;
2073  sleep.tv_sec = (long) seconds;
2074  sleep.tv_nsec = 1000000000 * (seconds - (long) seconds);
2075  while (nanosleep (&sleep, &remaining) < 0 && errno == EINTR)
2076    /* If nanosleep has been interrupted by a signal, adjust the
2077       sleeping period and return to sleep.  */
2078    sleep = remaining;
2079#elif defined(HAVE_USLEEP)
2080  /* If usleep is available, use it in preference to select.  */
2081  if (seconds >= 1)
2082    {
2083      /* On some systems, usleep cannot handle values larger than
2084         1,000,000.  If the period is larger than that, use sleep
2085         first, then add usleep for subsecond accuracy.  */
2086      sleep (seconds);
2087      seconds -= (long) seconds;
2088    }
2089  usleep (seconds * 1000000);
2090#else /* fall back select */
2091  /* Note that, although Windows supports select, it can't be used to
2092     implement sleeping because Winsock's select doesn't implement
2093     timeout when it is passed NULL pointers for all fd sets.  (But it
2094     does under Cygwin, which implements Unix-compatible select.)  */
2095  struct timeval sleep;
2096  sleep.tv_sec = (long) seconds;
2097  sleep.tv_usec = 1000000 * (seconds - (long) seconds);
2098  select (0, NULL, NULL, NULL, &sleep);
2099  /* If select returns -1 and errno is EINTR, it means we were
2100     interrupted by a signal.  But without knowing how long we've
2101     actually slept, we can't return to sleep.  Using gettimeofday to
2102     track sleeps is slow and unreliable due to clock skew.  */
2103#endif
2104}
2105
2106#endif /* not WINDOWS */
2107
2108/* Encode the octets in DATA of length LENGTH to base64 format,
2109   storing the result to DEST.  The output will be zero-terminated,
2110   and must point to a writable buffer of at least
2111   1+BASE64_LENGTH(length) bytes.  The function returns the length of
2112   the resulting base64 data, not counting the terminating zero.
2113
2114   This implementation does not emit newlines after 76 characters of
2115   base64 data.  */
2116
2117int
2118base64_encode (const void *data, int length, char *dest)
2119{
2120  /* Conversion table.  */
2121  static const char tbl[64] = {
2122    'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
2123    'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
2124    'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
2125    'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
2126  };
2127  /* Access bytes in DATA as unsigned char, otherwise the shifts below
2128     don't work for data with MSB set. */
2129  const unsigned char *s = data;
2130  /* Theoretical ANSI violation when length < 3. */
2131  const unsigned char *end = (const unsigned char *) data + length - 2;
2132  char *p = dest;
2133
2134  /* Transform the 3x8 bits to 4x6 bits, as required by base64.  */
2135  for (; s < end; s += 3)
2136    {
2137      *p++ = tbl[s[0] >> 2];
2138      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
2139      *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
2140      *p++ = tbl[s[2] & 0x3f];
2141    }
2142
2143  /* Pad the result if necessary...  */
2144  switch (length % 3)
2145    {
2146    case 1:
2147      *p++ = tbl[s[0] >> 2];
2148      *p++ = tbl[(s[0] & 3) << 4];
2149      *p++ = '=';
2150      *p++ = '=';
2151      break;
2152    case 2:
2153      *p++ = tbl[s[0] >> 2];
2154      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
2155      *p++ = tbl[((s[1] & 0xf) << 2)];
2156      *p++ = '=';
2157      break;
2158    }
2159  /* ...and zero-terminate it.  */
2160  *p = '\0';
2161
2162  return p - dest;
2163}
2164
2165/* Store in C the next non-whitespace character from the string, or \0
2166   when end of string is reached.  */
2167#define NEXT_CHAR(c, p) do {                    \
2168  c = (unsigned char) *p++;                     \
2169} while (c_isspace (c))
2170
2171#define IS_ASCII(c) (((c) & 0x80) == 0)
2172
2173/* Decode data from BASE64 (a null-terminated string) into memory
2174   pointed to by DEST.  DEST is assumed to be large enough to
2175   accomodate the decoded data, which is guaranteed to be no more than
2176   3/4*strlen(base64).
2177
2178   Since DEST is assumed to contain binary data, it is not
2179   NUL-terminated.  The function returns the length of the data
2180   written to TO.  -1 is returned in case of error caused by malformed
2181   base64 input.
2182
2183   This function originates from Free Recode.  */
2184
2185int
2186base64_decode (const char *base64, void *dest)
2187{
2188  /* Table of base64 values for first 128 characters.  Note that this
2189     assumes ASCII (but so does Wget in other places).  */
2190  static const signed char base64_char_to_value[128] =
2191    {
2192      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*   0-  9 */
2193      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  10- 19 */
2194      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  20- 29 */
2195      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  30- 39 */
2196      -1,  -1,  -1,  62,  -1,  -1,  -1,  63,  52,  53,  /*  40- 49 */
2197      54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  /*  50- 59 */
2198      -1,  -1,  -1,  -1,  -1,  0,   1,   2,   3,   4,   /*  60- 69 */
2199      5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  /*  70- 79 */
2200      15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  /*  80- 89 */
2201      25,  -1,  -1,  -1,  -1,  -1,  -1,  26,  27,  28,  /*  90- 99 */
2202      29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  /* 100-109 */
2203      39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  /* 110-119 */
2204      49,  50,  51,  -1,  -1,  -1,  -1,  -1             /* 120-127 */
2205    };
2206#define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c])
2207#define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=')
2208
2209  const char *p = base64;
2210  char *q = dest;
2211
2212  while (1)
2213    {
2214      unsigned char c;
2215      unsigned long value;
2216
2217      /* Process first byte of a quadruplet.  */
2218      NEXT_CHAR (c, p);
2219      if (!c)
2220        break;
2221      if (c == '=' || !IS_BASE64 (c))
2222        return -1;              /* illegal char while decoding base64 */
2223      value = BASE64_CHAR_TO_VALUE (c) << 18;
2224
2225      /* Process second byte of a quadruplet.  */
2226      NEXT_CHAR (c, p);
2227      if (!c)
2228        return -1;              /* premature EOF while decoding base64 */
2229      if (c == '=' || !IS_BASE64 (c))
2230        return -1;              /* illegal char while decoding base64 */
2231      value |= BASE64_CHAR_TO_VALUE (c) << 12;
2232      *q++ = value >> 16;
2233
2234      /* Process third byte of a quadruplet.  */
2235      NEXT_CHAR (c, p);
2236      if (!c)
2237        return -1;              /* premature EOF while decoding base64 */
2238      if (!IS_BASE64 (c))
2239        return -1;              /* illegal char while decoding base64 */
2240
2241      if (c == '=')
2242        {
2243          NEXT_CHAR (c, p);
2244          if (!c)
2245            return -1;          /* premature EOF while decoding base64 */
2246          if (c != '=')
2247            return -1;          /* padding `=' expected but not found */
2248          continue;
2249        }
2250
2251      value |= BASE64_CHAR_TO_VALUE (c) << 6;
2252      *q++ = 0xff & value >> 8;
2253
2254      /* Process fourth byte of a quadruplet.  */
2255      NEXT_CHAR (c, p);
2256      if (!c)
2257        return -1;              /* premature EOF while decoding base64 */
2258      if (c == '=')
2259        continue;
2260      if (!IS_BASE64 (c))
2261        return -1;              /* illegal char while decoding base64 */
2262
2263      value |= BASE64_CHAR_TO_VALUE (c);
2264      *q++ = 0xff & value;
2265    }
2266#undef IS_BASE64
2267#undef BASE64_CHAR_TO_VALUE
2268
2269  return q - (char *) dest;
2270}
2271
2272#undef IS_ASCII
2273#undef NEXT_CHAR
2274
2275/* Simple merge sort for use by stable_sort.  Implementation courtesy
2276   Zeljko Vrba with additional debugging by Nenad Barbutov.  */
2277
2278static void
2279mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to,
2280                    int (*cmpfun) (const void *, const void *))
2281{
2282#define ELT(array, pos) ((char *)(array) + (pos) * size)
2283  if (from < to)
2284    {
2285      size_t i, j, k;
2286      size_t mid = (to + from) / 2;
2287      mergesort_internal (base, temp, size, from, mid, cmpfun);
2288      mergesort_internal (base, temp, size, mid + 1, to, cmpfun);
2289      i = from;
2290      j = mid + 1;
2291      for (k = from; (i <= mid) && (j <= to); k++)
2292        if (cmpfun (ELT (base, i), ELT (base, j)) <= 0)
2293          memcpy (ELT (temp, k), ELT (base, i++), size);
2294        else
2295          memcpy (ELT (temp, k), ELT (base, j++), size);
2296      while (i <= mid)
2297        memcpy (ELT (temp, k++), ELT (base, i++), size);
2298      while (j <= to)
2299        memcpy (ELT (temp, k++), ELT (base, j++), size);
2300      for (k = from; k <= to; k++)
2301        memcpy (ELT (base, k), ELT (temp, k), size);
2302    }
2303#undef ELT
2304}
2305
2306/* Stable sort with interface exactly like standard library's qsort.
2307   Uses mergesort internally, allocating temporary storage with
2308   alloca.  */
2309
2310void
2311stable_sort (void *base, size_t nmemb, size_t size,
2312             int (*cmpfun) (const void *, const void *))
2313{
2314  if (size > 1)
2315    {
2316      void *temp = alloca (nmemb * size * sizeof (void *));
2317      mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
2318    }
2319}
2320
2321/* Print a decimal number.  If it is equal to or larger than ten, the
2322   number is rounded.  Otherwise it is printed with one significant
2323   digit without trailing zeros and with no more than three fractional
2324   digits total.  For example, 0.1 is printed as "0.1", 0.035 is
2325   printed as "0.04", 0.0091 as "0.009", and 0.0003 as simply "0".
2326
2327   This is useful for displaying durations because it provides
2328   order-of-magnitude information without unnecessary clutter --
2329   long-running downloads are shown without the fractional part, and
2330   short ones still retain one significant digit.  */
2331
2332const char *
2333print_decimal (double number)
2334{
2335  static char buf[32];
2336  double n = number >= 0 ? number : -number;
2337
2338  if (n >= 9.95)
2339    /* Cut off at 9.95 because the below %.1f would round 9.96 to
2340       "10.0" instead of "10".  OTOH 9.94 will print as "9.9".  */
2341    snprintf (buf, sizeof buf, "%.0f", number);
2342  else if (n >= 0.95)
2343    snprintf (buf, sizeof buf, "%.1f", number);
2344  else if (n >= 0.001)
2345    snprintf (buf, sizeof buf, "%.1g", number);
2346  else if (n >= 0.0005)
2347    /* round [0.0005, 0.001) to 0.001 */
2348    snprintf (buf, sizeof buf, "%.3f", number);
2349  else
2350    /* print numbers close to 0 as 0, not 0.000 */
2351    strcpy (buf, "0");
2352
2353  return buf;
2354}
2355
2356#ifdef TESTING
2357
2358const char *
2359test_subdir_p()
2360{
2361  int i;
2362  struct {
2363    char *d1;
2364    char *d2;
2365    bool result;
2366  } test_array[] = {
2367    { "/somedir", "/somedir", true },
2368    { "/somedir", "/somedir/d2", true },
2369    { "/somedir/d1", "/somedir", false },
2370  };
2371
2372  for (i = 0; i < countof(test_array); ++i)
2373    {
2374      bool res = subdir_p (test_array[i].d1, test_array[i].d2);
2375
2376      mu_assert ("test_subdir_p: wrong result",
2377                 res == test_array[i].result);
2378    }
2379
2380  return NULL;
2381}
2382
2383const char *
2384test_dir_matches_p()
2385{
2386  int i;
2387  struct {
2388    char *dirlist[3];
2389    char *dir;
2390    bool result;
2391  } test_array[] = {
2392    { { "/somedir", "/someotherdir", NULL }, "somedir", true },
2393    { { "/somedir", "/someotherdir", NULL }, "anotherdir", false },
2394    { { "/somedir", "/*otherdir", NULL }, "anotherdir", true },
2395    { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true },
2396    { { "*/*d1", "/someotherdir", NULL }, "somedir/d1", true },
2397    { { "/somedir/d1", "/someotherdir", NULL }, "d1", false },
2398    { { "!COMPLETE", NULL, NULL }, "!COMPLETE", true },
2399    { { "*COMPLETE", NULL, NULL }, "!COMPLETE", true },
2400    { { "*/!COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
2401    { { "*COMPLETE", NULL, NULL }, "foo/!COMPLETE", false },
2402    { { "*/*COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
2403    { { "/dir with spaces", NULL, NULL }, "dir with spaces", true },
2404    { { "/dir*with*spaces", NULL, NULL }, "dir with spaces", true },
2405    { { "/Tmp/has", NULL, NULL }, "/Tmp/has space", false },
2406    { { "/Tmp/has", NULL, NULL }, "/Tmp/has,comma", false },
2407  };
2408
2409  for (i = 0; i < countof(test_array); ++i)
2410    {
2411      bool res = dir_matches_p (test_array[i].dirlist, test_array[i].dir);
2412
2413      mu_assert ("test_dir_matches_p: wrong result",
2414                 res == test_array[i].result);
2415    }
2416
2417  return NULL;
2418}
2419
2420#endif /* TESTING */
2421
2422