1/*  man.c: How to read and format man files.
2    $Id: man.c,v 1.4 2004/04/11 17:56:46 karl Exp $
3
4   Copyright (C) 1995, 1997, 1998, 1999, 2000, 2002, 2003, 2004 Free Software
5   Foundation, Inc.
6
7   This program is free software; you can redistribute it and/or modify
8   it under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 2, or (at your option)
10   any later version.
11
12   This program is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   GNU General Public License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with this program; if not, write to the Free Software
19   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20
21   Written by Brian Fox Thu May  4 09:17:52 1995 (bfox@ai.mit.edu). */
22
23#include "info.h"
24#include <sys/ioctl.h>
25#include "signals.h"
26#if defined (HAVE_SYS_TIME_H)
27#include <sys/time.h>
28#endif
29#if defined (HAVE_SYS_WAIT_H)
30#include <sys/wait.h>
31#endif
32
33#include "tilde.h"
34#include "man.h"
35
36#if !defined (_POSIX_VERSION)
37#define pid_t int
38#endif
39
40#if defined (FD_SET)
41#  if defined (hpux)
42#    define fd_set_cast(x) (int *)(x)
43#  else
44#    define fd_set_cast(x) (fd_set *)(x)
45#  endif /* !hpux */
46#endif /* FD_SET */
47
48#if STRIP_DOT_EXE
49static char const * const exec_extensions[] = {
50  ".exe", ".com", ".bat", ".btm", ".sh", ".ksh", ".pl", ".sed", "", NULL
51};
52#else
53static char const * const exec_extensions[] = { "", NULL };
54#endif
55
56static char *read_from_fd (int fd);
57static void clean_manpage (char *manpage);
58static NODE *manpage_node_of_file_buffer (FILE_BUFFER *file_buffer,
59    char *pagename);
60static char *get_manpage_contents (char *pagename);
61
62NODE *
63make_manpage_node (char *pagename)
64{
65  return (info_get_node (MANPAGE_FILE_BUFFER_NAME, pagename));
66}
67
68NODE *
69get_manpage_node (FILE_BUFFER *file_buffer, char *pagename)
70{
71  NODE *node;
72
73  node = manpage_node_of_file_buffer (file_buffer, pagename);
74
75  if (!node)
76    {
77      char *page;
78
79      page = get_manpage_contents (pagename);
80
81      if (page)
82        {
83          char header[1024];
84          long oldsize, newsize;
85          int hlen, plen;
86	  char *old_contents = file_buffer->contents;
87
88          sprintf (header, "\n\n%c\n%s %s,  %s %s,  %s (dir)\n\n",
89                   INFO_COOKIE,
90                   INFO_FILE_LABEL, file_buffer->filename,
91                   INFO_NODE_LABEL, pagename,
92                   INFO_UP_LABEL);
93          oldsize = file_buffer->filesize;
94          hlen = strlen (header);
95          plen = strlen (page);
96          newsize = (oldsize + hlen + plen);
97          file_buffer->contents =
98            (char *)xrealloc (file_buffer->contents, 1 + newsize);
99          memcpy (file_buffer->contents + oldsize, header, hlen);
100          memcpy (file_buffer->contents + oldsize + hlen, page, plen);
101          file_buffer->contents[newsize] = '\0';
102          file_buffer->filesize = newsize;
103          file_buffer->finfo.st_size = newsize;
104          build_tags_and_nodes (file_buffer);
105          free (page);
106	  /* We have just relocated file_buffer->contents from under
107	     the feet of info_windows[] array.  Therefore, all the
108	     nodes on that list which are showing man pages have their
109	     contents member pointing into the blue.  Undo that harm.  */
110	  if (old_contents && oldsize && old_contents != file_buffer->contents)
111	    {
112	      int iw;
113	      INFO_WINDOW *info_win;
114	      char *old_contents_end = old_contents + oldsize;
115
116	      for (iw = 0; (info_win = info_windows[iw]); iw++)
117		{
118		  int in;
119
120		  for (in = 0; in < info_win->nodes_index; in++)
121		    {
122		      NODE *tmp_node = info_win->nodes[in];
123
124		      /* It really only suffices to see that node->filename
125			 is "*manpages*".  But after several hours of
126			 debugging this, would you blame me for being a bit
127			 paranoid?  */
128		      if (tmp_node && tmp_node->filename
129                          && tmp_node->contents
130                          && strcmp (tmp_node->filename,
131				  MANPAGE_FILE_BUFFER_NAME) == 0
132                          && tmp_node->contents >= old_contents
133                          && tmp_node->contents + tmp_node->nodelen
134                                <= old_contents_end)
135			{
136			  info_win->nodes[in] =
137			    manpage_node_of_file_buffer (file_buffer,
138                                tmp_node->nodename);
139			  free (tmp_node->nodename);
140			  free (tmp_node);
141			}
142		    }
143		}
144	    }
145        }
146
147      node = manpage_node_of_file_buffer (file_buffer, pagename);
148    }
149
150  return (node);
151}
152
153FILE_BUFFER *
154create_manpage_file_buffer (void)
155{
156  FILE_BUFFER *file_buffer = make_file_buffer ();
157  file_buffer->filename = xstrdup (MANPAGE_FILE_BUFFER_NAME);
158  file_buffer->fullpath = xstrdup (MANPAGE_FILE_BUFFER_NAME);
159  file_buffer->finfo.st_size = 0;
160  file_buffer->filesize = 0;
161  file_buffer->contents = (char *)NULL;
162  file_buffer->flags = (N_IsInternal | N_CannotGC | N_IsManPage);
163
164  return (file_buffer);
165}
166
167/* Scan the list of directories in PATH looking for FILENAME.  If we find
168   one that is an executable file, return it as a new string.  Otherwise,
169   return a NULL pointer. */
170static char *
171executable_file_in_path (char *filename, char *path)
172{
173  struct stat finfo;
174  char *temp_dirname;
175  int statable, dirname_index;
176
177  dirname_index = 0;
178
179  while ((temp_dirname = extract_colon_unit (path, &dirname_index)))
180    {
181      char *temp;
182      char *temp_end;
183      int i;
184
185      /* Expand a leading tilde if one is present. */
186      if (*temp_dirname == '~')
187        {
188          char *expanded_dirname;
189
190          expanded_dirname = tilde_expand_word (temp_dirname);
191          free (temp_dirname);
192          temp_dirname = expanded_dirname;
193        }
194
195      temp = (char *)xmalloc (34 + strlen (temp_dirname) + strlen (filename));
196      strcpy (temp, temp_dirname);
197      if (!IS_SLASH (temp[(strlen (temp)) - 1]))
198        strcat (temp, "/");
199      strcat (temp, filename);
200      temp_end = temp + strlen (temp);
201
202      free (temp_dirname);
203
204      /* Look for FILENAME, possibly with any of the extensions
205	 in EXEC_EXTENSIONS[].  */
206      for (i = 0; exec_extensions[i]; i++)
207	{
208	  if (exec_extensions[i][0])
209	    strcpy (temp_end, exec_extensions[i]);
210	  statable = (stat (temp, &finfo) == 0);
211
212	  /* If we have found a regular executable file, then use it. */
213	  if ((statable) && (S_ISREG (finfo.st_mode)) &&
214	      (access (temp, X_OK) == 0))
215	    return (temp);
216	}
217
218      free (temp);
219    }
220  return ((char *)NULL);
221}
222
/* Locate the program called "man" by searching the directories named in
   the PATH environment variable.  The result is whatever
   executable_file_in_path () returns for that search. */
static char *
find_man_formatter (void)
{
  char *search_path = (char *)getenv ("PATH");

  return (executable_file_in_path ("man", search_path));
}
229
/* The page name and section most recently split out by
   get_page_and_section ().  Both are heap allocated; manpage_section is
   NULL when the input had no parenthesized part. */
static char *manpage_pagename = (char *)NULL;
static char *manpage_section  = (char *)NULL;

/* Split PAGENAME, of the form "name" or "name(section)", into
   manpage_pagename and manpage_section, releasing any previous values.
   The name is everything before the first '('; the section is
   everything after it up to the next ')' or the end of the string. */
static void
get_page_and_section (char *pagename)
{
  int name_len = 0;
  int sec_start, sec_end, sec_len;

  if (manpage_pagename)
    free (manpage_pagename);
  manpage_pagename = (char *)NULL;

  if (manpage_section)
    free (manpage_section);
  manpage_section = (char *)NULL;

  /* Measure the name part. */
  while (pagename[name_len] != '\0' && pagename[name_len] != '(')
    name_len++;

  manpage_pagename = (char *)xmalloc (1 + name_len);
  memcpy (manpage_pagename, pagename, name_len);
  manpage_pagename[name_len] = '\0';

  /* No opening parenthesis means no section was given. */
  if (pagename[name_len] != '(')
    return;

  sec_start = name_len + 1;
  sec_end = sec_start;
  while (pagename[sec_end] != '\0' && pagename[sec_end] != ')')
    sec_end++;
  sec_len = sec_end - sec_start;

  manpage_section = (char *)xmalloc (1 + sec_len);
  memcpy (manpage_section, pagename + sec_start, sec_len);
  manpage_section[sec_len] = '\0';
}
266
#if PIPE_USE_FORK
/* Signal handler installed for SIGCHLD while the formatter runs: collect
   one child with wait (), discarding its exit status.  SIG is unused. */
static void
reap_children (int sig)
{
  (void) sig;
  wait (NULL);
}
#endif /* PIPE_USE_FORK */
274
275static char *
276get_manpage_contents (char *pagename)
277{
278  static char *formatter_args[4] = { (char *)NULL };
279  int pipes[2];
280  pid_t child;
281  RETSIGTYPE (*sigsave) (int signum);
282  char *formatted_page = NULL;
283  int arg_index = 1;
284
285  if (formatter_args[0] == (char *)NULL)
286    formatter_args[0] = find_man_formatter ();
287
288  if (formatter_args[0] == (char *)NULL)
289    return ((char *)NULL);
290
291  get_page_and_section (pagename);
292
293  if (manpage_section != (char *)NULL)
294    formatter_args[arg_index++] = manpage_section;
295
296  formatter_args[arg_index++] = manpage_pagename;
297  formatter_args[arg_index] = (char *)NULL;
298
299  /* Open a pipe to this program, read the output, and save it away
300     in FORMATTED_PAGE.  The reader end of the pipe is pipes[0]; the
301     writer end is pipes[1]. */
302#if PIPE_USE_FORK
303  pipe (pipes);
304
305  sigsave = signal (SIGCHLD, reap_children);
306
307  child = fork ();
308  if (child == -1)
309    return ((char *)NULL);
310
311  if (child != 0)
312    {
313      /* In the parent, close the writing end of the pipe, and read from
314         the exec'd child. */
315      close (pipes[1]);
316      formatted_page = read_from_fd (pipes[0]);
317      close (pipes[0]);
318      signal (SIGCHLD, sigsave);
319    }
320  else
321    { /* In the child, close the read end of the pipe, make the write end
322         of the pipe be stdout, and execute the man page formatter. */
323      close (pipes[0]);
324      freopen (NULL_DEVICE, "w", stderr);
325      freopen (NULL_DEVICE, "r", stdin);
326      dup2 (pipes[1], fileno (stdout));
327
328      execv (formatter_args[0], formatter_args);
329
330      /* If we get here, we couldn't exec, so close out the pipe and
331         exit. */
332      close (pipes[1]);
333      xexit (0);
334    }
335#else  /* !PIPE_USE_FORK */
336  /* Cannot fork/exec, but can popen/pclose.  */
337  {
338    FILE *fpipe;
339    char *cmdline = xmalloc (strlen (formatter_args[0])
340			     + strlen (manpage_pagename)
341			     + (arg_index > 2 ? strlen (manpage_section) : 0)
342 			     + 3);
343    int save_stderr = dup (fileno (stderr));
344    int fd_err = open (NULL_DEVICE, O_WRONLY, 0666);
345
346    if (fd_err > 2)
347      dup2 (fd_err, fileno (stderr)); /* Don't print errors. */
348    sprintf (cmdline, "%s %s %s", formatter_args[0], manpage_pagename,
349				  arg_index > 2 ? manpage_section : "");
350    fpipe = popen (cmdline, "r");
351    free (cmdline);
352    if (fd_err > 2)
353      close (fd_err);
354    dup2 (save_stderr, fileno (stderr));
355    if (fpipe == 0)
356      return ((char *)NULL);
357    formatted_page = read_from_fd (fileno (fpipe));
358    if (pclose (fpipe) == -1)
359      {
360	if (formatted_page)
361	  free (formatted_page);
362	return ((char *)NULL);
363      }
364  }
365#endif /* !PIPE_USE_FORK */
366
367  /* If we have the page, then clean it up. */
368  if (formatted_page)
369    clean_manpage (formatted_page);
370
371  return (formatted_page);
372}
373
374static void
375clean_manpage (char *manpage)
376{
377  register int i, j;
378  int newline_count = 0;
379  char *newpage;
380
381  newpage = (char *)xmalloc (1 + strlen (manpage));
382
383  for (i = 0, j = 0; (newpage[j] = manpage[i]); i++, j++)
384    {
385      if (manpage[i] == '\n')
386        newline_count++;
387      else
388        newline_count = 0;
389
390      if (newline_count == 3)
391        {
392          j--;
393          newline_count--;
394        }
395
396      /* A malformed man page could have a \b as its first character,
397         in which case decrementing j by 2 will cause us to write into
398         newpage[-1], smashing the hidden info stored there by malloc.  */
399      if (manpage[i] == '\b' || (manpage[i] == '\f' && j > 0))
400        j -= 2;
401      else if (!raw_escapes_p)
402	{
403	  /* Remove the ANSI escape sequences for color, boldface,
404	     underlining, and italics, generated by some versions of
405	     Groff.  */
406	  if (manpage[i] == '\033' && manpage[i + 1] == '['
407	      && isdigit (manpage[i + 2]))
408	    {
409	      if (isdigit (manpage[i + 3]) && manpage[i + 4] == 'm')
410		{
411		  i += 4;
412		  j--;
413		}
414	      else if (manpage[i + 3] == 'm')
415		{
416		  i += 3;
417		  j--;
418		}
419	      /* Else do nothing: it's some unknown escape sequence,
420		 so let's leave it alone.  */
421	    }
422	}
423    }
424
425  newpage[j++] = 0;
426
427  strcpy (manpage, newpage);
428  free (newpage);
429}
430
431static NODE *
432manpage_node_of_file_buffer (FILE_BUFFER *file_buffer, char *pagename)
433{
434  NODE *node = (NODE *)NULL;
435  TAG *tag = (TAG *)NULL;
436
437  if (file_buffer->contents)
438    {
439      register int i;
440
441      for (i = 0; (tag = file_buffer->tags[i]); i++)
442        {
443          if (strcasecmp (pagename, tag->nodename) == 0)
444            break;
445        }
446    }
447
448  if (tag)
449    {
450      node = (NODE *)xmalloc (sizeof (NODE));
451      node->filename = file_buffer->filename;
452      node->nodename = xstrdup (tag->nodename);
453      node->contents = file_buffer->contents + tag->nodestart;
454      node->nodelen = tag->nodelen;
455      node->flags    = 0;
456      node->display_pos = 0;
457      node->parent   = (char *)NULL;
458      node->flags = (N_HasTagsTable | N_IsManPage);
459      node->contents += skip_node_separator (node->contents);
460    }
461
462  return (node);
463}
464
/* Read everything available on descriptor FD into a newly allocated,
   NUL-terminated string and return it.  Where FD_SET is available,
   first wait up to 15 seconds for FD to become readable and read
   nothing if that wait times out or fails.  Return NULL if nothing was
   read or the data starts with a NUL byte. */
static char *
read_from_fd (int fd)
{
  struct timeval timeout;
  char *data = (char *)NULL;
  int capacity = 0;
  int used = 0;
  int ready;
#if defined (FD_SET)
  fd_set read_fds;

  FD_ZERO (&read_fds);
  FD_SET (fd, &read_fds);

  timeout.tv_usec = 0;
  timeout.tv_sec = 15;

  ready = select (fd + 1, fd_set_cast (&read_fds), 0, 0, &timeout);
#else /* !FD_SET */
  ready = 1;
#endif /* !FD_SET */

  /* 0 is a timeout and -1 an error; anything else means go ahead. */
  if (ready != 0 && ready != -1)
    {
      for (;;)
        {
          int got;

          /* Grow in 1024-byte steps until one more chunk fits. */
          while ((used + 1024) > capacity)
            {
              capacity += 1024;
              data = (char *)xrealloc (data, capacity);
            }
          data[used] = '\0';

          got = read (fd, data + used, 1023);
          if (got < 0)
            break;

          used += got;
          data[used] = '\0';

          if (got == 0)
            break;
        }
    }

  if (data != (char *)NULL && *data == '\0')
    {
      free (data);
      data = (char *)NULL;
    }

  return (data);
}
529
530static char *reference_section_starters[] =
531{
532  "\nRELATED INFORMATION",
533  "\nRELATED\tINFORMATION",
534  "RELATED INFORMATION\n",
535  "RELATED\tINFORMATION\n",
536  "\nSEE ALSO",
537  "\nSEE\tALSO",
538  "SEE ALSO\n",
539  "SEE\tALSO\n",
540  (char *)NULL
541};
542
543static SEARCH_BINDING frs_binding;
544
545static SEARCH_BINDING *
546find_reference_section (NODE *node)
547{
548  register int i;
549  long position = -1;
550
551  frs_binding.buffer = node->contents;
552  frs_binding.start = 0;
553  frs_binding.end = node->nodelen;
554  frs_binding.flags = S_SkipDest;
555
556  for (i = 0; reference_section_starters[i] != (char *)NULL; i++)
557    {
558      position = search_forward (reference_section_starters[i], &frs_binding);
559      if (position != -1)
560        break;
561    }
562
563  if (position == -1)
564    return ((SEARCH_BINDING *)NULL);
565
566  /* We found the start of the reference section, and point is right after
567     the string which starts it.  The text from here to the next header
568     (or end of buffer) contains the only references in this manpage. */
569  frs_binding.start = position;
570
571  for (i = frs_binding.start; i < frs_binding.end - 2; i++)
572    {
573      if ((frs_binding.buffer[i] == '\n') &&
574          (!whitespace (frs_binding.buffer[i + 1])))
575        {
576          frs_binding.end = i;
577          break;
578        }
579    }
580
581  return (&frs_binding);
582}
583
584REFERENCE **
585xrefs_of_manpage (NODE *node)
586{
587  SEARCH_BINDING *reference_section;
588  REFERENCE **refs = (REFERENCE **)NULL;
589  int refs_index = 0;
590  int refs_slots = 0;
591  long position;
592
593  reference_section = find_reference_section (node);
594
595  if (reference_section == (SEARCH_BINDING *)NULL)
596    return ((REFERENCE **)NULL);
597
598  /* Grovel the reference section building a list of references found there.
599     A reference is alphabetic characters followed by non-whitespace text
600     within parenthesis. */
601  reference_section->flags = 0;
602
603  while ((position = search_forward ("(", reference_section)) != -1)
604    {
605      register int start, end;
606
607      for (start = position; start > reference_section->start; start--)
608        if (whitespace (reference_section->buffer[start]))
609          break;
610
611      start++;
612
613      for (end = position; end < reference_section->end; end++)
614        {
615          if (whitespace (reference_section->buffer[end]))
616            {
617              end = start;
618              break;
619            }
620
621          if (reference_section->buffer[end] == ')')
622            {
623              end++;
624              break;
625            }
626        }
627
628      if (end != start)
629        {
630          REFERENCE *entry;
631          int len = end - start;
632
633          entry = (REFERENCE *)xmalloc (sizeof (REFERENCE));
634          entry->label = (char *)xmalloc (1 + len);
635          strncpy (entry->label, (reference_section->buffer) + start, len);
636          entry->label[len] = '\0';
637          entry->filename = xstrdup (node->filename);
638          entry->nodename = xstrdup (entry->label);
639          entry->start = start;
640          entry->end = end;
641
642          add_pointer_to_array
643            (entry, refs_index, refs, refs_slots, 10, REFERENCE *);
644        }
645
646      reference_section->start = position + 1;
647    }
648
649  return (refs);
650}
651
652long
653locate_manpage_xref (NODE *node, long int start, int dir)
654{
655  REFERENCE **refs;
656  long position = -1;
657
658  refs = xrefs_of_manpage (node);
659
660  if (refs)
661    {
662      register int i, count;
663      REFERENCE *entry;
664
665      for (i = 0; refs[i]; i++);
666      count = i;
667
668      if (dir > 0)
669        {
670          for (i = 0; (entry = refs[i]); i++)
671            if (entry->start > start)
672              {
673                position = entry->start;
674                break;
675              }
676        }
677      else
678        {
679          for (i = count - 1; i > -1; i--)
680            {
681              entry = refs[i];
682
683              if (entry->start < start)
684                {
685                  position = entry->start;
686                  break;
687                }
688            }
689        }
690
691      info_free_references (refs);
692    }
693  return (position);
694}
695
696/* This one was a little tricky.  The binding buffer that is passed in has
697   a START and END value of 0 -- strlen (window-line-containing-point).
698   The BUFFER is a pointer to the start of that line. */
699REFERENCE **
700manpage_xrefs_in_binding (NODE *node, SEARCH_BINDING *binding)
701{
702  register int i;
703  REFERENCE **all_refs = xrefs_of_manpage (node);
704  REFERENCE **brefs = (REFERENCE **)NULL;
705  REFERENCE *entry;
706  int brefs_index = 0;
707  int brefs_slots = 0;
708  int start, end;
709
710  if (!all_refs)
711    return ((REFERENCE **)NULL);
712
713  start = binding->start + (binding->buffer - node->contents);
714  end = binding->end + (binding->buffer - node->contents);
715
716  for (i = 0; (entry = all_refs[i]); i++)
717    {
718      if ((entry->start > start) && (entry->end < end))
719        {
720          add_pointer_to_array
721            (entry, brefs_index, brefs, brefs_slots, 10, REFERENCE *);
722        }
723      else
724        {
725          maybe_free (entry->label);
726          maybe_free (entry->filename);
727          maybe_free (entry->nodename);
728          free (entry);
729        }
730    }
731
732  free (all_refs);
733  return (brefs);
734}
735