1// -*- C++ -*-
2/* Copyright (C) 1989-1992, 2000, 2001, 2002, 2004
3   Free Software Foundation, Inc.
4     Written by James Clark (jjc@jclark.com)
5
6This file is part of groff.
7
8groff is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 2, or (at your option) any later
11version.
12
13groff is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License along
19with groff; see the file COPYING.  If not, write to the Free Software
20Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
21
22#include "refer.h"
23#include "refid.h"
24#include "ref.h"
25#include "token.h"
26#include "search.h"
27#include "command.h"
28
29extern "C" const char *Version_string;
30
31const char PRE_LABEL_MARKER = '\013';
32const char POST_LABEL_MARKER = '\014';
33const char LABEL_MARKER = '\015'; // label_type is added on
34
35#define FORCE_LEFT_BRACKET 04
36#define FORCE_RIGHT_BRACKET 010
37
38static FILE *outfp = stdout;
39
40string capitalize_fields;
41string reverse_fields;
42string abbreviate_fields;
43string period_before_last_name = ". ";
44string period_before_initial = ".";
45string period_before_hyphen = "";
46string period_before_other = ". ";
47string sort_fields;
48int annotation_field = -1;
49string annotation_macro;
50string discard_fields = "XYZ";
51string pre_label = "\\*([.";
52string post_label = "\\*(.]";
53string sep_label = ", ";
54int accumulate = 0;
55int move_punctuation = 0;
56int abbreviate_label_ranges = 0;
57string label_range_indicator;
58int label_in_text = 1;
59int label_in_reference = 1;
60int date_as_label = 0;
61int sort_adjacent_labels = 0;
62// Join exactly two authors with this.
63string join_authors_exactly_two = " and ";
64// When there are more than two authors join the last two with this.
65string join_authors_last_two = ", and ";
66// Otherwise join authors with this.
67string join_authors_default = ", ";
68string separate_label_second_parts = ", ";
69// Use this string to represent that there are other authors.
70string et_al = " et al";
71// Use et al only if it can replace at least this many authors.
72int et_al_min_elide = 2;
73// Use et al only if the total number of authors is at least this.
74int et_al_min_total = 3;
75
76
77int compatible_flag = 0;
78
79int short_label_flag = 0;
80
81static int recognize_R1_R2 = 1;
82
83search_list database_list;
84int search_default = 1;
85static int default_database_loaded = 0;
86
87static reference **citation = 0;
88static int ncitations = 0;
89static int citation_max = 0;
90
91static reference **reference_hash_table = 0;
92static int hash_table_size;
93static int nreferences = 0;
94
95static int need_syncing = 0;
96string pending_line;
97string pending_lf_lines;
98
99static void output_pending_line();
100static unsigned immediately_handle_reference(const string &);
101static void immediately_output_references();
102static unsigned store_reference(const string &);
103static void divert_to_temporary_file();
104static reference *make_reference(const string &, unsigned *);
105static void usage(FILE *stream);
106static void do_file(const char *);
107static void split_punct(string &line, string &punct);
108static void output_citation_group(reference **v, int n, label_type, FILE *fp);
109static void possibly_load_default_database();
110
111int main(int argc, char **argv)
112{
113  program_name = argv[0];
114  static char stderr_buf[BUFSIZ];
115  setbuf(stderr, stderr_buf);
116  outfp = stdout;
117  int finished_options = 0;
118  int bib_flag = 0;
119  int done_spec = 0;
120
121  for (--argc, ++argv;
122       !finished_options && argc > 0 && argv[0][0] == '-'
123       && argv[0][1] != '\0';
124       argv++, argc--) {
125    const char *opt = argv[0] + 1;
126    while (opt != 0 && *opt != '\0') {
127      switch (*opt) {
128      case 'C':
129	compatible_flag = 1;
130	opt++;
131	break;
132      case 'B':
133	bib_flag = 1;
134	label_in_reference = 0;
135	label_in_text = 0;
136	++opt;
137	if (*opt == '\0') {
138	  annotation_field = 'X';
139	  annotation_macro = "AP";
140	}
141	else if (csalnum(opt[0]) && opt[1] == '.' && opt[2] != '\0') {
142	  annotation_field = opt[0];
143	  annotation_macro = opt + 2;
144	}
145	opt = 0;
146	break;
147      case 'P':
148	move_punctuation = 1;
149	opt++;
150	break;
151      case 'R':
152	recognize_R1_R2 = 0;
153	opt++;
154	break;
155      case 'S':
156	// Not a very useful spec.
157	set_label_spec("(A.n|Q)', '(D.y|D)");
158	done_spec = 1;
159	pre_label = " (";
160	post_label = ")";
161	sep_label = "; ";
162	opt++;
163	break;
164      case 'V':
165	verify_flag = 1;
166	opt++;
167	break;
168      case 'f':
169	{
170	  const char *num = 0;
171	  if (*++opt == '\0') {
172	    if (argc > 1) {
173	      num = *++argv;
174	      --argc;
175	    }
176	    else {
177	      error("option `f' requires an argument");
178	      usage(stderr);
179	      exit(1);
180	    }
181	  }
182	  else {
183	    num = opt;
184	    opt = 0;
185	  }
186	  const char *ptr;
187	  for (ptr = num; *ptr; ptr++)
188	    if (!csdigit(*ptr)) {
189	      error("bad character `%1' in argument to -f option", *ptr);
190	      break;
191	    }
192	  if (*ptr == '\0') {
193	    string spec;
194	    spec = '%';
195	    spec += num;
196	    spec += '\0';
197	    set_label_spec(spec.contents());
198	    done_spec = 1;
199	  }
200	  break;
201	}
202      case 'b':
203	label_in_text = 0;
204	label_in_reference = 0;
205	opt++;
206	break;
207      case 'e':
208	accumulate = 1;
209	opt++;
210	break;
211      case 'c':
212	capitalize_fields = ++opt;
213	opt = 0;
214	break;
215      case 'k':
216	{
217	  char buf[5];
218	  if (csalpha(*++opt))
219	    buf[0] = *opt++;
220	  else {
221	    if (*opt != '\0')
222	      error("bad field name `%1'", *opt++);
223	    buf[0] = 'L';
224	  }
225	  buf[1] = '~';
226	  buf[2] = '%';
227	  buf[3] = 'a';
228	  buf[4] = '\0';
229	  set_label_spec(buf);
230	  done_spec = 1;
231	}
232	break;
233      case 'a':
234	{
235	  const char *ptr;
236	  for (ptr = ++opt; *ptr; ptr++)
237	    if (!csdigit(*ptr)) {
238	      error("argument to `a' option not a number");
239	      break;
240	    }
241	  if (*ptr == '\0') {
242	    reverse_fields = 'A';
243	    reverse_fields += opt;
244	  }
245	  opt = 0;
246	}
247	break;
248      case 'i':
249	linear_ignore_fields = ++opt;
250	opt = 0;
251	break;
252      case 'l':
253	{
254	  char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a
255	  strcpy(buf, "A.n");
256	  if (*++opt != '\0' && *opt != ',') {
257	    char *ptr;
258	    long n = strtol(opt, &ptr, 10);
259	    if (n == 0 && ptr == opt) {
260	      error("bad integer `%1' in `l' option", opt);
261	      opt = 0;
262	      break;
263	    }
264	    if (n < 0)
265	      n = 0;
266	    opt = ptr;
267	    sprintf(strchr(buf, '\0'), "+%ld", n);
268	  }
269	  strcat(buf, "D.y");
270	  if (*opt == ',')
271	    opt++;
272	  if (*opt != '\0') {
273	    char *ptr;
274	    long n = strtol(opt, &ptr, 10);
275	    if (n == 0 && ptr == opt) {
276	      error("bad integer `%1' in `l' option", opt);
277	      opt = 0;
278	      break;
279	    }
280	    if (n < 0)
281	      n = 0;
282	    sprintf(strchr(buf, '\0'), "-%ld", n);
283	    opt = ptr;
284	    if (*opt != '\0')
285	      error("argument to `l' option not of form `m,n'");
286	  }
287	  strcat(buf, "%a");
288	  if (!set_label_spec(buf))
289	    assert(0);
290	  done_spec = 1;
291	}
292	break;
293      case 'n':
294	search_default = 0;
295	opt++;
296	break;
297      case 'p':
298	{
299	  const char *filename = 0;
300	  if (*++opt == '\0') {
301	    if (argc > 1) {
302	      filename = *++argv;
303	      argc--;
304	    }
305	    else {
306	      error("option `p' requires an argument");
307	      usage(stderr);
308	      exit(1);
309	    }
310	  }
311	  else {
312	    filename = opt;
313	    opt = 0;
314	  }
315	  database_list.add_file(filename);
316	}
317	break;
318      case 's':
319	if (*++opt == '\0')
320	  sort_fields = "AD";
321	else {
322	  sort_fields = opt;
323	  opt = 0;
324	}
325	accumulate = 1;
326	break;
327      case 't':
328	{
329	  char *ptr;
330	  long n = strtol(opt, &ptr, 10);
331	  if (n == 0 && ptr == opt) {
332	    error("bad integer `%1' in `t' option", opt);
333	    opt = 0;
334	    break;
335	  }
336	  if (n < 1)
337	    n = 1;
338	  linear_truncate_len = int(n);
339	  opt = ptr;
340	  break;
341	}
342      case '-':
343	if (opt[1] == '\0') {
344	  finished_options = 1;
345	  opt++;
346	  break;
347	}
348	if (strcmp(opt,"-version")==0) {
349      case 'v':
350	  printf("GNU refer (groff) version %s\n", Version_string);
351	  exit(0);
352	  break;
353	}
354	if (strcmp(opt,"-help")==0) {
355	  usage(stdout);
356	  exit(0);
357	  break;
358	}
359	// fall through
360      default:
361	error("unrecognized option `%1'", *opt);
362	usage(stderr);
363	exit(1);
364	break;
365      }
366    }
367  }
368  if (!done_spec)
369    set_label_spec("%1");
370  if (argc <= 0) {
371    if (bib_flag)
372      do_bib("-");
373    else
374      do_file("-");
375  }
376  else {
377    for (int i = 0; i < argc; i++) {
378      if (bib_flag)
379	do_bib(argv[i]);
380      else
381	do_file(argv[i]);
382    }
383  }
384  if (accumulate)
385    output_references();
386  if (fflush(stdout) < 0)
387    fatal("output error");
388  return 0;
389}
390
391static void usage(FILE *stream)
392{
393  fprintf(stream,
394"usage: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n"
395"       [-sXYZ] [-tN] [-BL.M] [files ...]\n",
396	  program_name);
397}
398
399static void possibly_load_default_database()
400{
401  if (search_default && !default_database_loaded) {
402    char *filename = getenv("REFER");
403    if (filename)
404      database_list.add_file(filename);
405    else
406      database_list.add_file(DEFAULT_INDEX, 1);
407    default_database_loaded = 1;
408  }
409}
410
411static int is_list(const string &str)
412{
413  const char *start = str.contents();
414  const char *end = start + str.length();
415  while (end > start && csspace(end[-1]))
416    end--;
417  while (start < end && csspace(*start))
418    start++;
419  return end - start == 6 && memcmp(start, "$LIST$", 6) == 0;
420}
421
422static void do_file(const char *filename)
423{
424  FILE *fp;
425  if (strcmp(filename, "-") == 0) {
426    fp = stdin;
427  }
428  else {
429    errno = 0;
430    fp = fopen(filename, "r");
431    if (fp == 0) {
432      error("can't open `%1': %2", filename, strerror(errno));
433      return;
434    }
435  }
436  current_filename = filename;
437  fprintf(outfp, ".lf 1 %s\n", filename);
438  string line;
439  current_lineno = 0;
440  for (;;) {
441    line.clear();
442    for (;;) {
443      int c = getc(fp);
444      if (c == EOF) {
445	if (line.length() > 0)
446	  line += '\n';
447	break;
448      }
449      if (invalid_input_char(c))
450	error("invalid input character code %1", c);
451      else {
452	line += c;
453	if (c == '\n')
454	  break;
455      }
456    }
457    int len = line.length();
458    if (len == 0)
459      break;
460    current_lineno++;
461    if (len >= 2 && line[0] == '.' && line[1] == '[') {
462      int start_lineno = current_lineno;
463      int start_of_line = 1;
464      string str;
465      string post;
466      string pre(line.contents() + 2, line.length() - 3);
467      for (;;) {
468	int c = getc(fp);
469	if (c == EOF) {
470	  error_with_file_and_line(current_filename, start_lineno,
471				   "missing `.]' line");
472	  break;
473	}
474	if (start_of_line)
475	  current_lineno++;
476	if (start_of_line && c == '.') {
477	  int d = getc(fp);
478	  if (d == ']') {
479	    while ((d = getc(fp)) != '\n' && d != EOF) {
480	      if (invalid_input_char(d))
481		error("invalid input character code %1", d);
482	      else
483		post += d;
484	    }
485	    break;
486	  }
487	  if (d != EOF)
488	    ungetc(d, fp);
489	}
490	if (invalid_input_char(c))
491	  error("invalid input character code %1", c);
492	else
493	  str += c;
494	start_of_line = (c == '\n');
495      }
496      if (is_list(str)) {
497	output_pending_line();
498	if (accumulate)
499	  output_references();
500	else
501	  error("found `$LIST$' but not accumulating references");
502      }
503      else {
504	unsigned flags = (accumulate
505			  ? store_reference(str)
506			  : immediately_handle_reference(str));
507	if (label_in_text) {
508	  if (accumulate && outfp == stdout)
509	    divert_to_temporary_file();
510	  if (pending_line.length() == 0) {
511	    warning("can't attach citation to previous line");
512	  }
513	  else
514	    pending_line.set_length(pending_line.length() - 1);
515	  string punct;
516	  if (move_punctuation)
517	    split_punct(pending_line, punct);
518	  int have_text = pre.length() > 0 || post.length() > 0;
519	  label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET
520					       |FORCE_RIGHT_BRACKET));
521	  if ((flags & FORCE_LEFT_BRACKET) || !have_text)
522	    pending_line += PRE_LABEL_MARKER;
523	  pending_line += pre;
524	  char lm = LABEL_MARKER + (int)lt;
525	  pending_line += lm;
526	  pending_line += post;
527	  if ((flags & FORCE_RIGHT_BRACKET) || !have_text)
528	    pending_line += POST_LABEL_MARKER;
529	  pending_line += punct;
530	  pending_line += '\n';
531	}
532      }
533      need_syncing = 1;
534    }
535    else if (len >= 4
536	     && line[0] == '.' && line[1] == 'l' && line[2] == 'f'
537	     && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
538      pending_lf_lines += line;
539      line += '\0';
540      if (interpret_lf_args(line.contents() + 3))
541	current_lineno--;
542    }
543    else if (recognize_R1_R2
544	     && len >= 4
545	     && line[0] == '.' && line[1] == 'R' && line[2] == '1'
546	     && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
547      line.clear();
548      int start_of_line = 1;
549      int start_lineno = current_lineno;
550      for (;;) {
551	int c = getc(fp);
552	if (c != EOF && start_of_line)
553	  current_lineno++;
554	if (start_of_line && c == '.') {
555	  c = getc(fp);
556	  if (c == 'R') {
557	    c = getc(fp);
558	    if (c == '2') {
559	      c = getc(fp);
560	      if (compatible_flag || c == ' ' || c == '\n' || c == EOF) {
561		while (c != EOF && c != '\n')
562		  c = getc(fp);
563		break;
564	      }
565	      else {
566		line += '.';
567		line += 'R';
568		line += '2';
569	      }
570	    }
571	    else {
572	      line += '.';
573	      line += 'R';
574	    }
575	  }
576	  else
577	    line += '.';
578	}
579	if (c == EOF) {
580	  error_with_file_and_line(current_filename, start_lineno,
581				   "missing `.R2' line");
582	  break;
583	}
584	if (invalid_input_char(c))
585	  error("invalid input character code %1", int(c));
586	else {
587	  line += c;
588	  start_of_line = c == '\n';
589	}
590      }
591      output_pending_line();
592      if (accumulate)
593	output_references();
594      else
595	nreferences = 0;
596      process_commands(line, current_filename, start_lineno + 1);
597      need_syncing = 1;
598    }
599    else {
600      output_pending_line();
601      pending_line = line;
602    }
603  }
604  need_syncing = 0;
605  output_pending_line();
606  if (fp != stdin)
607    fclose(fp);
608}
609
610class label_processing_state {
611  enum {
612    NORMAL,
613    PENDING_LABEL,
614    PENDING_LABEL_POST,
615    PENDING_LABEL_POST_PRE,
616    PENDING_POST
617    } state;
618  label_type type;		// type of pending labels
619  int count;			// number of pending labels
620  reference **rptr;		// pointer to next reference
621  int rcount;			// number of references left
622  FILE *fp;
623  int handle_pending(int c);
624public:
625  label_processing_state(reference **, int, FILE *);
626  ~label_processing_state();
627  void process(int c);
628};
629
630static void output_pending_line()
631{
632  if (label_in_text && !accumulate && ncitations > 0) {
633    label_processing_state state(citation, ncitations, outfp);
634    int len = pending_line.length();
635    for (int i = 0; i < len; i++)
636      state.process((unsigned char)(pending_line[i]));
637  }
638  else
639    put_string(pending_line, outfp);
640  pending_line.clear();
641  if (pending_lf_lines.length() > 0) {
642    put_string(pending_lf_lines, outfp);
643    pending_lf_lines.clear();
644  }
645  if (!accumulate)
646    immediately_output_references();
647  if (need_syncing) {
648    fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename);
649    need_syncing = 0;
650  }
651}
652
653static void split_punct(string &line, string &punct)
654{
655  const char *start = line.contents();
656  const char *end = start + line.length();
657  const char *ptr = start;
658  const char *last_token_start = 0;
659  for (;;) {
660    if (ptr >= end)
661      break;
662    last_token_start = ptr;
663    if (*ptr == PRE_LABEL_MARKER || *ptr == POST_LABEL_MARKER
664	|| (*ptr >= LABEL_MARKER && *ptr < LABEL_MARKER + N_LABEL_TYPES))
665      ptr++;
666    else if (!get_token(&ptr, end))
667      break;
668  }
669  if (last_token_start) {
670    const token_info *ti = lookup_token(last_token_start, end);
671    if (ti->is_punct()) {
672      punct.append(last_token_start, end - last_token_start);
673      line.set_length(last_token_start - start);
674    }
675  }
676}
677
678static void divert_to_temporary_file()
679{
680  outfp = xtmpfile();
681}
682
683static void store_citation(reference *ref)
684{
685  if (ncitations >= citation_max) {
686    if (citation == 0)
687      citation = new reference*[citation_max = 100];
688    else {
689      reference **old_citation = citation;
690      citation_max *= 2;
691      citation = new reference *[citation_max];
692      memcpy(citation, old_citation, ncitations*sizeof(reference *));
693      a_delete old_citation;
694    }
695  }
696  citation[ncitations++] = ref;
697}
698
699static unsigned store_reference(const string &str)
700{
701  if (reference_hash_table == 0) {
702    reference_hash_table = new reference *[17];
703    hash_table_size = 17;
704    for (int i = 0; i < hash_table_size; i++)
705      reference_hash_table[i] = 0;
706  }
707  unsigned flags;
708  reference *ref = make_reference(str, &flags);
709  ref->compute_hash_code();
710  unsigned h = ref->hash();
711  reference **ptr;
712  for (ptr = reference_hash_table + (h % hash_table_size);
713       *ptr != 0;
714       ((ptr == reference_hash_table)
715	? (ptr = reference_hash_table + hash_table_size - 1)
716	: --ptr))
717    if (same_reference(**ptr, *ref))
718      break;
719  if (*ptr != 0) {
720    if (ref->is_merged())
721      warning("fields ignored because reference already used");
722    delete ref;
723    ref = *ptr;
724  }
725  else {
726    *ptr = ref;
727    ref->set_number(nreferences);
728    nreferences++;
729    ref->pre_compute_label();
730    ref->compute_sort_key();
731    if (nreferences*2 >= hash_table_size) {
732      // Rehash it.
733      reference **old_table = reference_hash_table;
734      int old_size = hash_table_size;
735      hash_table_size = next_size(hash_table_size);
736      reference_hash_table = new reference*[hash_table_size];
737      int i;
738      for (i = 0; i < hash_table_size; i++)
739	reference_hash_table[i] = 0;
740      for (i = 0; i < old_size; i++)
741	if (old_table[i]) {
742	  reference **p;
743	  for (p = (reference_hash_table
744				+ (old_table[i]->hash() % hash_table_size));
745	       *p;
746	       ((p == reference_hash_table)
747		? (p = reference_hash_table + hash_table_size - 1)
748		: --p))
749	    ;
750	  *p = old_table[i];
751	}
752      a_delete old_table;
753    }
754  }
755  if (label_in_text)
756    store_citation(ref);
757  return flags;
758}
759
760unsigned immediately_handle_reference(const string &str)
761{
762  unsigned flags;
763  reference *ref = make_reference(str, &flags);
764  ref->set_number(nreferences);
765  if (label_in_text || label_in_reference) {
766    ref->pre_compute_label();
767    ref->immediate_compute_label();
768  }
769  nreferences++;
770  store_citation(ref);
771  return flags;
772}
773
774static void immediately_output_references()
775{
776  for (int i = 0; i < ncitations; i++) {
777    reference *ref = citation[i];
778    if (label_in_reference) {
779      fputs(".ds [F ", outfp);
780      const string &label = ref->get_label(NORMAL_LABEL);
781      if (label.length() > 0
782	  && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
783	putc('"', outfp);
784      put_string(label, outfp);
785      putc('\n', outfp);
786    }
787    ref->output(outfp);
788    delete ref;
789  }
790  ncitations = 0;
791}
792
793static void output_citation_group(reference **v, int n, label_type type,
794				  FILE *fp)
795{
796  if (sort_adjacent_labels) {
797    // Do an insertion sort.  Usually n will be very small.
798    for (int i = 1; i < n; i++) {
799      int num = v[i]->get_number();
800      reference *temp = v[i];
801      int j;
802      for (j = i - 1; j >= 0 && v[j]->get_number() > num; j--)
803	v[j + 1] = v[j];
804      v[j + 1] = temp;
805    }
806  }
807  // This messes up if !accumulate.
808  if (accumulate && n > 1) {
809    // remove duplicates
810    int j = 1;
811    for (int i = 1; i < n; i++)
812      if (v[i]->get_label(type) != v[i - 1]->get_label(type))
813	v[j++] = v[i];
814    n = j;
815  }
816  string merged_label;
817  for (int i = 0; i < n; i++) {
818    int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type, merged_label);
819    if (nmerged > 0) {
820      put_string(merged_label, fp);
821      i += nmerged;
822    }
823    else
824      put_string(v[i]->get_label(type), fp);
825    if (i < n - 1)
826      put_string(sep_label, fp);
827  }
828}
829
830
831label_processing_state::label_processing_state(reference **p, int n, FILE *f)
832: state(NORMAL), count(0), rptr(p), rcount(n), fp(f)
833{
834}
835
836label_processing_state::~label_processing_state()
837{
838  int handled = handle_pending(EOF);
839  assert(!handled);
840  assert(rcount == 0);
841}
842
843int label_processing_state::handle_pending(int c)
844{
845  switch (state) {
846  case NORMAL:
847    break;
848  case PENDING_LABEL:
849    if (c == POST_LABEL_MARKER) {
850      state = PENDING_LABEL_POST;
851      return 1;
852    }
853    else {
854      output_citation_group(rptr, count, type, fp);
855      rptr += count ;
856      rcount -= count;
857      state = NORMAL;
858    }
859    break;
860  case PENDING_LABEL_POST:
861    if (c == PRE_LABEL_MARKER) {
862      state = PENDING_LABEL_POST_PRE;
863      return 1;
864    }
865    else {
866      output_citation_group(rptr, count, type, fp);
867      rptr += count;
868      rcount -= count;
869      put_string(post_label, fp);
870      state = NORMAL;
871    }
872    break;
873  case PENDING_LABEL_POST_PRE:
874    if (c >= LABEL_MARKER
875	&& c < LABEL_MARKER + N_LABEL_TYPES
876	&& c - LABEL_MARKER == type) {
877      count += 1;
878      state = PENDING_LABEL;
879      return 1;
880    }
881    else {
882      output_citation_group(rptr, count, type, fp);
883      rptr += count;
884      rcount -= count;
885      put_string(sep_label, fp);
886      state = NORMAL;
887    }
888    break;
889  case PENDING_POST:
890    if (c == PRE_LABEL_MARKER) {
891      put_string(sep_label, fp);
892      state = NORMAL;
893      return 1;
894    }
895    else {
896      put_string(post_label, fp);
897      state = NORMAL;
898    }
899    break;
900  }
901  return 0;
902}
903
904void label_processing_state::process(int c)
905{
906  if (handle_pending(c))
907    return;
908  assert(state == NORMAL);
909  switch (c) {
910  case PRE_LABEL_MARKER:
911    put_string(pre_label, fp);
912    state = NORMAL;
913    break;
914  case POST_LABEL_MARKER:
915    state = PENDING_POST;
916    break;
917  case LABEL_MARKER:
918  case LABEL_MARKER + 1:
919    count = 1;
920    state = PENDING_LABEL;
921    type = label_type(c - LABEL_MARKER);
922    break;
923  default:
924    state = NORMAL;
925    putc(c, fp);
926    break;
927  }
928}
929
930extern "C" {
931
932int rcompare(const void *p1, const void *p2)
933{
934  return compare_reference(**(reference **)p1, **(reference **)p2);
935}
936
937}
938
939void output_references()
940{
941  assert(accumulate);
942  if (!hash_table_size) {
943    error("nothing to reference (probably `bibliography' before `sort')");
944    accumulate = 0;
945    nreferences = 0;
946    return;
947  }
948  if (nreferences > 0) {
949    int j = 0;
950    int i;
951    for (i = 0; i < hash_table_size; i++)
952      if (reference_hash_table[i] != 0)
953	reference_hash_table[j++] = reference_hash_table[i];
954    assert(j == nreferences);
955    for (; j < hash_table_size; j++)
956      reference_hash_table[j] = 0;
957    qsort(reference_hash_table, nreferences, sizeof(reference*), rcompare);
958    for (i = 0; i < nreferences; i++)
959      reference_hash_table[i]->set_number(i);
960    compute_labels(reference_hash_table, nreferences);
961  }
962  if (outfp != stdout) {
963    rewind(outfp);
964    {
965      label_processing_state state(citation, ncitations, stdout);
966      int c;
967      while ((c = getc(outfp)) != EOF)
968	state.process(c);
969    }
970    ncitations = 0;
971    fclose(outfp);
972    outfp = stdout;
973  }
974  if (nreferences > 0) {
975    fputs(".]<\n", outfp);
976    for (int i = 0; i < nreferences; i++) {
977      if (sort_fields.length() > 0)
978	reference_hash_table[i]->print_sort_key_comment(outfp);
979      if (label_in_reference) {
980	fputs(".ds [F ", outfp);
981	const string &label = reference_hash_table[i]->get_label(NORMAL_LABEL);
982	if (label.length() > 0
983	    && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
984	  putc('"', outfp);
985	put_string(label, outfp);
986	putc('\n', outfp);
987      }
988      reference_hash_table[i]->output(outfp);
989      delete reference_hash_table[i];
990      reference_hash_table[i] = 0;
991    }
992    fputs(".]>\n", outfp);
993    nreferences = 0;
994  }
995  clear_labels();
996}
997
998static reference *find_reference(const char *query, int query_len)
999{
1000  // This is so that error messages look better.
1001  while (query_len > 0 && csspace(query[query_len - 1]))
1002    query_len--;
1003  string str;
1004  for (int i = 0; i < query_len; i++)
1005    str += query[i] == '\n' ? ' ' : query[i];
1006  str += '\0';
1007  possibly_load_default_database();
1008  search_list_iterator iter(&database_list, str.contents());
1009  reference_id rid;
1010  const char *start;
1011  int len;
1012  if (!iter.next(&start, &len, &rid)) {
1013    error("no matches for `%1'", str.contents());
1014    return 0;
1015  }
1016  const char *end = start + len;
1017  while (start < end) {
1018    if (*start == '%')
1019      break;
1020    while (start < end && *start++ != '\n')
1021      ;
1022  }
1023  if (start >= end) {
1024    error("found a reference for `%1' but it didn't contain any fields",
1025	  str.contents());
1026    return 0;
1027  }
1028  reference *result = new reference(start, end - start, &rid);
1029  if (iter.next(&start, &len, &rid))
1030    warning("multiple matches for `%1'", str.contents());
1031  return result;
1032}
1033
1034static reference *make_reference(const string &str, unsigned *flagsp)
1035{
1036  const char *start = str.contents();
1037  const char *end = start + str.length();
1038  const char *ptr = start;
1039  while (ptr < end) {
1040    if (*ptr == '%')
1041      break;
1042    while (ptr < end && *ptr++ != '\n')
1043      ;
1044  }
1045  *flagsp = 0;
1046  for (; start < ptr; start++) {
1047    if (*start == '#')
1048      *flagsp = (SHORT_LABEL | (*flagsp & (FORCE_RIGHT_BRACKET
1049					   | FORCE_LEFT_BRACKET)));
1050    else if (*start == '[')
1051      *flagsp |= FORCE_LEFT_BRACKET;
1052    else if (*start == ']')
1053      *flagsp |= FORCE_RIGHT_BRACKET;
1054    else if (!csspace(*start))
1055      break;
1056  }
1057  if (start >= end) {
1058    error("empty reference");
1059    return new reference;
1060  }
1061  reference *database_ref = 0;
1062  if (start < ptr)
1063    database_ref = find_reference(start, ptr - start);
1064  reference *inline_ref = 0;
1065  if (ptr < end)
1066    inline_ref = new reference(ptr, end - ptr);
1067  if (inline_ref) {
1068    if (database_ref) {
1069      database_ref->merge(*inline_ref);
1070      delete inline_ref;
1071      return database_ref;
1072    }
1073    else
1074      return inline_ref;
1075  }
1076  else if (database_ref)
1077    return database_ref;
1078  else
1079    return new reference;
1080}
1081
1082static void do_ref(const string &str)
1083{
1084  if (accumulate)
1085    (void)store_reference(str);
1086  else {
1087    (void)immediately_handle_reference(str);
1088    immediately_output_references();
1089  }
1090}
1091
1092static void trim_blanks(string &str)
1093{
1094  const char *start = str.contents();
1095  const char *end = start + str.length();
1096  while (end > start && end[-1] != '\n' && csspace(end[-1]))
1097    --end;
1098  str.set_length(end - start);
1099}
1100
1101void do_bib(const char *filename)
1102{
1103  FILE *fp;
1104  if (strcmp(filename, "-") == 0)
1105    fp = stdin;
1106  else {
1107    errno = 0;
1108    fp = fopen(filename, "r");
1109    if (fp == 0) {
1110      error("can't open `%1': %2", filename, strerror(errno));
1111      return;
1112    }
1113    current_filename = filename;
1114  }
1115  enum {
1116    START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT
1117    } state = START;
1118  string body;
1119  for (;;) {
1120    int c = getc(fp);
1121    if (c == EOF)
1122      break;
1123    if (invalid_input_char(c)) {
1124      error("invalid input character code %1", c);
1125      continue;
1126    }
1127    switch (state) {
1128    case START:
1129      if (c == '%') {
1130	body = c;
1131	state = BODY;
1132      }
1133      else if (c != '\n')
1134	state = MIDDLE;
1135      break;
1136    case MIDDLE:
1137      if (c == '\n')
1138	state = START;
1139      break;
1140    case BODY:
1141      body += c;
1142      if (c == '\n')
1143	state = BODY_START;
1144      break;
1145    case BODY_START:
1146      if (c == '\n') {
1147	do_ref(body);
1148	state = START;
1149      }
1150      else if (c == '.')
1151	state = BODY_DOT;
1152      else if (csspace(c)) {
1153	state = BODY_BLANK;
1154	body += c;
1155      }
1156      else {
1157	body += c;
1158	state = BODY;
1159      }
1160      break;
1161    case BODY_BLANK:
1162      if (c == '\n') {
1163	trim_blanks(body);
1164	do_ref(body);
1165	state = START;
1166      }
1167      else if (csspace(c))
1168	body += c;
1169      else {
1170	body += c;
1171	state = BODY;
1172      }
1173      break;
1174    case BODY_DOT:
1175      if (c == ']') {
1176	do_ref(body);
1177	state = MIDDLE;
1178      }
1179      else {
1180	body += '.';
1181	body += c;
1182	state = c == '\n' ? BODY_START : BODY;
1183      }
1184      break;
1185    default:
1186      assert(0);
1187    }
1188    if (c == '\n')
1189      current_lineno++;
1190  }
1191  switch (state) {
1192  case START:
1193  case MIDDLE:
1194    break;
1195  case BODY:
1196    body += '\n';
1197    do_ref(body);
1198    break;
1199  case BODY_DOT:
1200  case BODY_START:
1201    do_ref(body);
1202    break;
1203  case BODY_BLANK:
1204    trim_blanks(body);
1205    do_ref(body);
1206    break;
1207  }
1208  fclose(fp);
1209}
1210
1211// from the Dragon Book
1212
1213unsigned hash_string(const char *s, int len)
1214{
1215  const char *end = s + len;
1216  unsigned h = 0, g;
1217  while (s < end) {
1218    h <<= 4;
1219    h += *s++;
1220    if ((g = h & 0xf0000000) != 0) {
1221      h ^= g >> 24;
1222      h ^= g;
1223    }
1224  }
1225  return h;
1226}
1227
1228int next_size(int n)
1229{
1230  static const int table_sizes[] = {
1231    101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009,
1232    80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009,
1233    16000057, 32000011, 64000031, 128000003, 0
1234  };
1235
1236  const int *p;
1237  for (p = table_sizes; *p <= n && *p != 0; p++)
1238    ;
1239  assert(*p != 0);
1240  return *p;
1241}
1242
1243