11590Srgrimes/*-
21590Srgrimes * Copyright (c) 1991, 1993, 1994
31590Srgrimes *	The Regents of the University of California.  All rights reserved.
41590Srgrimes *
51590Srgrimes * This code is derived from software contributed to Berkeley by
61590Srgrimes * Steve Hayman of the Computer Science Department, Indiana University,
71590Srgrimes * Michiro Hikida and David Goodenough.
81590Srgrimes *
91590Srgrimes * Redistribution and use in source and binary forms, with or without
101590Srgrimes * modification, are permitted provided that the following conditions
111590Srgrimes * are met:
121590Srgrimes * 1. Redistributions of source code must retain the above copyright
131590Srgrimes *    notice, this list of conditions and the following disclaimer.
141590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
151590Srgrimes *    notice, this list of conditions and the following disclaimer in the
161590Srgrimes *    documentation and/or other materials provided with the distribution.
171590Srgrimes * 4. Neither the name of the University nor the names of its contributors
181590Srgrimes *    may be used to endorse or promote products derived from this software
191590Srgrimes *    without specific prior written permission.
201590Srgrimes *
211590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
221590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
231590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
241590Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
251590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
261590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
271590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
281590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
291590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
301590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
311590Srgrimes * SUCH DAMAGE.
321590Srgrimes */
331590Srgrimes
341590Srgrimes#ifndef lint
3527422Scharnierstatic const char copyright[] =
361590Srgrimes"@(#) Copyright (c) 1991, 1993, 1994\n\
371590Srgrimes	The Regents of the University of California.  All rights reserved.\n";
381590Srgrimes#endif /* not lint */
391590Srgrimes
401590Srgrimes#ifndef lint
4127422Scharnier#if 0
4223694Speterstatic char sccsid[] = "@(#)join.c	8.6 (Berkeley) 5/4/95";
4327422Scharnier#endif
441590Srgrimes#endif /* not lint */
4599112Sobrien#include <sys/cdefs.h>
4699112Sobrien__FBSDID("$FreeBSD$");
471590Srgrimes
481590Srgrimes#include <sys/param.h>
491590Srgrimes
501590Srgrimes#include <err.h>
511590Srgrimes#include <errno.h>
52131624Stjr#include <limits.h>
5395096Stjr#include <locale.h>
541590Srgrimes#include <stdio.h>
551590Srgrimes#include <stdlib.h>
561590Srgrimes#include <string.h>
5723694Speter#include <unistd.h>
58131624Stjr#include <wchar.h>
591590Srgrimes
601590Srgrimes/*
611590Srgrimes * There's a structure per input file which encapsulates the state of the
621590Srgrimes * file.  We repeatedly read lines from each file until we've read in all
631590Srgrimes * the consecutive lines from the file with a common join field.  Then we
641590Srgrimes * compare the set of lines with an equivalent set from the other file.
651590Srgrimes */
661590Srgrimestypedef struct {
671590Srgrimes	char *line;		/* line */
681590Srgrimes	u_long linealloc;	/* line allocated count */
691590Srgrimes	char **fields;		/* line field(s) */
701590Srgrimes	u_long fieldcnt;	/* line field(s) count */
711590Srgrimes	u_long fieldalloc;	/* line field(s) allocated count */
721590Srgrimes} LINE;
731590Srgrimes
741590Srgrimestypedef struct {
751590Srgrimes	FILE *fp;		/* file descriptor */
761590Srgrimes	u_long joinf;		/* join field (-1, -2, -j) */
771590Srgrimes	int unpair;		/* output unpairable lines (-a) */
78102944Sdwmalone	u_long number;		/* 1 for file 1, 2 for file 2 */
791590Srgrimes
801590Srgrimes	LINE *set;		/* set of lines with same field */
811590Srgrimes	int pushbool;		/* if pushback is set */
821590Srgrimes	u_long pushback;	/* line on the stack */
831590Srgrimes	u_long setcnt;		/* set count */
841590Srgrimes	u_long setalloc;	/* set allocated count */
851590Srgrimes} INPUT;
86227167Sedstatic INPUT input1 = { NULL, 0, 0, 1, NULL, 0, 0, 0, 0 },
87227167Sed    input2 = { NULL, 0, 0, 2, NULL, 0, 0, 0, 0 };
881590Srgrimes
891590Srgrimestypedef struct {
901590Srgrimes	u_long	filenum;	/* file number */
911590Srgrimes	u_long	fieldno;	/* field number */
921590Srgrimes} OLIST;
93227167Sedstatic OLIST *olist;		/* output field list */
94227167Sedstatic u_long olistcnt;		/* output field list count */
95227167Sedstatic u_long olistalloc;	/* output field allocated count */
961590Srgrimes
static int	 joinout = 1;	/* output matched lines; cleared by -v */
static int	 needsep;	/* nonzero once a field was written on this line */
static int	 spans = 1;	/* collapse runs of delimiters; cleared by -t */
static char	*empty;		/* replacement for missing fields (-e) */

/* Default delimiters; -t overwrites this buffer with a single character. */
static wchar_t	 default_tabchar[] = L" \t";
static wchar_t	*tabchar = default_tabchar;	/* delimiter characters (-t) */
1031590Srgrimes
104227167Sedstatic int  cmp(LINE *, u_long, LINE *, u_long);
105227167Sedstatic void fieldarg(char *);
106227167Sedstatic void joinlines(INPUT *, INPUT *);
107227167Sedstatic int  mbscoll(const char *, const char *);
108227167Sedstatic char *mbssep(char **, const wchar_t *);
109227167Sedstatic void obsolete(char **);
110227167Sedstatic void outfield(LINE *, u_long, int);
111227167Sedstatic void outoneline(INPUT *, LINE *);
112227167Sedstatic void outtwoline(INPUT *, LINE *, INPUT *, LINE *);
113227167Sedstatic void slurp(INPUT *);
114227167Sedstatic wchar_t *towcs(const char *);
115227167Sedstatic void usage(void);
1161590Srgrimes
1171590Srgrimesint
118102944Sdwmalonemain(int argc, char *argv[])
1191590Srgrimes{
1201590Srgrimes	INPUT *F1, *F2;
1211590Srgrimes	int aflag, ch, cval, vflag;
1221590Srgrimes	char *end;
1231590Srgrimes
12495096Stjr	setlocale(LC_ALL, "");
12595096Stjr
1261590Srgrimes	F1 = &input1;
1271590Srgrimes	F2 = &input2;
1281590Srgrimes
1291590Srgrimes	aflag = vflag = 0;
1301590Srgrimes	obsolete(argv);
13124360Simp	while ((ch = getopt(argc, argv, "\01a:e:j:1:2:o:t:v:")) != -1) {
1321590Srgrimes		switch (ch) {
1331590Srgrimes		case '\01':		/* See comment in obsolete(). */
1341590Srgrimes			aflag = 1;
1351590Srgrimes			F1->unpair = F2->unpair = 1;
1361590Srgrimes			break;
1371590Srgrimes		case '1':
1381590Srgrimes			if ((F1->joinf = strtol(optarg, &end, 10)) < 1)
1391590Srgrimes				errx(1, "-1 option field number less than 1");
1401590Srgrimes			if (*end)
1411590Srgrimes				errx(1, "illegal field number -- %s", optarg);
1421590Srgrimes			--F1->joinf;
1431590Srgrimes			break;
1441590Srgrimes		case '2':
1451590Srgrimes			if ((F2->joinf = strtol(optarg, &end, 10)) < 1)
1461590Srgrimes				errx(1, "-2 option field number less than 1");
1471590Srgrimes			if (*end)
1481590Srgrimes				errx(1, "illegal field number -- %s", optarg);
1491590Srgrimes			--F2->joinf;
1501590Srgrimes			break;
1511590Srgrimes		case 'a':
1521590Srgrimes			aflag = 1;
1531590Srgrimes			switch(strtol(optarg, &end, 10)) {
1541590Srgrimes			case 1:
1551590Srgrimes				F1->unpair = 1;
1561590Srgrimes				break;
1571590Srgrimes			case 2:
1581590Srgrimes				F2->unpair = 1;
1591590Srgrimes				break;
1601590Srgrimes			default:
1611590Srgrimes				errx(1, "-a option file number not 1 or 2");
1621590Srgrimes				break;
1631590Srgrimes			}
1641590Srgrimes			if (*end)
1651590Srgrimes				errx(1, "illegal file number -- %s", optarg);
1661590Srgrimes			break;
1671590Srgrimes		case 'e':
1681590Srgrimes			empty = optarg;
1691590Srgrimes			break;
1701590Srgrimes		case 'j':
1711590Srgrimes			if ((F1->joinf = F2->joinf =
1721590Srgrimes			    strtol(optarg, &end, 10)) < 1)
1731590Srgrimes				errx(1, "-j option field number less than 1");
1741590Srgrimes			if (*end)
1751590Srgrimes				errx(1, "illegal field number -- %s", optarg);
1761590Srgrimes			--F1->joinf;
1771590Srgrimes			--F2->joinf;
1781590Srgrimes			break;
1791590Srgrimes		case 'o':
1801590Srgrimes			fieldarg(optarg);
1811590Srgrimes			break;
1821590Srgrimes		case 't':
1831590Srgrimes			spans = 0;
184131624Stjr			if (mbrtowc(&tabchar[0], optarg, MB_LEN_MAX, NULL) !=
185131624Stjr			    strlen(optarg))
1861590Srgrimes				errx(1, "illegal tab character specification");
187131624Stjr			tabchar[1] = L'\0';
1881590Srgrimes			break;
1891590Srgrimes		case 'v':
1901590Srgrimes			vflag = 1;
1911590Srgrimes			joinout = 0;
1921590Srgrimes			switch (strtol(optarg, &end, 10)) {
1931590Srgrimes			case 1:
1941590Srgrimes				F1->unpair = 1;
1951590Srgrimes				break;
1961590Srgrimes			case 2:
1971590Srgrimes				F2->unpair = 1;
1981590Srgrimes				break;
1991590Srgrimes			default:
2001590Srgrimes				errx(1, "-v option file number not 1 or 2");
2011590Srgrimes				break;
2021590Srgrimes			}
2031590Srgrimes			if (*end)
2041590Srgrimes				errx(1, "illegal file number -- %s", optarg);
2051590Srgrimes			break;
2061590Srgrimes		case '?':
2071590Srgrimes		default:
2081590Srgrimes			usage();
2091590Srgrimes		}
2101590Srgrimes	}
2111590Srgrimes	argc -= optind;
2121590Srgrimes	argv += optind;
2131590Srgrimes
2141590Srgrimes	if (aflag && vflag)
2151590Srgrimes		errx(1, "the -a and -v options are mutually exclusive");
2161590Srgrimes
2171590Srgrimes	if (argc != 2)
2181590Srgrimes		usage();
2191590Srgrimes
2201590Srgrimes	/* Open the files; "-" means stdin. */
2211590Srgrimes	if (!strcmp(*argv, "-"))
2221590Srgrimes		F1->fp = stdin;
2231590Srgrimes	else if ((F1->fp = fopen(*argv, "r")) == NULL)
2241590Srgrimes		err(1, "%s", *argv);
2251590Srgrimes	++argv;
2261590Srgrimes	if (!strcmp(*argv, "-"))
2271590Srgrimes		F2->fp = stdin;
2281590Srgrimes	else if ((F2->fp = fopen(*argv, "r")) == NULL)
2291590Srgrimes		err(1, "%s", *argv);
2301590Srgrimes	if (F1->fp == stdin && F2->fp == stdin)
2311590Srgrimes		errx(1, "only one input file may be stdin");
2321590Srgrimes
2331590Srgrimes	slurp(F1);
2341590Srgrimes	slurp(F2);
2351590Srgrimes	while (F1->setcnt && F2->setcnt) {
2361590Srgrimes		cval = cmp(F1->set, F1->joinf, F2->set, F2->joinf);
2371590Srgrimes		if (cval == 0) {
2381590Srgrimes			/* Oh joy, oh rapture, oh beauty divine! */
2391590Srgrimes			if (joinout)
2401590Srgrimes				joinlines(F1, F2);
2411590Srgrimes			slurp(F1);
2421590Srgrimes			slurp(F2);
2431590Srgrimes		} else if (cval < 0) {
2441590Srgrimes			/* File 1 takes the lead... */
2451590Srgrimes			if (F1->unpair)
2461590Srgrimes				joinlines(F1, NULL);
2471590Srgrimes			slurp(F1);
2481590Srgrimes		} else {
2491590Srgrimes			/* File 2 takes the lead... */
2501590Srgrimes			if (F2->unpair)
2511590Srgrimes				joinlines(F2, NULL);
2521590Srgrimes			slurp(F2);
2531590Srgrimes		}
2541590Srgrimes	}
2551590Srgrimes
2561590Srgrimes	/*
2571590Srgrimes	 * Now that one of the files is used up, optionally output any
2581590Srgrimes	 * remaining lines from the other file.
2591590Srgrimes	 */
2601590Srgrimes	if (F1->unpair)
2611590Srgrimes		while (F1->setcnt) {
2621590Srgrimes			joinlines(F1, NULL);
2631590Srgrimes			slurp(F1);
2641590Srgrimes		}
2651590Srgrimes	if (F2->unpair)
2661590Srgrimes		while (F2->setcnt) {
2671590Srgrimes			joinlines(F2, NULL);
2681590Srgrimes			slurp(F2);
2691590Srgrimes		}
2701590Srgrimes	exit(0);
2711590Srgrimes}
2721590Srgrimes
273227167Sedstatic void
274102944Sdwmaloneslurp(INPUT *F)
2751590Srgrimes{
2761590Srgrimes	LINE *lp, *lastlp, tmp;
2771590Srgrimes	size_t len;
2781590Srgrimes	int cnt;
2791590Srgrimes	char *bp, *fieldp;
2801590Srgrimes
2811590Srgrimes	/*
2821590Srgrimes	 * Read all of the lines from an input file that have the same
2831590Srgrimes	 * join field.
2841590Srgrimes	 */
2851590Srgrimes	F->setcnt = 0;
28619069Sphk	for (lastlp = NULL;; ++F->setcnt) {
2871590Srgrimes		/*
2881590Srgrimes		 * If we're out of space to hold line structures, allocate
2891590Srgrimes		 * more.  Initialize the structure so that we know that this
2901590Srgrimes		 * is new space.
2911590Srgrimes		 */
2921590Srgrimes		if (F->setcnt == F->setalloc) {
2931590Srgrimes			cnt = F->setalloc;
2941590Srgrimes			F->setalloc += 50;
2951590Srgrimes			if ((F->set = realloc(F->set,
2961590Srgrimes			    F->setalloc * sizeof(LINE))) == NULL)
2971590Srgrimes				err(1, NULL);
2981590Srgrimes			memset(F->set + cnt, 0, 50 * sizeof(LINE));
29923694Speter
30023694Speter			/* re-set lastlp in case it moved */
30123694Speter			if (lastlp != NULL)
30223694Speter				lastlp = &F->set[F->setcnt - 1];
3031590Srgrimes		}
3048874Srgrimes
3051590Srgrimes		/*
3061590Srgrimes		 * Get any pushed back line, else get the next line.  Allocate
3071590Srgrimes		 * space as necessary.  If taking the line from the stack swap
3081590Srgrimes		 * the two structures so that we don't lose space allocated to
3091590Srgrimes		 * either structure.  This could be avoided by doing another
3101590Srgrimes		 * level of indirection, but it's probably okay as is.
3111590Srgrimes		 */
3121590Srgrimes		lp = &F->set[F->setcnt];
31319069Sphk		if (F->setcnt)
31419069Sphk			lastlp = &F->set[F->setcnt - 1];
3151590Srgrimes		if (F->pushbool) {
3161590Srgrimes			tmp = F->set[F->setcnt];
3171590Srgrimes			F->set[F->setcnt] = F->set[F->pushback];
3181590Srgrimes			F->set[F->pushback] = tmp;
3191590Srgrimes			F->pushbool = 0;
3201590Srgrimes			continue;
3211590Srgrimes		}
3221590Srgrimes		if ((bp = fgetln(F->fp, &len)) == NULL)
3231590Srgrimes			return;
3241590Srgrimes		if (lp->linealloc <= len + 1) {
32523694Speter			lp->linealloc += MAX(100, len + 1 - lp->linealloc);
3261590Srgrimes			if ((lp->line =
3271590Srgrimes			    realloc(lp->line, lp->linealloc)) == NULL)
3281590Srgrimes				err(1, NULL);
3291590Srgrimes		}
3301590Srgrimes		memmove(lp->line, bp, len);
3311590Srgrimes
3321590Srgrimes		/* Replace trailing newline, if it exists. */
3331590Srgrimes		if (bp[len - 1] == '\n')
3341590Srgrimes			lp->line[len - 1] = '\0';
3351590Srgrimes		else
3361590Srgrimes			lp->line[len] = '\0';
3371590Srgrimes		bp = lp->line;
3381590Srgrimes
3391590Srgrimes		/* Split the line into fields, allocate space as necessary. */
3401590Srgrimes		lp->fieldcnt = 0;
341131624Stjr		while ((fieldp = mbssep(&bp, tabchar)) != NULL) {
3421590Srgrimes			if (spans && *fieldp == '\0')
3431590Srgrimes				continue;
3441590Srgrimes			if (lp->fieldcnt == lp->fieldalloc) {
3451590Srgrimes				lp->fieldalloc += 50;
3461590Srgrimes				if ((lp->fields = realloc(lp->fields,
3471590Srgrimes				    lp->fieldalloc * sizeof(char *))) == NULL)
3481590Srgrimes					err(1, NULL);
3491590Srgrimes			}
3501590Srgrimes			lp->fields[lp->fieldcnt++] = fieldp;
3511590Srgrimes		}
3521590Srgrimes
3531590Srgrimes		/* See if the join field value has changed. */
3541590Srgrimes		if (lastlp != NULL && cmp(lp, F->joinf, lastlp, F->joinf)) {
3551590Srgrimes			F->pushbool = 1;
3561590Srgrimes			F->pushback = F->setcnt;
3571590Srgrimes			break;
3581590Srgrimes		}
3591590Srgrimes	}
3601590Srgrimes}
3611590Srgrimes
362227167Sedstatic char *
363131624Stjrmbssep(char **stringp, const wchar_t *delim)
364131624Stjr{
365131624Stjr	char *s, *tok;
366131624Stjr	const wchar_t *spanp;
367131624Stjr	wchar_t c, sc;
368131624Stjr	size_t n;
369131624Stjr
370131624Stjr	if ((s = *stringp) == NULL)
371131624Stjr		return (NULL);
372131624Stjr	for (tok = s;;) {
373131624Stjr		n = mbrtowc(&c, s, MB_LEN_MAX, NULL);
374131624Stjr		if (n == (size_t)-1 || n == (size_t)-2)
375131624Stjr			errc(1, EILSEQ, NULL);	/* XXX */
376131624Stjr		s += n;
377131624Stjr		spanp = delim;
378131624Stjr		do {
379131624Stjr			if ((sc = *spanp++) == c) {
380131624Stjr				if (c == 0)
381131624Stjr					s = NULL;
382131624Stjr				else
383131624Stjr					s[-n] = '\0';
384131624Stjr				*stringp = s;
385131624Stjr				return (tok);
386131624Stjr			}
387131624Stjr		} while (sc != 0);
388131624Stjr	}
389131624Stjr}
390131624Stjr
391227167Sedstatic int
392102944Sdwmalonecmp(LINE *lp1, u_long fieldno1, LINE *lp2, u_long fieldno2)
3931590Srgrimes{
39421811Sjoerg	if (lp1->fieldcnt <= fieldno1)
39523694Speter		return (lp2->fieldcnt <= fieldno2 ? 0 : 1);
39621811Sjoerg	if (lp2->fieldcnt <= fieldno2)
3971590Srgrimes		return (-1);
398131624Stjr	return (mbscoll(lp1->fields[fieldno1], lp2->fields[fieldno2]));
3991590Srgrimes}
4001590Srgrimes
/*
 * Collate two multibyte strings according to the current locale.  When
 * the locale has single-byte characters strcoll() is used directly;
 * otherwise both strings are converted to wide characters and compared
 * with wcscoll().  Exits if a conversion fails.
 */
static int
mbscoll(const char *s1, const char *s2)
{
	wchar_t *wide1, *wide2;
	int result;

	if (MB_CUR_MAX == 1)
		return (strcoll(s1, s2));

	wide1 = towcs(s1);
	if (wide1 == NULL)
		err(1, NULL);	/* XXX */
	wide2 = towcs(s2);
	if (wide2 == NULL)
		err(1, NULL);	/* XXX */

	result = wcscoll(wide1, wide2);
	free(wide1);
	free(wide2);
	return (result);
}
416131624Stjr
/*
 * Convert the multibyte string s to a newly allocated wide-character
 * string, which the caller must free().  Returns NULL if s contains an
 * invalid multibyte sequence or if memory cannot be allocated.
 */
static wchar_t *
towcs(const char *s)
{
	const char *src;
	wchar_t *out;
	size_t nwide;

	/* First pass: count the wide characters; src is left untouched. */
	src = s;
	nwide = mbsrtowcs(NULL, &src, 0, NULL);
	if (nwide == (size_t)-1)
		return (NULL);

	out = malloc((nwide + 1) * sizeof(*out));
	if (out == NULL)
		return (NULL);

	/* Second pass: convert, including the terminating null. */
	mbsrtowcs(out, &src, nwide + 1, NULL);
	return (out);
}
430131624Stjr
431227167Sedstatic void
432102944Sdwmalonejoinlines(INPUT *F1, INPUT *F2)
4331590Srgrimes{
434102944Sdwmalone	u_long cnt1, cnt2;
4351590Srgrimes
4361590Srgrimes	/*
4371590Srgrimes	 * Output the results of a join comparison.  The output may be from
4381590Srgrimes	 * either file 1 or file 2 (in which case the first argument is the
4391590Srgrimes	 * file from which to output) or from both.
4401590Srgrimes	 */
4411590Srgrimes	if (F2 == NULL) {
4421590Srgrimes		for (cnt1 = 0; cnt1 < F1->setcnt; ++cnt1)
4431590Srgrimes			outoneline(F1, &F1->set[cnt1]);
4441590Srgrimes		return;
4451590Srgrimes	}
4461590Srgrimes	for (cnt1 = 0; cnt1 < F1->setcnt; ++cnt1)
4471590Srgrimes		for (cnt2 = 0; cnt2 < F2->setcnt; ++cnt2)
4481590Srgrimes			outtwoline(F1, &F1->set[cnt1], F2, &F2->set[cnt2]);
4491590Srgrimes}
4501590Srgrimes
451227167Sedstatic void
452102944Sdwmaloneoutoneline(INPUT *F, LINE *lp)
4531590Srgrimes{
454102944Sdwmalone	u_long cnt;
4551590Srgrimes
4561590Srgrimes	/*
4571590Srgrimes	 * Output a single line from one of the files, according to the
4581590Srgrimes	 * join rules.  This happens when we are writing unmatched single
4591590Srgrimes	 * lines.  Output empty fields in the right places.
4601590Srgrimes	 */
4611590Srgrimes	if (olist)
4621590Srgrimes		for (cnt = 0; cnt < olistcnt; ++cnt) {
46395650Smarkm			if (olist[cnt].filenum == (unsigned)F->number)
4641590Srgrimes				outfield(lp, olist[cnt].fieldno, 0);
46593193Sjmallett			else if (olist[cnt].filenum == 0)
46693193Sjmallett				outfield(lp, F->joinf, 0);
4671590Srgrimes			else
4681590Srgrimes				outfield(lp, 0, 1);
4691590Srgrimes		}
4701590Srgrimes	else
4711590Srgrimes		for (cnt = 0; cnt < lp->fieldcnt; ++cnt)
4721590Srgrimes			outfield(lp, cnt, 0);
4731590Srgrimes	(void)printf("\n");
4741590Srgrimes	if (ferror(stdout))
4751590Srgrimes		err(1, "stdout");
4761590Srgrimes	needsep = 0;
4771590Srgrimes}
4781590Srgrimes
479227167Sedstatic void
480102944Sdwmaloneouttwoline(INPUT *F1, LINE *lp1, INPUT *F2, LINE *lp2)
4811590Srgrimes{
482102944Sdwmalone	u_long cnt;
4831590Srgrimes
4841590Srgrimes	/* Output a pair of lines according to the join list (if any). */
4851590Srgrimes	if (olist)
4861590Srgrimes		for (cnt = 0; cnt < olistcnt; ++cnt)
48793193Sjmallett			if (olist[cnt].filenum == 0) {
48893193Sjmallett				if (lp1->fieldcnt >= F1->joinf)
48993193Sjmallett					outfield(lp1, F1->joinf, 0);
49093193Sjmallett				else
49193193Sjmallett					outfield(lp2, F2->joinf, 0);
49293193Sjmallett			} else if (olist[cnt].filenum == 1)
4931590Srgrimes				outfield(lp1, olist[cnt].fieldno, 0);
4941590Srgrimes			else /* if (olist[cnt].filenum == 2) */
4951590Srgrimes				outfield(lp2, olist[cnt].fieldno, 0);
4961590Srgrimes	else {
4971590Srgrimes		/*
4981590Srgrimes		 * Output the join field, then the remaining fields from F1
4991590Srgrimes		 * and F2.
5001590Srgrimes		 */
5011590Srgrimes		outfield(lp1, F1->joinf, 0);
5021590Srgrimes		for (cnt = 0; cnt < lp1->fieldcnt; ++cnt)
5031590Srgrimes			if (F1->joinf != cnt)
5041590Srgrimes				outfield(lp1, cnt, 0);
5051590Srgrimes		for (cnt = 0; cnt < lp2->fieldcnt; ++cnt)
5061590Srgrimes			if (F2->joinf != cnt)
5071590Srgrimes				outfield(lp2, cnt, 0);
5081590Srgrimes	}
5091590Srgrimes	(void)printf("\n");
5101590Srgrimes	if (ferror(stdout))
5111590Srgrimes		err(1, "stdout");
5121590Srgrimes	needsep = 0;
5131590Srgrimes}
5141590Srgrimes
515227167Sedstatic void
516102944Sdwmaloneoutfield(LINE *lp, u_long fieldno, int out_empty)
5171590Srgrimes{
5181590Srgrimes	if (needsep++)
519246319Sandrew		(void)printf("%lc", (wint_t)*tabchar);
52048566Sbillf	if (!ferror(stdout)) {
52121811Sjoerg		if (lp->fieldcnt <= fieldno || out_empty) {
5221590Srgrimes			if (empty != NULL)
5231590Srgrimes				(void)printf("%s", empty);
5241590Srgrimes		} else {
5251590Srgrimes			if (*lp->fields[fieldno] == '\0')
5261590Srgrimes				return;
5271590Srgrimes			(void)printf("%s", lp->fields[fieldno]);
5281590Srgrimes		}
52948566Sbillf	}
5301590Srgrimes	if (ferror(stdout))
5311590Srgrimes		err(1, "stdout");
5321590Srgrimes}
5331590Srgrimes
5341590Srgrimes/*
5351590Srgrimes * Convert an output list argument "2.1, 1.3, 2.4" into an array of output
5361590Srgrimes * fields.
5371590Srgrimes */
538227167Sedstatic void
539102944Sdwmalonefieldarg(char *option)
5401590Srgrimes{
54193193Sjmallett	u_long fieldno, filenum;
5421590Srgrimes	char *end, *token;
5431590Srgrimes
54423694Speter	while ((token = strsep(&option, ", \t")) != NULL) {
5451590Srgrimes		if (*token == '\0')
5461590Srgrimes			continue;
54793193Sjmallett		if (token[0] == '0')
54893193Sjmallett			filenum = fieldno = 0;
54993193Sjmallett		else if ((token[0] == '1' || token[0] == '2') &&
55093193Sjmallett		    token[1] == '.') {
55193193Sjmallett			filenum = token[0] - '0';
55293193Sjmallett			fieldno = strtol(token + 2, &end, 10);
55393193Sjmallett			if (*end)
55493193Sjmallett				errx(1, "malformed -o option field");
55593193Sjmallett			if (fieldno == 0)
55693193Sjmallett				errx(1, "field numbers are 1 based");
55793193Sjmallett			--fieldno;
55893193Sjmallett		} else
5591590Srgrimes			errx(1, "malformed -o option field");
5601590Srgrimes		if (olistcnt == olistalloc) {
5611590Srgrimes			olistalloc += 50;
5621590Srgrimes			if ((olist = realloc(olist,
5631590Srgrimes			    olistalloc * sizeof(OLIST))) == NULL)
5641590Srgrimes				err(1, NULL);
5651590Srgrimes		}
56693193Sjmallett		olist[olistcnt].filenum = filenum;
56793193Sjmallett		olist[olistcnt].fieldno = fieldno;
5681590Srgrimes		++olistcnt;
5691590Srgrimes	}
5701590Srgrimes}
5711590Srgrimes
/*
 * Rewrite historical option syntax in argv, in place, so that getopt(3)
 * can parse it.  Scanning starts at argv[1] and stops at the first
 * argument beginning with "--" or at the terminating NULL.
 *
 *   "-a" without a file number   becomes the private option "-\01"
 *   "-j1 arg" / "-j2 arg"        become "-1 arg" / "-2 arg"
 *   "-o arg arg ..."             becomes "-o arg -oarg ..."
 *
 * Replacement "-o" arguments are allocated with malloc() and are never
 * freed.  Exits with status 1 on a malformed "-j" option.
 */
static void
obsolete(char **argv)
{
	size_t len;
	char **p, *ap, *t;

	while ((ap = *++argv) != NULL) {
		/* Return if "--". */
		if (ap[0] == '-' && ap[1] == '-')
			return;
		/* skip if not an option */
		if (ap[0] != '-')
			continue;
		switch (ap[1]) {
		case 'a':
			/*
			 * The original join allowed "-a", which meant the
			 * same as -a1 plus -a2.  POSIX 1003.2, Draft 11.2
			 * only specifies this as "-a 1" and "-a 2", so we
			 * have to use another option flag, one that is
			 * unlikely to ever be used or accidentally entered
			 * on the command line.  (Well, we could reallocate
			 * the argv array, but that hardly seems worthwhile.)
			 */
			if (ap[2] == '\0' && (argv[1] == NULL ||
			    (strcmp(argv[1], "1") != 0 &&
			    strcmp(argv[1], "2") != 0))) {
				ap[1] = '\01';
				warnx("-a option used without an argument; "
				    "reverting to historical behavior");
			}
			break;
		case 'j':
			/*
			 * The original join allowed "-j[12] arg" and "-j arg".
			 * Convert the former to "-[12] arg".  Don't convert
			 * the latter since getopt(3) can handle it.
			 */
			switch(ap[2]) {
			case '1':
			case '2':
				if (ap[3] != '\0')
					errx(1, "illegal option -- %s", ap);
				ap[1] = ap[2];
				ap[2] = '\0';
				break;
			case '\0':
				break;
			default:
				errx(1, "illegal option -- %s", ap);
			}
			break;
		case 'o':
			/*
			 * The original join allowed "-o arg arg".
			 * Convert to "-o arg -o arg".
			 */
			if (ap[2] != '\0')
				break;
			/*
			 * "-o" is the last argument: there is nothing to
			 * convert, and argv + 2 would lie beyond the NULL
			 * that terminates the vector.
			 */
			if (argv[1] == NULL)
				break;
			/* argv[1] is -o's own argument; start after it. */
			for (p = argv + 2; *p; ++p) {
				if (p[0][0] == '0' || ((p[0][0] != '1' &&
				    p[0][0] != '2') || p[0][1] != '.'))
					break;
				len = strlen(*p);
				if (len - 2 != strspn(*p + 2, "0123456789"))
					break;
				/* Room for "-o", the argument and its NUL. */
				if ((t = malloc(len + 3)) == NULL)
					err(1, NULL);
				t[0] = '-';
				t[1] = 'o';
				memmove(t + 2, *p, len + 1);
				*p = t;
			}
			/* Resume the outer scan at the first unconverted one. */
			argv = p - 1;
			break;
		}
	}
}
6561590Srgrimes
/* Print the command synopsis on stderr and exit with status 1. */
static void
usage(void)
{
	const char *head, *tail, *cont;

	head = "usage: join [-a fileno | -v fileno ] [-e string] [-1 field]";
	tail = "[-2 field]";
	cont = "            [-o list] [-t char] file1 file2";

	(void)fprintf(stderr, "%s %s\n%s\n", head, tail, cont);
	exit(1);
}
666