join.c revision 95650
11590Srgrimes/*-
21590Srgrimes * Copyright (c) 1991, 1993, 1994
31590Srgrimes *	The Regents of the University of California.  All rights reserved.
41590Srgrimes *
51590Srgrimes * This code is derived from software contributed to Berkeley by
61590Srgrimes * Steve Hayman of the Computer Science Department, Indiana University,
71590Srgrimes * Michiro Hikida and David Goodenough.
81590Srgrimes *
91590Srgrimes * Redistribution and use in source and binary forms, with or without
101590Srgrimes * modification, are permitted provided that the following conditions
111590Srgrimes * are met:
121590Srgrimes * 1. Redistributions of source code must retain the above copyright
131590Srgrimes *    notice, this list of conditions and the following disclaimer.
141590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
151590Srgrimes *    notice, this list of conditions and the following disclaimer in the
161590Srgrimes *    documentation and/or other materials provided with the distribution.
171590Srgrimes * 3. All advertising materials mentioning features or use of this software
181590Srgrimes *    must display the following acknowledgement:
191590Srgrimes *	This product includes software developed by the University of
201590Srgrimes *	California, Berkeley and its contributors.
211590Srgrimes * 4. Neither the name of the University nor the names of its contributors
221590Srgrimes *    may be used to endorse or promote products derived from this software
231590Srgrimes *    without specific prior written permission.
241590Srgrimes *
251590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
261590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
271590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
281590Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
291590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
301590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
311590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
321590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
331590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
341590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
351590Srgrimes * SUCH DAMAGE.
361590Srgrimes */
371590Srgrimes
#ifndef lint
/* Copyright notice compiled into the object unless lint is defined. */
static const char copyright[] =
	"@(#) Copyright (c) 1991, 1993, 1994\n"
	"\tThe Regents of the University of California.  All rights reserved.\n";

#if 0
/* Historical SCCS identifier; kept for reference, never compiled. */
static char sccsid[] = "@(#)join.c	8.6 (Berkeley) 5/4/95";
#endif
/* Revision identifier string for this file. */
static const char rcsid[] =
	"$FreeBSD: head/usr.bin/join/join.c 95650 2002-04-28 13:46:40Z markm $";
#endif /* not lint */
511590Srgrimes
521590Srgrimes#include <sys/param.h>
531590Srgrimes
541590Srgrimes#include <err.h>
551590Srgrimes#include <errno.h>
5695096Stjr#include <locale.h>
571590Srgrimes#include <stdio.h>
581590Srgrimes#include <stdlib.h>
591590Srgrimes#include <string.h>
6023694Speter#include <unistd.h>
611590Srgrimes
621590Srgrimes/*
631590Srgrimes * There's a structure per input file which encapsulates the state of the
641590Srgrimes * file.  We repeatedly read lines from each file until we've read in all
651590Srgrimes * the consecutive lines from the file with a common join field.  Then we
661590Srgrimes * compare the set of lines with an equivalent set from the other file.
671590Srgrimes */
681590Srgrimestypedef struct {
691590Srgrimes	char *line;		/* line */
701590Srgrimes	u_long linealloc;	/* line allocated count */
711590Srgrimes	char **fields;		/* line field(s) */
721590Srgrimes	u_long fieldcnt;	/* line field(s) count */
731590Srgrimes	u_long fieldalloc;	/* line field(s) allocated count */
741590Srgrimes} LINE;
751590Srgrimes
761590Srgrimestypedef struct {
771590Srgrimes	FILE *fp;		/* file descriptor */
781590Srgrimes	u_long joinf;		/* join field (-1, -2, -j) */
791590Srgrimes	int unpair;		/* output unpairable lines (-a) */
801590Srgrimes	int number;		/* 1 for file 1, 2 for file 2 */
811590Srgrimes
821590Srgrimes	LINE *set;		/* set of lines with same field */
831590Srgrimes	int pushbool;		/* if pushback is set */
841590Srgrimes	u_long pushback;	/* line on the stack */
851590Srgrimes	u_long setcnt;		/* set count */
861590Srgrimes	u_long setalloc;	/* set allocated count */
871590Srgrimes} INPUT;
8895650SmarkmINPUT input1 = { NULL, 0, 0, 1, NULL, 0, 0, 0, 0 },
8995650Smarkm      input2 = { NULL, 0, 0, 2, NULL, 0, 0, 0, 0 };
901590Srgrimes
911590Srgrimestypedef struct {
921590Srgrimes	u_long	filenum;	/* file number */
931590Srgrimes	u_long	fieldno;	/* field number */
941590Srgrimes} OLIST;
951590SrgrimesOLIST *olist;			/* output field list */
961590Srgrimesu_long olistcnt;		/* output field list count */
971590Srgrimesu_long olistalloc;		/* output field allocated count */
981590Srgrimes
/* Print lines whose join fields match; cleared by -v. */
int joinout = 1;
/* Nonzero once a field has been written on the current output line. */
int needsep;
/* Skip empty fields produced by adjacent delimiters; cleared by -t. */
int spans = 1;
/* Replacement string for fields that have no value (-e); NULL if unset. */
char *empty;
/* Delimiters used until -t supplies a different one. */
static char default_tabchar[] = " \t";
/* Delimiter characters (-t). */
char *tabchar = default_tabchar;
1051590Srgrimes
10692920Simpint  cmp(LINE *, u_long, LINE *, u_long);
10792920Simpvoid fieldarg(char *);
10892920Simpvoid joinlines(INPUT *, INPUT *);
10992920Simpvoid obsolete(char **);
11092920Simpvoid outfield(LINE *, u_long, int);
11192920Simpvoid outoneline(INPUT *, LINE *);
11292920Simpvoid outtwoline(INPUT *, LINE *, INPUT *, LINE *);
11392920Simpvoid slurp(INPUT *);
11492920Simpvoid usage(void);
1151590Srgrimes
1161590Srgrimesint
1171590Srgrimesmain(argc, argv)
1181590Srgrimes	int argc;
1191590Srgrimes	char *argv[];
1201590Srgrimes{
1211590Srgrimes	INPUT *F1, *F2;
1221590Srgrimes	int aflag, ch, cval, vflag;
1231590Srgrimes	char *end;
1241590Srgrimes
12595096Stjr	setlocale(LC_ALL, "");
12695096Stjr
1271590Srgrimes	F1 = &input1;
1281590Srgrimes	F2 = &input2;
1291590Srgrimes
1301590Srgrimes	aflag = vflag = 0;
1311590Srgrimes	obsolete(argv);
13224360Simp	while ((ch = getopt(argc, argv, "\01a:e:j:1:2:o:t:v:")) != -1) {
1331590Srgrimes		switch (ch) {
1341590Srgrimes		case '\01':		/* See comment in obsolete(). */
1351590Srgrimes			aflag = 1;
1361590Srgrimes			F1->unpair = F2->unpair = 1;
1371590Srgrimes			break;
1381590Srgrimes		case '1':
1391590Srgrimes			if ((F1->joinf = strtol(optarg, &end, 10)) < 1)
1401590Srgrimes				errx(1, "-1 option field number less than 1");
1411590Srgrimes			if (*end)
1421590Srgrimes				errx(1, "illegal field number -- %s", optarg);
1431590Srgrimes			--F1->joinf;
1441590Srgrimes			break;
1451590Srgrimes		case '2':
1461590Srgrimes			if ((F2->joinf = strtol(optarg, &end, 10)) < 1)
1471590Srgrimes				errx(1, "-2 option field number less than 1");
1481590Srgrimes			if (*end)
1491590Srgrimes				errx(1, "illegal field number -- %s", optarg);
1501590Srgrimes			--F2->joinf;
1511590Srgrimes			break;
1521590Srgrimes		case 'a':
1531590Srgrimes			aflag = 1;
1541590Srgrimes			switch(strtol(optarg, &end, 10)) {
1551590Srgrimes			case 1:
1561590Srgrimes				F1->unpair = 1;
1571590Srgrimes				break;
1581590Srgrimes			case 2:
1591590Srgrimes				F2->unpair = 1;
1601590Srgrimes				break;
1611590Srgrimes			default:
1621590Srgrimes				errx(1, "-a option file number not 1 or 2");
1631590Srgrimes				break;
1641590Srgrimes			}
1651590Srgrimes			if (*end)
1661590Srgrimes				errx(1, "illegal file number -- %s", optarg);
1671590Srgrimes			break;
1681590Srgrimes		case 'e':
1691590Srgrimes			empty = optarg;
1701590Srgrimes			break;
1711590Srgrimes		case 'j':
1721590Srgrimes			if ((F1->joinf = F2->joinf =
1731590Srgrimes			    strtol(optarg, &end, 10)) < 1)
1741590Srgrimes				errx(1, "-j option field number less than 1");
1751590Srgrimes			if (*end)
1761590Srgrimes				errx(1, "illegal field number -- %s", optarg);
1771590Srgrimes			--F1->joinf;
1781590Srgrimes			--F2->joinf;
1791590Srgrimes			break;
1801590Srgrimes		case 'o':
1811590Srgrimes			fieldarg(optarg);
1821590Srgrimes			break;
1831590Srgrimes		case 't':
1841590Srgrimes			spans = 0;
1851590Srgrimes			if (strlen(tabchar = optarg) != 1)
1861590Srgrimes				errx(1, "illegal tab character specification");
1871590Srgrimes			break;
1881590Srgrimes		case 'v':
1891590Srgrimes			vflag = 1;
1901590Srgrimes			joinout = 0;
1911590Srgrimes			switch (strtol(optarg, &end, 10)) {
1921590Srgrimes			case 1:
1931590Srgrimes				F1->unpair = 1;
1941590Srgrimes				break;
1951590Srgrimes			case 2:
1961590Srgrimes				F2->unpair = 1;
1971590Srgrimes				break;
1981590Srgrimes			default:
1991590Srgrimes				errx(1, "-v option file number not 1 or 2");
2001590Srgrimes				break;
2011590Srgrimes			}
2021590Srgrimes			if (*end)
2031590Srgrimes				errx(1, "illegal file number -- %s", optarg);
2041590Srgrimes			break;
2051590Srgrimes		case '?':
2061590Srgrimes		default:
2071590Srgrimes			usage();
2081590Srgrimes		}
2091590Srgrimes	}
2101590Srgrimes	argc -= optind;
2111590Srgrimes	argv += optind;
2121590Srgrimes
2131590Srgrimes	if (aflag && vflag)
2141590Srgrimes		errx(1, "the -a and -v options are mutually exclusive");
2151590Srgrimes
2161590Srgrimes	if (argc != 2)
2171590Srgrimes		usage();
2181590Srgrimes
2191590Srgrimes	/* Open the files; "-" means stdin. */
2201590Srgrimes	if (!strcmp(*argv, "-"))
2211590Srgrimes		F1->fp = stdin;
2221590Srgrimes	else if ((F1->fp = fopen(*argv, "r")) == NULL)
2231590Srgrimes		err(1, "%s", *argv);
2241590Srgrimes	++argv;
2251590Srgrimes	if (!strcmp(*argv, "-"))
2261590Srgrimes		F2->fp = stdin;
2271590Srgrimes	else if ((F2->fp = fopen(*argv, "r")) == NULL)
2281590Srgrimes		err(1, "%s", *argv);
2291590Srgrimes	if (F1->fp == stdin && F2->fp == stdin)
2301590Srgrimes		errx(1, "only one input file may be stdin");
2311590Srgrimes
2321590Srgrimes	slurp(F1);
2331590Srgrimes	slurp(F2);
2341590Srgrimes	while (F1->setcnt && F2->setcnt) {
2351590Srgrimes		cval = cmp(F1->set, F1->joinf, F2->set, F2->joinf);
2361590Srgrimes		if (cval == 0) {
2371590Srgrimes			/* Oh joy, oh rapture, oh beauty divine! */
2381590Srgrimes			if (joinout)
2391590Srgrimes				joinlines(F1, F2);
2401590Srgrimes			slurp(F1);
2411590Srgrimes			slurp(F2);
2421590Srgrimes		} else if (cval < 0) {
2431590Srgrimes			/* File 1 takes the lead... */
2441590Srgrimes			if (F1->unpair)
2451590Srgrimes				joinlines(F1, NULL);
2461590Srgrimes			slurp(F1);
2471590Srgrimes		} else {
2481590Srgrimes			/* File 2 takes the lead... */
2491590Srgrimes			if (F2->unpair)
2501590Srgrimes				joinlines(F2, NULL);
2511590Srgrimes			slurp(F2);
2521590Srgrimes		}
2531590Srgrimes	}
2541590Srgrimes
2551590Srgrimes	/*
2561590Srgrimes	 * Now that one of the files is used up, optionally output any
2571590Srgrimes	 * remaining lines from the other file.
2581590Srgrimes	 */
2591590Srgrimes	if (F1->unpair)
2601590Srgrimes		while (F1->setcnt) {
2611590Srgrimes			joinlines(F1, NULL);
2621590Srgrimes			slurp(F1);
2631590Srgrimes		}
2641590Srgrimes	if (F2->unpair)
2651590Srgrimes		while (F2->setcnt) {
2661590Srgrimes			joinlines(F2, NULL);
2671590Srgrimes			slurp(F2);
2681590Srgrimes		}
2691590Srgrimes	exit(0);
2701590Srgrimes}
2711590Srgrimes
2721590Srgrimesvoid
2731590Srgrimesslurp(F)
2741590Srgrimes	INPUT *F;
2751590Srgrimes{
2761590Srgrimes	LINE *lp, *lastlp, tmp;
2771590Srgrimes	size_t len;
2781590Srgrimes	int cnt;
2791590Srgrimes	char *bp, *fieldp;
2801590Srgrimes
2811590Srgrimes	/*
2821590Srgrimes	 * Read all of the lines from an input file that have the same
2831590Srgrimes	 * join field.
2841590Srgrimes	 */
2851590Srgrimes	F->setcnt = 0;
28619069Sphk	for (lastlp = NULL;; ++F->setcnt) {
2871590Srgrimes		/*
2881590Srgrimes		 * If we're out of space to hold line structures, allocate
2891590Srgrimes		 * more.  Initialize the structure so that we know that this
2901590Srgrimes		 * is new space.
2911590Srgrimes		 */
2921590Srgrimes		if (F->setcnt == F->setalloc) {
2931590Srgrimes			cnt = F->setalloc;
2941590Srgrimes			F->setalloc += 50;
2951590Srgrimes			if ((F->set = realloc(F->set,
2961590Srgrimes			    F->setalloc * sizeof(LINE))) == NULL)
2971590Srgrimes				err(1, NULL);
2981590Srgrimes			memset(F->set + cnt, 0, 50 * sizeof(LINE));
29923694Speter
30023694Speter			/* re-set lastlp in case it moved */
30123694Speter			if (lastlp != NULL)
30223694Speter				lastlp = &F->set[F->setcnt - 1];
3031590Srgrimes		}
3048874Srgrimes
3051590Srgrimes		/*
3061590Srgrimes		 * Get any pushed back line, else get the next line.  Allocate
3071590Srgrimes		 * space as necessary.  If taking the line from the stack swap
3081590Srgrimes		 * the two structures so that we don't lose space allocated to
3091590Srgrimes		 * either structure.  This could be avoided by doing another
3101590Srgrimes		 * level of indirection, but it's probably okay as is.
3111590Srgrimes		 */
3121590Srgrimes		lp = &F->set[F->setcnt];
31319069Sphk		if (F->setcnt)
31419069Sphk			lastlp = &F->set[F->setcnt - 1];
3151590Srgrimes		if (F->pushbool) {
3161590Srgrimes			tmp = F->set[F->setcnt];
3171590Srgrimes			F->set[F->setcnt] = F->set[F->pushback];
3181590Srgrimes			F->set[F->pushback] = tmp;
3191590Srgrimes			F->pushbool = 0;
3201590Srgrimes			continue;
3211590Srgrimes		}
3221590Srgrimes		if ((bp = fgetln(F->fp, &len)) == NULL)
3231590Srgrimes			return;
3241590Srgrimes		if (lp->linealloc <= len + 1) {
32523694Speter			lp->linealloc += MAX(100, len + 1 - lp->linealloc);
3261590Srgrimes			if ((lp->line =
3271590Srgrimes			    realloc(lp->line, lp->linealloc)) == NULL)
3281590Srgrimes				err(1, NULL);
3291590Srgrimes		}
3301590Srgrimes		memmove(lp->line, bp, len);
3311590Srgrimes
3321590Srgrimes		/* Replace trailing newline, if it exists. */
3331590Srgrimes		if (bp[len - 1] == '\n')
3341590Srgrimes			lp->line[len - 1] = '\0';
3351590Srgrimes		else
3361590Srgrimes			lp->line[len] = '\0';
3371590Srgrimes		bp = lp->line;
3381590Srgrimes
3391590Srgrimes		/* Split the line into fields, allocate space as necessary. */
3401590Srgrimes		lp->fieldcnt = 0;
3411590Srgrimes		while ((fieldp = strsep(&bp, tabchar)) != NULL) {
3421590Srgrimes			if (spans && *fieldp == '\0')
3431590Srgrimes				continue;
3441590Srgrimes			if (lp->fieldcnt == lp->fieldalloc) {
3451590Srgrimes				lp->fieldalloc += 50;
3461590Srgrimes				if ((lp->fields = realloc(lp->fields,
3471590Srgrimes				    lp->fieldalloc * sizeof(char *))) == NULL)
3481590Srgrimes					err(1, NULL);
3491590Srgrimes			}
3501590Srgrimes			lp->fields[lp->fieldcnt++] = fieldp;
3511590Srgrimes		}
3521590Srgrimes
3531590Srgrimes		/* See if the join field value has changed. */
3541590Srgrimes		if (lastlp != NULL && cmp(lp, F->joinf, lastlp, F->joinf)) {
3551590Srgrimes			F->pushbool = 1;
3561590Srgrimes			F->pushback = F->setcnt;
3571590Srgrimes			break;
3581590Srgrimes		}
3591590Srgrimes	}
3601590Srgrimes}
3611590Srgrimes
3621590Srgrimesint
3631590Srgrimescmp(lp1, fieldno1, lp2, fieldno2)
3641590Srgrimes	LINE *lp1, *lp2;
3651590Srgrimes	u_long fieldno1, fieldno2;
3661590Srgrimes{
36721811Sjoerg	if (lp1->fieldcnt <= fieldno1)
36823694Speter		return (lp2->fieldcnt <= fieldno2 ? 0 : 1);
36921811Sjoerg	if (lp2->fieldcnt <= fieldno2)
3701590Srgrimes		return (-1);
37195096Stjr	return (strcoll(lp1->fields[fieldno1], lp2->fields[fieldno2]));
3721590Srgrimes}
3731590Srgrimes
3741590Srgrimesvoid
3751590Srgrimesjoinlines(F1, F2)
3761590Srgrimes	INPUT *F1, *F2;
3771590Srgrimes{
37895650Smarkm	unsigned int cnt1, cnt2;
3791590Srgrimes
3801590Srgrimes	/*
3811590Srgrimes	 * Output the results of a join comparison.  The output may be from
3821590Srgrimes	 * either file 1 or file 2 (in which case the first argument is the
3831590Srgrimes	 * file from which to output) or from both.
3841590Srgrimes	 */
3851590Srgrimes	if (F2 == NULL) {
3861590Srgrimes		for (cnt1 = 0; cnt1 < F1->setcnt; ++cnt1)
3871590Srgrimes			outoneline(F1, &F1->set[cnt1]);
3881590Srgrimes		return;
3891590Srgrimes	}
3901590Srgrimes	for (cnt1 = 0; cnt1 < F1->setcnt; ++cnt1)
3911590Srgrimes		for (cnt2 = 0; cnt2 < F2->setcnt; ++cnt2)
3921590Srgrimes			outtwoline(F1, &F1->set[cnt1], F2, &F2->set[cnt2]);
3931590Srgrimes}
3941590Srgrimes
3951590Srgrimesvoid
3961590Srgrimesoutoneline(F, lp)
3971590Srgrimes	INPUT *F;
3981590Srgrimes	LINE *lp;
3991590Srgrimes{
40095650Smarkm	unsigned int cnt;
4011590Srgrimes
4021590Srgrimes	/*
4031590Srgrimes	 * Output a single line from one of the files, according to the
4041590Srgrimes	 * join rules.  This happens when we are writing unmatched single
4051590Srgrimes	 * lines.  Output empty fields in the right places.
4061590Srgrimes	 */
4071590Srgrimes	if (olist)
4081590Srgrimes		for (cnt = 0; cnt < olistcnt; ++cnt) {
40995650Smarkm			if (olist[cnt].filenum == (unsigned)F->number)
4101590Srgrimes				outfield(lp, olist[cnt].fieldno, 0);
41193193Sjmallett			else if (olist[cnt].filenum == 0)
41293193Sjmallett				outfield(lp, F->joinf, 0);
4131590Srgrimes			else
4141590Srgrimes				outfield(lp, 0, 1);
4151590Srgrimes		}
4161590Srgrimes	else
4171590Srgrimes		for (cnt = 0; cnt < lp->fieldcnt; ++cnt)
4181590Srgrimes			outfield(lp, cnt, 0);
4191590Srgrimes	(void)printf("\n");
4201590Srgrimes	if (ferror(stdout))
4211590Srgrimes		err(1, "stdout");
4221590Srgrimes	needsep = 0;
4231590Srgrimes}
4241590Srgrimes
4251590Srgrimesvoid
4261590Srgrimesouttwoline(F1, lp1, F2, lp2)
4271590Srgrimes	INPUT *F1, *F2;
4281590Srgrimes	LINE *lp1, *lp2;
4291590Srgrimes{
43095650Smarkm	unsigned int cnt;
4311590Srgrimes
4321590Srgrimes	/* Output a pair of lines according to the join list (if any). */
4331590Srgrimes	if (olist)
4341590Srgrimes		for (cnt = 0; cnt < olistcnt; ++cnt)
43593193Sjmallett			if (olist[cnt].filenum == 0) {
43693193Sjmallett				if (lp1->fieldcnt >= F1->joinf)
43793193Sjmallett					outfield(lp1, F1->joinf, 0);
43893193Sjmallett				else
43993193Sjmallett					outfield(lp2, F2->joinf, 0);
44093193Sjmallett			} else if (olist[cnt].filenum == 1)
4411590Srgrimes				outfield(lp1, olist[cnt].fieldno, 0);
4421590Srgrimes			else /* if (olist[cnt].filenum == 2) */
4431590Srgrimes				outfield(lp2, olist[cnt].fieldno, 0);
4441590Srgrimes	else {
4451590Srgrimes		/*
4461590Srgrimes		 * Output the join field, then the remaining fields from F1
4471590Srgrimes		 * and F2.
4481590Srgrimes		 */
4491590Srgrimes		outfield(lp1, F1->joinf, 0);
4501590Srgrimes		for (cnt = 0; cnt < lp1->fieldcnt; ++cnt)
4511590Srgrimes			if (F1->joinf != cnt)
4521590Srgrimes				outfield(lp1, cnt, 0);
4531590Srgrimes		for (cnt = 0; cnt < lp2->fieldcnt; ++cnt)
4541590Srgrimes			if (F2->joinf != cnt)
4551590Srgrimes				outfield(lp2, cnt, 0);
4561590Srgrimes	}
4571590Srgrimes	(void)printf("\n");
4581590Srgrimes	if (ferror(stdout))
4591590Srgrimes		err(1, "stdout");
4601590Srgrimes	needsep = 0;
4611590Srgrimes}
4621590Srgrimes
4631590Srgrimesvoid
4641590Srgrimesoutfield(lp, fieldno, out_empty)
4651590Srgrimes	LINE *lp;
4661590Srgrimes	u_long fieldno;
4671590Srgrimes	int out_empty;
4681590Srgrimes{
4691590Srgrimes	if (needsep++)
4701590Srgrimes		(void)printf("%c", *tabchar);
47148566Sbillf	if (!ferror(stdout)) {
47221811Sjoerg		if (lp->fieldcnt <= fieldno || out_empty) {
4731590Srgrimes			if (empty != NULL)
4741590Srgrimes				(void)printf("%s", empty);
4751590Srgrimes		} else {
4761590Srgrimes			if (*lp->fields[fieldno] == '\0')
4771590Srgrimes				return;
4781590Srgrimes			(void)printf("%s", lp->fields[fieldno]);
4791590Srgrimes		}
48048566Sbillf	}
4811590Srgrimes	if (ferror(stdout))
4821590Srgrimes		err(1, "stdout");
4831590Srgrimes}
4841590Srgrimes
4851590Srgrimes/*
4861590Srgrimes * Convert an output list argument "2.1, 1.3, 2.4" into an array of output
4871590Srgrimes * fields.
4881590Srgrimes */
4891590Srgrimesvoid
4901590Srgrimesfieldarg(option)
4911590Srgrimes	char *option;
4921590Srgrimes{
49393193Sjmallett	u_long fieldno, filenum;
4941590Srgrimes	char *end, *token;
4951590Srgrimes
49623694Speter	while ((token = strsep(&option, ", \t")) != NULL) {
4971590Srgrimes		if (*token == '\0')
4981590Srgrimes			continue;
49993193Sjmallett		if (token[0] == '0')
50093193Sjmallett			filenum = fieldno = 0;
50193193Sjmallett		else if ((token[0] == '1' || token[0] == '2') &&
50293193Sjmallett		    token[1] == '.') {
50393193Sjmallett			filenum = token[0] - '0';
50493193Sjmallett			fieldno = strtol(token + 2, &end, 10);
50593193Sjmallett			if (*end)
50693193Sjmallett				errx(1, "malformed -o option field");
50793193Sjmallett			if (fieldno == 0)
50893193Sjmallett				errx(1, "field numbers are 1 based");
50993193Sjmallett			--fieldno;
51093193Sjmallett		} else
5111590Srgrimes			errx(1, "malformed -o option field");
5121590Srgrimes		if (olistcnt == olistalloc) {
5131590Srgrimes			olistalloc += 50;
5141590Srgrimes			if ((olist = realloc(olist,
5151590Srgrimes			    olistalloc * sizeof(OLIST))) == NULL)
5161590Srgrimes				err(1, NULL);
5171590Srgrimes		}
51893193Sjmallett		olist[olistcnt].filenum = filenum;
51993193Sjmallett		olist[olistcnt].fieldno = fieldno;
5201590Srgrimes		++olistcnt;
5211590Srgrimes	}
5221590Srgrimes}
5231590Srgrimes
/*
 * Rewrite historical option spellings in argv, in place, so that
 * getopt(3) can parse them:
 *
 *	-a (no file number)	becomes the private option character '\01'
 *	-j1 arg, -j2 arg	become -1 arg, -2 arg
 *	-o arg arg ...		becomes -o arg -oarg ...
 *
 * Scanning starts at argv[1] and stops at the terminating NULL or at the
 * first word that begins with "--".  Strings allocated for rewritten -o
 * operands are stored in argv and never freed.
 */
void
obsolete(char **argv)
{
	size_t len;
	char **p, *ap, *t;

	while ((ap = *++argv) != NULL) {
		/* Return if "--". */
		if (ap[0] == '-' && ap[1] == '-')
			return;
		/* skip if not an option */
		if (ap[0] != '-')
			continue;
		switch (ap[1]) {
		case 'a':
			/*
			 * The original join allowed "-a", which meant the
			 * same as -a1 plus -a2.  POSIX 1003.2, Draft 11.2
			 * only specifies this as "-a 1" and "-a 2", so we
			 * have to use another option flag, one that is
			 * unlikely to ever be used or accidentally entered
			 * on the command line.  (Well, we could reallocate
			 * the argv array, but that hardly seems worthwhile.)
			 */
			if (ap[2] == '\0' && (argv[1] == NULL ||
			    (strcmp(argv[1], "1") != 0 &&
			    strcmp(argv[1], "2") != 0))) {
				ap[1] = '\01';
				warnx("-a option used without an argument; "
				    "reverting to historical behavior");
			}
			break;
		case 'j':
			/*
			 * The original join allowed "-j[12] arg" and "-j arg".
			 * Convert the former to "-[12] arg".  Don't convert
			 * the latter since getopt(3) can handle it.
			 */
			switch (ap[2]) {
			case '1':
			case '2':
				if (ap[3] != '\0')
					errx(1, "illegal option -- %s", ap);
				ap[1] = ap[2];
				ap[2] = '\0';
				break;
			case '\0':
				break;
			default:
				errx(1, "illegal option -- %s", ap);
			}
			break;
		case 'o':
			/*
			 * The original join allowed "-o arg arg".
			 * Convert to "-o arg -o arg".
			 */
			if (ap[2] != '\0')
				break;
			/*
			 * If -o is the last word there is nothing to
			 * convert, and argv + 2 would point past the NULL
			 * that terminates the vector.
			 */
			if (argv[1] == NULL)
				break;
			/* argv[1] is the argument of -o itself; start after it. */
			for (p = argv + 2; *p; ++p) {
				/* Only words of the form [12].digits qualify. */
				if (p[0][0] == '0' ||
				    ((p[0][0] != '1' && p[0][0] != '2') ||
				    p[0][1] != '.'))
					break;
				len = strlen(*p);
				if (len - 2 != strspn(*p + 2, "0123456789"))
					break;
				/* Room for "-o", the word and its NUL. */
				if ((t = malloc(len + 3)) == NULL)
					err(1, NULL);
				t[0] = '-';
				t[1] = 'o';
				memmove(t + 2, *p, len + 1);
				*p = t;
			}
			/* Resume the outer scan at the word that ended the run. */
			argv = p - 1;
			break;
		default:
			break;
		}
	}
}
6091590Srgrimes
/* Print the usage message on standard error and exit with status 1. */
void
usage(void)
{
	(void)fprintf(stderr, "%s\n%s\n",
	    "usage: join [-a fileno | -v fileno ] [-e string] [-1 field] "
	    "[-2 field]",
	    "            [-o list] [-t char] file1 file2");
	exit(1);
}
619