join.c revision 227167
1101206Srwatson/*- 2101206Srwatson * Copyright (c) 1991, 1993, 1994 3101206Srwatson * The Regents of the University of California. All rights reserved. 4101206Srwatson * 5101206Srwatson * This code is derived from software contributed to Berkeley by 6101206Srwatson * Steve Hayman of the Computer Science Department, Indiana University, 7101206Srwatson * Michiro Hikida and David Goodenough. 8101206Srwatson * 9101206Srwatson * Redistribution and use in source and binary forms, with or without 10101206Srwatson * modification, are permitted provided that the following conditions 11101206Srwatson * are met: 12101206Srwatson * 1. Redistributions of source code must retain the above copyright 13101206Srwatson * notice, this list of conditions and the following disclaimer. 14101206Srwatson * 2. Redistributions in binary form must reproduce the above copyright 15101206Srwatson * notice, this list of conditions and the following disclaimer in the 16101206Srwatson * documentation and/or other materials provided with the distribution. 17101206Srwatson * 4. Neither the name of the University nor the names of its contributors 18101206Srwatson * may be used to endorse or promote products derived from this software 19101206Srwatson * without specific prior written permission. 20101206Srwatson * 21101206Srwatson * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22101206Srwatson * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23101206Srwatson * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24101206Srwatson * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25101206Srwatson * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26101206Srwatson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27101206Srwatson * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28101206Srwatson * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29101206Srwatson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30101206Srwatson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31101206Srwatson * SUCH DAMAGE. 32101206Srwatson */ 33101206Srwatson 34101206Srwatson#ifndef lint 35101206Srwatsonstatic const char copyright[] = 36101206Srwatson"@(#) Copyright (c) 1991, 1993, 1994\n\ 37101206Srwatson The Regents of the University of California. All rights reserved.\n"; 38101206Srwatson#endif /* not lint */ 39101206Srwatson 40101206Srwatson#ifndef lint 41101206Srwatson#if 0 42101206Srwatsonstatic char sccsid[] = "@(#)join.c 8.6 (Berkeley) 5/4/95"; 43101206Srwatson#endif 44101206Srwatson#endif /* not lint */ 45101206Srwatson#include <sys/cdefs.h> 46101206Srwatson__FBSDID("$FreeBSD: head/usr.bin/join/join.c 227167 2011-11-06 08:15:35Z ed $"); 47101206Srwatson 48101206Srwatson#include <sys/param.h> 49101206Srwatson 50101885Srwatson#include <err.h> 51101206Srwatson#include <errno.h> 52101206Srwatson#include <limits.h> 53101206Srwatson#include <locale.h> 54101206Srwatson#include <stdio.h> 55101206Srwatson#include <stdlib.h> 56101206Srwatson#include <string.h> 57101206Srwatson#include <unistd.h> 58101206Srwatson#include <wchar.h> 59101206Srwatson 60101206Srwatson/* 61101206Srwatson * There's a structure per input file which encapsulates the state of the 62101206Srwatson * file. We repeatedly read lines from each file until we've read in all 63101206Srwatson * the consecutive lines from the file with a common join field. Then we 64101206Srwatson * compare the set of lines with an equivalent set from the other file. 65101206Srwatson */ 66101206Srwatsontypedef struct { 67101206Srwatson char *line; /* line */ 68101206Srwatson u_long linealloc; /* line allocated count */ 69101206Srwatson char **fields; /* line field(s) */ 70101206Srwatson u_long fieldcnt; /* line field(s) count */ 71101206Srwatson u_long fieldalloc; /* line field(s) allocated count */ 72101206Srwatson} LINE; 73101206Srwatson 74101206Srwatsontypedef struct { 75101206Srwatson FILE *fp; /* file descriptor */ 76101206Srwatson u_long joinf; /* join field (-1, -2, -j) */ 77101206Srwatson int unpair; /* output unpairable lines (-a) */ 78101206Srwatson u_long number; /* 1 for file 1, 2 for file 2 */ 79101206Srwatson 80101206Srwatson LINE *set; /* set of lines with same field */ 81101206Srwatson int pushbool; /* if pushback is set */ 82101206Srwatson u_long pushback; /* line on the stack */ 83101206Srwatson u_long setcnt; /* set count */ 84101206Srwatson u_long setalloc; /* set allocated count */ 85101206Srwatson} INPUT; 86101206Srwatsonstatic INPUT input1 = { NULL, 0, 0, 1, NULL, 0, 0, 0, 0 }, 87101206Srwatson input2 = { NULL, 0, 0, 2, NULL, 0, 0, 0, 0 }; 88101206Srwatson 89101206Srwatsontypedef struct { 90101206Srwatson u_long filenum; /* file number */ 91101206Srwatson u_long fieldno; /* field number */ 92101206Srwatson} OLIST; 93101206Srwatsonstatic OLIST *olist; /* output field list */ 94101206Srwatsonstatic u_long olistcnt; /* output field list count */ 95101206Srwatsonstatic u_long olistalloc; /* output field allocated count */ 96101206Srwatson 97101206Srwatsonstatic int joinout = 1; /* show lines with matched join fields (-v) */ 98101206Srwatsonstatic int needsep; /* need separator character */ 99101206Srwatsonstatic int spans = 1; /* span multiple delimiters (-t) */ 100101206Srwatsonstatic char *empty; /* empty field replacement string (-e) */ 101101206Srwatsonstatic wchar_t default_tabchar[] = L" \t"; 102101206Srwatsonstatic wchar_t *tabchar = default_tabchar; /* delimiter characters (-t) */ 103101206Srwatson 104101206Srwatsonstatic int cmp(LINE *, u_long, LINE *, u_long); 105101206Srwatsonstatic void fieldarg(char *); 106101206Srwatsonstatic void joinlines(INPUT *, INPUT *); 107101206Srwatsonstatic int mbscoll(const char *, const char *); 108101206Srwatsonstatic char *mbssep(char **, const wchar_t *); 109101206Srwatsonstatic void obsolete(char **); 110101206Srwatsonstatic void outfield(LINE *, u_long, int); 111101206Srwatsonstatic void outoneline(INPUT *, LINE *); 112101206Srwatsonstatic void outtwoline(INPUT *, LINE *, INPUT *, LINE *); 113101206Srwatsonstatic void slurp(INPUT *); 114101206Srwatsonstatic wchar_t *towcs(const char *); 115101206Srwatsonstatic void usage(void); 116101206Srwatson 117101206Srwatsonint 118101206Srwatsonmain(int argc, char *argv[]) 119101206Srwatson{ 120101206Srwatson INPUT *F1, *F2; 121101206Srwatson int aflag, ch, cval, vflag; 122101206Srwatson char *end; 123101206Srwatson 124101206Srwatson setlocale(LC_ALL, ""); 125101206Srwatson 126101206Srwatson F1 = &input1; 127101206Srwatson F2 = &input2; 128101206Srwatson 129101206Srwatson aflag = vflag = 0; 130101206Srwatson obsolete(argv); 131101206Srwatson while ((ch = getopt(argc, argv, "\01a:e:j:1:2:o:t:v:")) != -1) { 132101206Srwatson switch (ch) { 133101206Srwatson case '\01': /* See comment in obsolete(). */ 134101206Srwatson aflag = 1; 135101206Srwatson F1->unpair = F2->unpair = 1; 136101206Srwatson break; 137101206Srwatson case '1': 138101206Srwatson if ((F1->joinf = strtol(optarg, &end, 10)) < 1) 139101206Srwatson errx(1, "-1 option field number less than 1"); 140101206Srwatson if (*end) 141101206Srwatson errx(1, "illegal field number -- %s", optarg); 142101206Srwatson --F1->joinf; 143101206Srwatson break; 144101206Srwatson case '2': 145101206Srwatson if ((F2->joinf = strtol(optarg, &end, 10)) < 1) 146101206Srwatson errx(1, "-2 option field number less than 1"); 147101206Srwatson if (*end) 148101206Srwatson errx(1, "illegal field number -- %s", optarg); 149101206Srwatson --F2->joinf; 150101206Srwatson break; 151101206Srwatson case 'a': 152101206Srwatson aflag = 1; 153101206Srwatson switch(strtol(optarg, &end, 10)) { 154101206Srwatson case 1: 155101206Srwatson F1->unpair = 1; 156101206Srwatson break; 157101206Srwatson case 2: 158101206Srwatson F2->unpair = 1; 159101206Srwatson break; 160101206Srwatson default: 161101206Srwatson errx(1, "-a option file number not 1 or 2"); 162101206Srwatson break; 163101206Srwatson } 164101206Srwatson if (*end) 165101206Srwatson errx(1, "illegal file number -- %s", optarg); 166101206Srwatson break; 167101206Srwatson case 'e': 168101206Srwatson empty = optarg; 169101206Srwatson break; 170101206Srwatson case 'j': 171101206Srwatson if ((F1->joinf = F2->joinf = 172101206Srwatson strtol(optarg, &end, 10)) < 1) 173101206Srwatson errx(1, "-j option field number less than 1"); 174101206Srwatson if (*end) 175101206Srwatson errx(1, "illegal field number -- %s", optarg); 176101206Srwatson --F1->joinf; 177101206Srwatson --F2->joinf; 178101206Srwatson break; 179101206Srwatson case 'o': 180101206Srwatson fieldarg(optarg); 181101206Srwatson break; 182101206Srwatson case 't': 183101206Srwatson spans = 0; 184101206Srwatson if (mbrtowc(&tabchar[0], optarg, MB_LEN_MAX, NULL) != 185101206Srwatson strlen(optarg)) 186101206Srwatson errx(1, "illegal tab character specification"); 187101206Srwatson tabchar[1] = L'\0'; 188101206Srwatson break; 189101206Srwatson case 'v': 190101206Srwatson vflag = 1; 191101206Srwatson joinout = 0; 192101206Srwatson switch (strtol(optarg, &end, 10)) { 193101206Srwatson case 1: 194101206Srwatson F1->unpair = 1; 195101206Srwatson break; 196101206Srwatson case 2: 197101206Srwatson F2->unpair = 1; 198101206Srwatson break; 199101206Srwatson default: 200101206Srwatson errx(1, "-v option file number not 1 or 2"); 201101206Srwatson break; 202101206Srwatson } 203101206Srwatson if (*end) 204101206Srwatson errx(1, "illegal file number -- %s", optarg); 205101206Srwatson break; 206101206Srwatson case '?': 207101206Srwatson default: 208101206Srwatson usage(); 209101206Srwatson } 210101206Srwatson } 211101206Srwatson argc -= optind; 212101206Srwatson argv += optind; 213101206Srwatson 214101206Srwatson if (aflag && vflag) 215101206Srwatson errx(1, "the -a and -v options are mutually exclusive"); 216101206Srwatson 217101206Srwatson if (argc != 2) 218101206Srwatson usage(); 219101206Srwatson 220101206Srwatson /* Open the files; "-" means stdin. */ 221101206Srwatson if (!strcmp(*argv, "-")) 222101206Srwatson F1->fp = stdin; 223101206Srwatson else if ((F1->fp = fopen(*argv, "r")) == NULL) 224101206Srwatson err(1, "%s", *argv); 225101206Srwatson ++argv; 226101206Srwatson if (!strcmp(*argv, "-")) 227101206Srwatson F2->fp = stdin; 228101206Srwatson else if ((F2->fp = fopen(*argv, "r")) == NULL) 229101206Srwatson err(1, "%s", *argv); 230101206Srwatson if (F1->fp == stdin && F2->fp == stdin) 231101206Srwatson errx(1, "only one input file may be stdin"); 232101206Srwatson 233101206Srwatson slurp(F1); 234101206Srwatson slurp(F2); 235101206Srwatson while (F1->setcnt && F2->setcnt) { 236101206Srwatson cval = cmp(F1->set, F1->joinf, F2->set, F2->joinf); 237101206Srwatson if (cval == 0) { 238101206Srwatson /* Oh joy, oh rapture, oh beauty divine! */ 239101206Srwatson if (joinout) 240101206Srwatson joinlines(F1, F2); 241101206Srwatson slurp(F1); 242101206Srwatson slurp(F2); 243101206Srwatson } else if (cval < 0) { 244101206Srwatson /* File 1 takes the lead... */ 245101206Srwatson if (F1->unpair) 246101206Srwatson joinlines(F1, NULL); 247101206Srwatson slurp(F1); 248101206Srwatson } else { 249101206Srwatson /* File 2 takes the lead... */ 250101206Srwatson if (F2->unpair) 251101206Srwatson joinlines(F2, NULL); 252101206Srwatson slurp(F2); 253101206Srwatson } 254101206Srwatson } 255101206Srwatson 256101206Srwatson /* 257101206Srwatson * Now that one of the files is used up, optionally output any 258101206Srwatson * remaining lines from the other file. 259101206Srwatson */ 260101206Srwatson if (F1->unpair) 261101206Srwatson while (F1->setcnt) { 262101206Srwatson joinlines(F1, NULL); 263101206Srwatson slurp(F1); 264101206Srwatson } 265101206Srwatson if (F2->unpair) 266101206Srwatson while (F2->setcnt) { 267101206Srwatson joinlines(F2, NULL); 268101206Srwatson slurp(F2); 269101206Srwatson } 270101206Srwatson exit(0); 271101206Srwatson} 272101206Srwatson 273101206Srwatsonstatic void 274101206Srwatsonslurp(INPUT *F) 275101206Srwatson{ 276101206Srwatson LINE *lp, *lastlp, tmp; 277101206Srwatson size_t len; 278101206Srwatson int cnt; 279101206Srwatson char *bp, *fieldp; 280101206Srwatson 281101206Srwatson /* 282101206Srwatson * Read all of the lines from an input file that have the same 283101206Srwatson * join field. 284101206Srwatson */ 285101206Srwatson F->setcnt = 0; 286101206Srwatson for (lastlp = NULL;; ++F->setcnt) { 287101206Srwatson /* 288101206Srwatson * If we're out of space to hold line structures, allocate 289101206Srwatson * more. Initialize the structure so that we know that this 290101206Srwatson * is new space. 291101206Srwatson */ 292101206Srwatson if (F->setcnt == F->setalloc) { 293101206Srwatson cnt = F->setalloc; 294101206Srwatson F->setalloc += 50; 295101206Srwatson if ((F->set = realloc(F->set, 296101206Srwatson F->setalloc * sizeof(LINE))) == NULL) 297101206Srwatson err(1, NULL); 298101206Srwatson memset(F->set + cnt, 0, 50 * sizeof(LINE)); 299101206Srwatson 300101206Srwatson /* re-set lastlp in case it moved */ 301101206Srwatson if (lastlp != NULL) 302101206Srwatson lastlp = &F->set[F->setcnt - 1]; 303101206Srwatson } 304101206Srwatson 305101206Srwatson /* 306101206Srwatson * Get any pushed back line, else get the next line. Allocate 307101206Srwatson * space as necessary. If taking the line from the stack swap 308101206Srwatson * the two structures so that we don't lose space allocated to 309101206Srwatson * either structure. This could be avoided by doing another 310101206Srwatson * level of indirection, but it's probably okay as is. 311101206Srwatson */ 312101206Srwatson lp = &F->set[F->setcnt]; 313101206Srwatson if (F->setcnt) 314101206Srwatson lastlp = &F->set[F->setcnt - 1]; 315101206Srwatson if (F->pushbool) { 316101206Srwatson tmp = F->set[F->setcnt]; 317101206Srwatson F->set[F->setcnt] = F->set[F->pushback]; 318101206Srwatson F->set[F->pushback] = tmp; 319101206Srwatson F->pushbool = 0; 320101206Srwatson continue; 321101206Srwatson } 322101206Srwatson if ((bp = fgetln(F->fp, &len)) == NULL) 323101206Srwatson return; 324101206Srwatson if (lp->linealloc <= len + 1) { 325101206Srwatson lp->linealloc += MAX(100, len + 1 - lp->linealloc); 326101206Srwatson if ((lp->line = 327101206Srwatson realloc(lp->line, lp->linealloc)) == NULL) 328101206Srwatson err(1, NULL); 329101206Srwatson } 330101206Srwatson memmove(lp->line, bp, len); 331101206Srwatson 332101206Srwatson /* Replace trailing newline, if it exists. */ 333101206Srwatson if (bp[len - 1] == '\n') 334101206Srwatson lp->line[len - 1] = '\0'; 335101206Srwatson else 336101206Srwatson lp->line[len] = '\0'; 337101206Srwatson bp = lp->line; 338101206Srwatson 339101206Srwatson /* Split the line into fields, allocate space as necessary. */ 340101206Srwatson lp->fieldcnt = 0; 341101206Srwatson while ((fieldp = mbssep(&bp, tabchar)) != NULL) { 342101206Srwatson if (spans && *fieldp == '\0') 343101206Srwatson continue; 344101206Srwatson if (lp->fieldcnt == lp->fieldalloc) { 345101206Srwatson lp->fieldalloc += 50; 346101206Srwatson if ((lp->fields = realloc(lp->fields, 347101206Srwatson lp->fieldalloc * sizeof(char *))) == NULL) 348101206Srwatson err(1, NULL); 349101206Srwatson } 350101206Srwatson lp->fields[lp->fieldcnt++] = fieldp; 351101206Srwatson } 352101206Srwatson 353101206Srwatson /* See if the join field value has changed. */ 354101206Srwatson if (lastlp != NULL && cmp(lp, F->joinf, lastlp, F->joinf)) { 355101206Srwatson F->pushbool = 1; 356101206Srwatson F->pushback = F->setcnt; 357101206Srwatson break; 358101206Srwatson } 359101206Srwatson } 360101206Srwatson} 361101206Srwatson 362101206Srwatsonstatic char * 363101206Srwatsonmbssep(char **stringp, const wchar_t *delim) 364101206Srwatson{ 365101206Srwatson char *s, *tok; 366101206Srwatson const wchar_t *spanp; 367101206Srwatson wchar_t c, sc; 368101206Srwatson size_t n; 369101206Srwatson 370101206Srwatson if ((s = *stringp) == NULL) 371101206Srwatson return (NULL); 372101206Srwatson for (tok = s;;) { 373101206Srwatson n = mbrtowc(&c, s, MB_LEN_MAX, NULL); 374101206Srwatson if (n == (size_t)-1 || n == (size_t)-2) 375101206Srwatson errc(1, EILSEQ, NULL); /* XXX */ 376101206Srwatson s += n; 377101206Srwatson spanp = delim; 378101206Srwatson do { 379101206Srwatson if ((sc = *spanp++) == c) { 380101206Srwatson if (c == 0) 381101206Srwatson s = NULL; 382101206Srwatson else 383101206Srwatson s[-n] = '\0'; 384101206Srwatson *stringp = s; 385101206Srwatson return (tok); 386101206Srwatson } 387101206Srwatson } while (sc != 0); 388101206Srwatson } 389101206Srwatson} 390101206Srwatson 391101206Srwatsonstatic int 392101206Srwatsoncmp(LINE *lp1, u_long fieldno1, LINE *lp2, u_long fieldno2) 393101206Srwatson{ 394101206Srwatson if (lp1->fieldcnt <= fieldno1) 395101206Srwatson return (lp2->fieldcnt <= fieldno2 ? 0 : 1); 396101206Srwatson if (lp2->fieldcnt <= fieldno2) 397101206Srwatson return (-1); 398101206Srwatson return (mbscoll(lp1->fields[fieldno1], lp2->fields[fieldno2])); 399101206Srwatson} 400101206Srwatson 401101206Srwatsonstatic int 402101206Srwatsonmbscoll(const char *s1, const char *s2) 403101206Srwatson{ 404101206Srwatson wchar_t *w1, *w2; 405101206Srwatson int ret; 406101206Srwatson 407101206Srwatson if (MB_CUR_MAX == 1) 408101206Srwatson return (strcoll(s1, s2)); 409101206Srwatson if ((w1 = towcs(s1)) == NULL || (w2 = towcs(s2)) == NULL) 410101206Srwatson err(1, NULL); /* XXX */ 411101206Srwatson ret = wcscoll(w1, w2); 412101206Srwatson free(w1); 413101206Srwatson free(w2); 414101206Srwatson return (ret); 415101206Srwatson} 416101206Srwatson 417101206Srwatsonstatic wchar_t * 418101206Srwatsontowcs(const char *s) 419101206Srwatson{ 420101206Srwatson wchar_t *wcs; 421101206Srwatson size_t n; 422101206Srwatson 423101206Srwatson if ((n = mbsrtowcs(NULL, &s, 0, NULL)) == (size_t)-1) 424101206Srwatson return (NULL); 425101206Srwatson if ((wcs = malloc((n + 1) * sizeof(*wcs))) == NULL) 426101206Srwatson return (NULL); 427101206Srwatson mbsrtowcs(wcs, &s, n + 1, NULL); 428101206Srwatson return (wcs); 429101206Srwatson} 430101206Srwatson 431101206Srwatsonstatic void 432101206Srwatsonjoinlines(INPUT *F1, INPUT *F2) 433101206Srwatson{ 434101206Srwatson u_long cnt1, cnt2; 435101206Srwatson 436101206Srwatson /* 437101206Srwatson * Output the results of a join comparison. The output may be from 438101206Srwatson * either file 1 or file 2 (in which case the first argument is the 439101206Srwatson * file from which to output) or from both. 440101206Srwatson */ 441101206Srwatson if (F2 == NULL) { 442101206Srwatson for (cnt1 = 0; cnt1 < F1->setcnt; ++cnt1) 443101206Srwatson outoneline(F1, &F1->set[cnt1]); 444101206Srwatson return; 445101206Srwatson } 446101206Srwatson for (cnt1 = 0; cnt1 < F1->setcnt; ++cnt1) 447101206Srwatson for (cnt2 = 0; cnt2 < F2->setcnt; ++cnt2) 448101206Srwatson outtwoline(F1, &F1->set[cnt1], F2, &F2->set[cnt2]); 449101206Srwatson} 450101206Srwatson 451101206Srwatsonstatic void 452101206Srwatsonoutoneline(INPUT *F, LINE *lp) 453101206Srwatson{ 454101206Srwatson u_long cnt; 455101206Srwatson 456101206Srwatson /* 457101206Srwatson * Output a single line from one of the files, according to the 458101206Srwatson * join rules. This happens when we are writing unmatched single 459101206Srwatson * lines. Output empty fields in the right places. 460101206Srwatson */ 461101206Srwatson if (olist) 462101206Srwatson for (cnt = 0; cnt < olistcnt; ++cnt) { 463101206Srwatson if (olist[cnt].filenum == (unsigned)F->number) 464101206Srwatson outfield(lp, olist[cnt].fieldno, 0); 465101206Srwatson else if (olist[cnt].filenum == 0) 466101206Srwatson outfield(lp, F->joinf, 0); 467101206Srwatson else 468101206Srwatson outfield(lp, 0, 1); 469101206Srwatson } 470101206Srwatson else 471101206Srwatson for (cnt = 0; cnt < lp->fieldcnt; ++cnt) 472101206Srwatson outfield(lp, cnt, 0); 473101206Srwatson (void)printf("\n"); 474101206Srwatson if (ferror(stdout)) 475101206Srwatson err(1, "stdout"); 476101206Srwatson needsep = 0; 477101206Srwatson} 478101206Srwatson 479101206Srwatsonstatic void 480101206Srwatsonouttwoline(INPUT *F1, LINE *lp1, INPUT *F2, LINE *lp2) 481101206Srwatson{ 482101206Srwatson u_long cnt; 483101206Srwatson 484101206Srwatson /* Output a pair of lines according to the join list (if any). */ 485101206Srwatson if (olist) 486101206Srwatson for (cnt = 0; cnt < olistcnt; ++cnt) 487101206Srwatson if (olist[cnt].filenum == 0) { 488101206Srwatson if (lp1->fieldcnt >= F1->joinf) 489101206Srwatson outfield(lp1, F1->joinf, 0); 490101206Srwatson else 491101206Srwatson outfield(lp2, F2->joinf, 0); 492101206Srwatson } else if (olist[cnt].filenum == 1) 493101206Srwatson outfield(lp1, olist[cnt].fieldno, 0); 494101206Srwatson else /* if (olist[cnt].filenum == 2) */ 495101206Srwatson outfield(lp2, olist[cnt].fieldno, 0); 496101206Srwatson else { 497101206Srwatson /* 498101206Srwatson * Output the join field, then the remaining fields from F1 499101206Srwatson * and F2. 500101206Srwatson */ 501101206Srwatson outfield(lp1, F1->joinf, 0); 502101206Srwatson for (cnt = 0; cnt < lp1->fieldcnt; ++cnt) 503101206Srwatson if (F1->joinf != cnt) 504101206Srwatson outfield(lp1, cnt, 0); 505101206Srwatson for (cnt = 0; cnt < lp2->fieldcnt; ++cnt) 506101206Srwatson if (F2->joinf != cnt) 507101206Srwatson outfield(lp2, cnt, 0); 508101206Srwatson } 509101206Srwatson (void)printf("\n"); 510101206Srwatson if (ferror(stdout)) 511101206Srwatson err(1, "stdout"); 512101206Srwatson needsep = 0; 513101206Srwatson} 514101206Srwatson 515101206Srwatsonstatic void 516101206Srwatsonoutfield(LINE *lp, u_long fieldno, int out_empty) 517101206Srwatson{ 518101206Srwatson if (needsep++) 519101206Srwatson (void)printf("%lc", *tabchar); 520101206Srwatson if (!ferror(stdout)) { 521101206Srwatson if (lp->fieldcnt <= fieldno || out_empty) { 522101206Srwatson if (empty != NULL) 523101206Srwatson (void)printf("%s", empty); 524101206Srwatson } else { 525101206Srwatson if (*lp->fields[fieldno] == '\0') 526101206Srwatson return; 527101206Srwatson (void)printf("%s", lp->fields[fieldno]); 528101206Srwatson } 529101206Srwatson } 530101206Srwatson if (ferror(stdout)) 531101206Srwatson err(1, "stdout"); 532101206Srwatson} 533101206Srwatson 534101206Srwatson/* 535101206Srwatson * Convert an output list argument "2.1, 1.3, 2.4" into an array of output 536101206Srwatson * fields. 537101206Srwatson */ 538101206Srwatsonstatic void 539101206Srwatsonfieldarg(char *option) 540101206Srwatson{ 541101206Srwatson u_long fieldno, filenum; 542101206Srwatson char *end, *token; 543101206Srwatson 544101206Srwatson while ((token = strsep(&option, ", \t")) != NULL) { 545101206Srwatson if (*token == '\0') 546101206Srwatson continue; 547101206Srwatson if (token[0] == '0') 548101206Srwatson filenum = fieldno = 0; 549101206Srwatson else if ((token[0] == '1' || token[0] == '2') && 550101206Srwatson token[1] == '.') { 551101206Srwatson filenum = token[0] - '0'; 552101206Srwatson fieldno = strtol(token + 2, &end, 10); 553101206Srwatson if (*end) 554101206Srwatson errx(1, "malformed -o option field"); 555101206Srwatson if (fieldno == 0) 556101206Srwatson errx(1, "field numbers are 1 based"); 557101206Srwatson --fieldno; 558101206Srwatson } else 559101206Srwatson errx(1, "malformed -o option field"); 560101206Srwatson if (olistcnt == olistalloc) { 561101206Srwatson olistalloc += 50; 562101206Srwatson if ((olist = realloc(olist, 563101206Srwatson olistalloc * sizeof(OLIST))) == NULL) 564101206Srwatson err(1, NULL); 565101206Srwatson } 566101206Srwatson olist[olistcnt].filenum = filenum; 567101206Srwatson olist[olistcnt].fieldno = fieldno; 568101206Srwatson ++olistcnt; 569101206Srwatson } 570101206Srwatson} 571101206Srwatson 572101206Srwatsonstatic void 573101206Srwatsonobsolete(char **argv) 574101206Srwatson{ 575101206Srwatson size_t len; 576101206Srwatson char **p, *ap, *t; 577101206Srwatson 578101206Srwatson while ((ap = *++argv) != NULL) { 579101206Srwatson /* Return if "--". */ 580101206Srwatson if (ap[0] == '-' && ap[1] == '-') 581101206Srwatson return; 582101206Srwatson /* skip if not an option */ 583101206Srwatson if (ap[0] != '-') 584101206Srwatson continue; 585101206Srwatson switch (ap[1]) { 586101206Srwatson case 'a': 587101206Srwatson /* 588101206Srwatson * The original join allowed "-a", which meant the 589101206Srwatson * same as -a1 plus -a2. POSIX 1003.2, Draft 11.2 590101206Srwatson * only specifies this as "-a 1" and "a -2", so we 591101206Srwatson * have to use another option flag, one that is 592101206Srwatson * unlikely to ever be used or accidentally entered 593101206Srwatson * on the command line. (Well, we could reallocate 594101206Srwatson * the argv array, but that hardly seems worthwhile.) 595101206Srwatson */ 596101206Srwatson if (ap[2] == '\0' && (argv[1] == NULL || 597101206Srwatson (strcmp(argv[1], "1") != 0 && 598101206Srwatson strcmp(argv[1], "2") != 0))) { 599101206Srwatson ap[1] = '\01'; 600101206Srwatson warnx("-a option used without an argument; " 601101206Srwatson "reverting to historical behavior"); 602101206Srwatson } 603101206Srwatson break; 604101206Srwatson case 'j': 605101206Srwatson /* 606101206Srwatson * The original join allowed "-j[12] arg" and "-j arg". 607101206Srwatson * Convert the former to "-[12] arg". Don't convert 608101206Srwatson * the latter since getopt(3) can handle it. 609101206Srwatson */ 610101206Srwatson switch(ap[2]) { 611101206Srwatson case '1': 612101206Srwatson if (ap[3] != '\0') 613101206Srwatson goto jbad; 614101206Srwatson ap[1] = '1'; 615101206Srwatson ap[2] = '\0'; 616101206Srwatson break; 617101206Srwatson case '2': 618101206Srwatson if (ap[3] != '\0') 619101206Srwatson goto jbad; 620101206Srwatson ap[1] = '2'; 621101206Srwatson ap[2] = '\0'; 622101206Srwatson break; 623101206Srwatson case '\0': 624101206Srwatson break; 625101206Srwatson default: 626101206Srwatsonjbad: errx(1, "illegal option -- %s", ap); 627101206Srwatson usage(); 628101206Srwatson } 629101206Srwatson break; 630101206Srwatson case 'o': 631101206Srwatson /* 632101206Srwatson * The original join allowed "-o arg arg". 633101206Srwatson * Convert to "-o arg -o arg". 634101206Srwatson */ 635101206Srwatson if (ap[2] != '\0') 636101206Srwatson break; 637101206Srwatson for (p = argv + 2; *p; ++p) { 638101206Srwatson if (p[0][0] == '0' || ((p[0][0] != '1' && 639101206Srwatson p[0][0] != '2') || p[0][1] != '.')) 640101206Srwatson break; 641101206Srwatson len = strlen(*p); 642101206Srwatson if (len - 2 != strspn(*p + 2, "0123456789")) 643101206Srwatson break; 644101206Srwatson if ((t = malloc(len + 3)) == NULL) 645101206Srwatson err(1, NULL); 646101206Srwatson t[0] = '-'; 647101206Srwatson t[1] = 'o'; 648101206Srwatson memmove(t + 2, *p, len + 1); 649101206Srwatson *p = t; 650101206Srwatson } 651101206Srwatson argv = p - 1; 652101206Srwatson break; 653101206Srwatson } 654101206Srwatson } 655101206Srwatson} 656101206Srwatson 657101206Srwatsonstatic void 658101206Srwatsonusage(void) 659101206Srwatson{ 660101206Srwatson (void)fprintf(stderr, "%s %s\n%s\n", 661101206Srwatson "usage: join [-a fileno | -v fileno ] [-e string] [-1 field]", 662101206Srwatson "[-2 field]", 663101206Srwatson " [-o list] [-t char] file1 file2"); 664101206Srwatson exit(1); 665101206Srwatson} 666101206Srwatson