1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1980, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32
33#include <sys/types.h>
34
35#include <ctype.h>
36#include <err.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <signal.h>
40#include <string.h>
41#include <unistd.h>
42
43#include "pathnames.h"
44
45/*
46 * xstr - extract and hash strings in a C program
47 *
48 * Bill Joy UCB
49 * November, 1978
50 */
51
/*
 * Explicitly discard the value of an expression.  The argument is
 * parenthesised so that any expression (assignment, conditional, ...)
 * can be passed without changing how the cast binds.
 */
#define	ignore(a)	((void) (a))
53
/* Count of characters requested from the strings file through xgetc(). */
static off_t	tellpt;

/* Offset that hashit() assigns to the next string it has not seen before. */
static off_t	mesgpt;

/*
 * Name of the strings file.  It starts out as "strings"; main() may point
 * it at a freshly created temporary file instead.
 */
static char	cstrings[] = "strings";
static char	*strings = cstrings;

/* Option counters filled in by main(). */
static int	cflg;		/* -c seen */
static int	vflg;		/* -v seen */
static int	readstd;	/* "-" seen: process standard input */
63
64static char lastchr(char *);
65
66static int fgetNUL(char *, int, FILE *);
67static int istail(char *, char *);
68static int octdigit(char);
69static int xgetc(FILE *);
70
71static off_t hashit(char *, int);
72static off_t yankstr(char **);
73
74static void usage(void) __dead2;
75
76static void flushsh(void);
77static void found(int, off_t, char *);
78static void inithash(void);
79static void onintr(int);
80static void process(const char *);
81static void prstr(char *);
82static void xsdotc(void);
83
84int
85main(int argc, char *argv[])
86{
87	int c;
88	int fdesc;
89
90	while ((c = getopt(argc, argv, "-cv")) != -1)
91		switch (c) {
92		case '-':
93			readstd++;
94			break;
95		case 'c':
96			cflg++;
97			break;
98		case 'v':
99			vflg++;
100			break;
101		default:
102			usage();
103		}
104	argc -= optind;
105	argv += optind;
106
107	if (signal(SIGINT, SIG_IGN) == SIG_DFL)
108		signal(SIGINT, onintr);
109	if (cflg || (argc == 0 && !readstd))
110		inithash();
111	else {
112		strings = strdup(_PATH_TMP);
113		if (strings == NULL)
114			err(1, "strdup() failed");
115		fdesc = mkstemp(strings);
116		if (fdesc == -1)
117			err(1, "Unable to create temporary file");
118		close(fdesc);
119	}
120
121	while (readstd || argc > 0) {
122		if (freopen("x.c", "w", stdout) == NULL)
123			err(1, "x.c");
124		if (!readstd && freopen(argv[0], "r", stdin) == NULL)
125			err(2, "%s", argv[0]);
126		process("x.c");
127		if (readstd == 0)
128			argc--, argv++;
129		else
130			readstd = 0;
131	}
132	flushsh();
133	if (cflg == 0)
134		xsdotc();
135	if (strings[0] == '/')
136		ignore(unlink(strings));
137	exit(0);
138}
139
/* Write the command synopsis to stderr and exit with status 1. */
static void
usage(void)
{
	fputs("usage: xstr [-cv] [-] [file ...]\n", stderr);
	exit(1);
}
146
/* Current input line; yankstr() refills it when a string is continued. */
static char linebuf[BUFSIZ];

/*
 * Copy the C source on stdin to stdout, replacing every string literal
 * found outside a comment with "(&xstr[offset])".
 *
 * Lines starting with '#' are copied without being scanned, except that
 * "# <digit>..." is rewritten as "#line <digit>...".  Comment state is
 * carried across lines in incomm.
 *
 * name is used only to label a read error on stdin.
 */
static void
process(const char *name)
{
	char *cp;
	int c;
	int incomm = 0;
	int ret;

	printf("extern char\txstr[];\n");
	for (;;) {
		if (fgets(linebuf, sizeof linebuf, stdin) == NULL) {
			if (ferror(stdin))
				err(3, "%s", name);
			break;
		}
		if (linebuf[0] == '#') {
			/*
			 * The <ctype.h> functions require a value that is
			 * representable as unsigned char, hence the cast.
			 */
			if (linebuf[1] == ' ' &&
			    isdigit((unsigned char)linebuf[2]))
				printf("#line%s", &linebuf[1]);
			else
				printf("%s", linebuf);
			continue;
		}
		for (cp = linebuf; (c = *cp++) != '\0';) {
			switch (c) {

			case '"':
				if (incomm)
					goto def;
				/* -1 means input ended inside the string. */
				if ((ret = (int) yankstr(&cp)) == -1)
					goto out;
				printf("(&xstr[%d])", ret);
				break;

			case '\'':
				if (incomm)
					goto def;
				putchar(c);
				/*
				 * Copy what follows the quote verbatim so
				 * that '"' does not open a string.  For an
				 * escape, copy the escaped character as
				 * well, so that '\"' is covered too.
				 */
				if (*cp == '\\' && cp[1] != '\0')
					putchar(*cp++);
				if (*cp)
					putchar(*cp++);
				break;

			case '/':
				if (incomm || *cp != '*')
					goto def;
				incomm = 1;
				cp++;
				printf("/*");
				continue;

			case '*':
				if (incomm && *cp == '/') {
					incomm = 0;
					cp++;
					printf("*/");
					continue;
				}
				goto def;

def:
			default:
				putchar(c);
				break;
			}
		}
	}
out:
	/* A failed write to the output aborts the run via onintr(). */
	if (ferror(stdout))
		warn("x.c"), onintr(0);
}
216
217static off_t
218yankstr(char **cpp)
219{
220	char *cp = *cpp;
221	int c, ch;
222	char dbuf[BUFSIZ];
223	char *dp = dbuf;
224	char *tp;
225	static char tmp[] = "b\bt\tr\rn\nf\f\\\\\"\"";
226
227	while ((c = *cp++)) {
228		if (dp == dbuf + sizeof(dbuf) - 3)
229			errx(1, "message too long");
230		switch (c) {
231
232		case '"':
233			cp++;
234			goto out;
235
236		case '\\':
237			c = *cp++;
238			if (c == 0)
239				break;
240			if (c == '\n') {
241				if (fgets(linebuf, sizeof linebuf, stdin)
242				    == NULL) {
243					if (ferror(stdin))
244						err(3, "x.c");
245					return(-1);
246				}
247				cp = linebuf;
248				continue;
249			}
250			for (tp = tmp; (ch = *tp++); tp++)
251				if (c == ch) {
252					c = *tp;
253					goto gotc;
254				}
255			if (!octdigit(c)) {
256				*dp++ = '\\';
257				break;
258			}
259			c -= '0';
260			if (!octdigit(*cp))
261				break;
262			c <<= 3, c += *cp++ - '0';
263			if (!octdigit(*cp))
264				break;
265			c <<= 3, c += *cp++ - '0';
266			break;
267		}
268gotc:
269		*dp++ = c;
270	}
271out:
272	*cpp = --cp;
273	*dp = 0;
274	return (hashit(dbuf, 1));
275}
276
/*
 * Return 1 if c is one of the octal digits '0'..'7', otherwise 0.
 *
 * A plain range test is used rather than isdigit(): c is a plain char
 * and may be negative, which the <ctype.h> functions do not accept.
 */
static int
octdigit(char c)
{
	return (c >= '0' && c <= '7');
}
282
283static void
284inithash(void)
285{
286	char buf[BUFSIZ];
287	FILE *mesgread = fopen(strings, "r");
288
289	if (mesgread == NULL)
290		return;
291	for (;;) {
292		mesgpt = tellpt;
293		if (fgetNUL(buf, sizeof buf, mesgread) == 0)
294			break;
295		ignore(hashit(buf, 0));
296	}
297	ignore(fclose(mesgread));
298}
299
/*
 * Read characters from file into obuf until a NUL byte, end of file, or
 * rmdr - 1 characters have been stored; obuf is always NUL-terminated.
 *
 * Returns 0 once the stream is at end of file or in error, 1 otherwise.
 */
static int
fgetNUL(char *obuf, int rmdr, FILE *file)
{
	char *out = obuf;
	int ch;

	for (;;) {
		if (--rmdr <= 0)
			break;		/* buffer exhausted */
		ch = xgetc(file);
		if (ch == 0 || ch == EOF)
			break;
		*out++ = ch;
	}
	*out = '\0';

	if (feof(file) || ferror(file))
		return (0);
	return (1);
}
311
312static int
313xgetc(FILE *file)
314{
315
316	tellpt++;
317	return (getc(file));
318}
319
/* Number of hash chains. */
#define	BUCKETS	128

/*
 * One string known to the table.  The elements of bucket[] serve only as
 * chain heads: just their hnext member is used.
 */
struct hash {
	off_t		 hpt;	/* offset of hstr in the strings file */
	char		*hstr;	/* private copy of the string */
	struct hash	*hnext;	/* next entry on the same chain */
	short		 hnew;	/* non-zero if added during this run */
};

static struct hash bucket[BUCKETS];
328
329static off_t
330hashit(char *str, int new)
331{
332	int i;
333	struct hash *hp, *hp0;
334
335	hp = hp0 = &bucket[lastchr(str) & 0177];
336	while (hp->hnext) {
337		hp = hp->hnext;
338		i = istail(str, hp->hstr);
339		if (i >= 0)
340			return (hp->hpt + i);
341	}
342	if ((hp = (struct hash *) calloc(1, sizeof (*hp))) == NULL)
343		errx(8, "calloc");
344	hp->hpt = mesgpt;
345	if (!(hp->hstr = strdup(str)))
346		err(1, NULL);
347	mesgpt += strlen(hp->hstr) + 1;
348	hp->hnext = hp0->hnext;
349	hp->hnew = new;
350	hp0->hnext = hp;
351	return (hp->hpt);
352}
353
354static void
355flushsh(void)
356{
357	int i;
358	struct hash *hp;
359	FILE *mesgwrit;
360	int old = 0, new = 0;
361
362	for (i = 0; i < BUCKETS; i++)
363		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext)
364			if (hp->hnew)
365				new++;
366			else
367				old++;
368	if (new == 0 && old != 0)
369		return;
370	mesgwrit = fopen(strings, old ? "r+" : "w");
371	if (mesgwrit == NULL)
372		err(4, "%s", strings);
373	for (i = 0; i < BUCKETS; i++)
374		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext) {
375			found(hp->hnew, hp->hpt, hp->hstr);
376			if (hp->hnew) {
377				fseek(mesgwrit, hp->hpt, 0);
378				ignore(fwrite(hp->hstr, strlen(hp->hstr) + 1, 1, mesgwrit));
379				if (ferror(mesgwrit))
380					err(4, "%s", strings);
381			}
382		}
383	if (fclose(mesgwrit) == EOF)
384		err(4, "%s", strings);
385}
386
387static void
388found(int new, off_t off, char *str)
389{
390	if (vflg == 0)
391		return;
392	if (!new)
393		fprintf(stderr, "found at %d:", (int) off);
394	else
395		fprintf(stderr, "new at %d:", (int) off);
396	prstr(str);
397	fprintf(stderr, "\n");
398}
399
/*
 * Print the string cp on stderr in a visible form:
 *   bytes below space   as '^' followed by the byte plus '`',
 *   byte 0177           as "^?",
 *   bytes 0200 and up   as a backslash and three octal digits,
 *   everything else     unchanged.
 */
static void
prstr(char *cp)
{
	int c;

	while ((c = (*cp++ & 0377)) != 0) {
		if (c < ' ')
			fprintf(stderr, "^%c", c + '`');
		else if (c == 0177)
			fprintf(stderr, "^?");
		else if (c >= 0200)	/* includes 0200 itself */
			fprintf(stderr, "\\%03o", (unsigned int)c);
		else
			fprintf(stderr, "%c", c);
	}
}
415
416static void
417xsdotc(void)
418{
419	FILE *strf = fopen(strings, "r");
420	FILE *xdotcf;
421
422	if (strf == NULL)
423		err(5, "%s", strings);
424	xdotcf = fopen("xs.c", "w");
425	if (xdotcf == NULL)
426		err(6, "xs.c");
427	fprintf(xdotcf, "char\txstr[] = {\n");
428	for (;;) {
429		int i, c;
430
431		for (i = 0; i < 8; i++) {
432			c = getc(strf);
433			if (ferror(strf)) {
434				warn("%s", strings);
435				onintr(0);
436			}
437			if (feof(strf)) {
438				fprintf(xdotcf, "\n");
439				goto out;
440			}
441			fprintf(xdotcf, "0x%02x,", c);
442		}
443		fprintf(xdotcf, "\n");
444	}
445out:
446	fprintf(xdotcf, "};\n");
447	ignore(fclose(xdotcf));
448	ignore(fclose(strf));
449}
450
/* Return the final character of cp, or NUL when cp is the empty string. */
static char
lastchr(char *cp)
{
	size_t len = strlen(cp);

	return (len == 0 ? '\0' : cp[len - 1]);
}
459
/*
 * If str is identical to the tail of of, return the index in of at which
 * that tail starts; otherwise return -1.
 *
 * The lengths are compared before subtracting, so the result never
 * depends on unsigned wraparound being narrowed to a negative int.
 */
static int
istail(char *str, char *of)
{
	size_t slen = strlen(str);
	size_t oflen = strlen(of);

	if (slen > oflen)
		return (-1);
	if (strcmp(of + (oflen - slen), str) != 0)
		return (-1);
	return ((int)(oflen - slen));
}
469
470static void
471onintr(int dummy __unused)
472{
473
474	ignore(signal(SIGINT, SIG_IGN));
475	if (strings[0] == '/')
476		ignore(unlink(strings));
477	ignore(unlink("x.c"));
478	ignore(unlink("xs.c"));
479	exit(7);
480}
481