1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 *
 * sgsmsg generates several message files from an input template file.  Messages
 * are constructed for use with gettext(3i) - the default - or catgets(3c).  The
 * files generated are:
28 *
29 * msg.h	a header file containing definitions for each message.  The -h
30 *		option triggers the creation of these definitions and specifies
31 *		the name to use.
32 *
33 * msg.c	a data array of message strings.  The msg.h definitions are
34 *		offsets into this array.  The -d option triggers the creation of
35 *		these definitions and specifies the name to use.
36 *
37 * messages	a message file suitable for catgets(3c) or gettext(3i) use.  The
38 *		-m option triggers this output and specifies the filename to be
39 *		used.
40 *
41 * The template file is processed based on the first character of each line:
42 *
43 * # or $	entries are copied (as is) to the message file (messages).
44 *
45 * @ token(s)	entries are translated.  Two translations are possible dependent
46 *		on whether one or more tokens are supplied:
47 *
 *		A single token is interpreted as one of two reserved message
 *		output indicators, or a message identifier.  The reserved output
 *		indicator _START_ enables output to the message file - Note that
 *		the occurrence of any other @ token will also enable message
 *		output.  The reserved output indicator _END_ disables output to
 *		the message file.  The use of these two indicators provides for
 *		only those message strings that require translation to be output
 *		to the message file.
56 *
 *		Besides the reserved output indicators, a single token is taken
 *		to be a message identifier which will be substituted for a
 *		`setid' for catgets(3c) output, or a `domain' name for
 *		gettext(3i) output.  This value is determined by substituting
 *		the token for the associated definition found in the message
 *		identifier file (specified with the -i option).
63 *
64 *		Multiple tokens are taken to be a message definition followed by
65 *		the associated message string.  The message string is copied to
66 *		the data array being built in msg.c.  The index into this array
67 *		becomes the `message' identifier created in the msg.h file.
68 */
69#pragma ident	"%Z%%M%	%I%	%E% SMI"
70
71#include	<fcntl.h>
72#include	<stdlib.h>
73#include	<stdio.h>
74#include	<unistd.h>
75#include	<limits.h>
76#include	<string.h>
77#include	<ctype.h>
78#include	<errno.h>
79#include	<sys/param.h>
80
81#include	<sgs.h>
82#include	<_string_table.h>
83
/*
 * Define any error message strings.
 *
 * Every string is used as a printf(3C) style format for a diagnostic written
 * to stderr.  The arguments each format consumes are:
 *
 *	Errmsg_malt	input file name (%s), input line number (%d)
 *	Errmsg_nmem	error description (%s) - callers pass strerror(errno)
 *	Errmsg_opne	file name (%s), error description (%s)
 *	Errmsg_wrte	file name (%s), error description (%s)
 *	Errmsg_read	file name (%s), error description (%s)
 *	Errmsg_stnw	error description (%s)
 *	Errmsg_stin	the message string that could not be inserted (%s)
 *	Errmsg_mnfn	the message string that could not be found (%s)
 *	Errmsg_use	no conversions - the usage text is printed as is
 */
static const char
	* Errmsg_malt =	"sgsmsg: file %s: line %d: malformed input "
			"at line\n",
	* Errmsg_nmem =	"sgsmsg: memory allocation failed: %s\n",
	* Errmsg_opne =	"sgsmsg: file %s: open failed: %s\n",
	* Errmsg_wrte =	"sgsmsg: file %s: write failed: %s\n",
	* Errmsg_read =	"sgsmsg: file %s: read failed %s\n",
	* Errmsg_stnw =	"sgsmsg: st_new(): failed: %s\n",
	* Errmsg_stin =	"sgsmsg: Str_tbl insert failed: %s\n",
	* Errmsg_mnfn =	"sgsmsg: message not found in Str_tbl: %s\n",
	* Errmsg_use  =	"usage: sgsmsg [-clv] [-d mesgdata] [-h mesgdefs] "
			"[-m messages] [-n name] [-i mesgident] file ...\n";
99
/*
 * Define all output filenames and associated descriptors.
 *
 * Each fdXXXX stream is paired with the flXXXX name it was opened from:
 *
 *	defs	message definition header	(-h option)
 *	data	message data array		(-d option)
 *	msgs	message file			(-m option)
 *	mids	message identifier file, read only	(-i option)
 *	desc	the input template file currently being processed
 *
 * A stream that was not requested remains NULL, and the code tests the stream
 * (or its name) before using it.
 */
static FILE	*fddefs, *fddata, *fdmsgs, *fdmids, *fddesc;
static char	*fldefs, *fldata, *flmsgs, *flmids, *fldesc;

/*
 * Temporary lint definitions file.  It is only created when both a defs and
 * a data file are in use; its name is built from nmlint and the process id.
 */
static FILE	*fdlint;
static char	fllint[MAXPATHLEN];

static uint_t		vflag;	/* verbose flag */
static Str_tbl		*stp;	/* string table */
				/*	(created by the first message_append) */

/*
 * Define any default strings.
 *
 *	nmlint		prefix of the temporary lint definitions file name
 *	interface	base name of the generated data array and access
 *			function (replaced by the -n option)
 *	start, end	reserved single tokens that enable and disable output
 *			to the message file
 */
static const char
	*nmlint =	"/tmp/sgsmsg.lint",
	*interface =	"sgs_msg",
	*start =	"_START_",
	*end =		"_END_";

/*
 * Define any default flags and data items.
 *
 *	cflag	set by -c: generate catgets(3c) rather than gettext(3i) output
 *	lflag	set by -l: the generated data array is declared static
 *	prtmsgs	message file output state: 0 initially, 1 once output has been
 *		enabled, -1 after an _END_ token (which keeps later message
 *		lines from re-enabling output until a _START_ or message
 *		identifier token is seen)
 *	line	current line number within the template being processed
 *	ptr	starts at 1 and is advanced once for every message character
 *		stored by file(); written as the catgets(3c) message number
 *		and checked against NL_MSGMAX
 *	msgid	non-zero when a gettext(3i) "msgid" has been started in the
 *		message file and its matching "msgstr" is still owed
 *
 * mesgid, setid and domain are established by getmesgid() from the message
 * identifier file.
 */
static int	cflag = 0, lflag = 0, prtmsgs = 0, line, ptr = 1, msgid = 0;
static char	*mesgid = 0, *setid = 0, *domain = 0;
125
/*
 * One message definition, as recorded by message_append().  Both strings are
 * private copies made with strdup(3C).
 */
typedef struct msg_string {
	char			*ms_defn;	/* definition (macro) name */
	char			*ms_message;	/* message text */
	struct msg_string	*ms_next;	/* next definition, or NULL */
} msg_string;

/*
 * Singly linked list of all message definitions, kept in the order in which
 * they were appended.  msg_head is NULL while the list is empty.
 */
static msg_string	*msg_head;
static msg_string	*msg_tail;
134
135/*
136 * message_append() is responsible for both inserting strings into
137 * the master Str_tbl as well as maintaining a list of the
138 * DEFINITIONS associated with each string.
139 *
140 * The list of strings is traversed at the end once the full
141 * Str_tbl has been constructed - and string offsets can be
142 * assigned.
143 */
144static void
145message_append(const char *defn, const char *message)
146{
147	msg_string	*msg;
148	if ((msg = calloc(sizeof (msg_string), 1)) == 0) {
149		(void) fprintf(stderr, Errmsg_nmem, strerror(errno));
150		exit(1);
151	}
152
153	/*
154	 * Initialize the string table.
155	 */
156	if ((stp == 0) && ((stp = st_new(FLG_STNEW_COMPRESS)) == NULL)) {
157		(void) fprintf(stderr, Errmsg_stnw, strerror(errno));
158		exit(1);
159	}
160
161
162	if ((msg->ms_defn = strdup(defn)) == 0) {
163		(void) fprintf(stderr, Errmsg_nmem, strerror(errno));
164		exit(1);
165	}
166	if ((msg->ms_message = strdup(message)) == 0) {
167		(void) fprintf(stderr, Errmsg_nmem, strerror(errno));
168		exit(1);
169	}
170
171	if (st_insert(stp, msg->ms_message) == -1) {
172		(void) fprintf(stderr, Errmsg_stin,
173		    message);
174		exit(1);
175	}
176
177	if (msg_head == 0) {
178		msg_head = msg_tail = msg;
179		return;
180	}
181	msg_tail->ms_next = msg;
182	msg_tail = msg;
183}
184
185/*
186 * Initialize a setid value.  Given a setid definition determine its numeric
187 * value from the specified message identifier file (specified with the -i
188 * option).  Return a pointer to the numeric string.
189 */
190static int
191getmesgid(char *id)
192{
193	char	*buffer, *token, *_mesgid = 0, *_setid = 0, *_domain = 0;
194
195	/*
196	 * If we're being asked to interpret a message id but the user didn't
197	 * provide the required message identifier file (-i option) we're in
198	 * trouble.
199	 */
200	if (flmids == 0) {
201		(void) fprintf(stderr, "sgsmsg: file %s: line %d: mesgid %s: "
202		    "unable to process mesgid\n\t"
203		    "no message identifier file specified "
204		    "(see -i option)\n", fldesc, line, id);
205		return (1);
206	}
207
208	if ((buffer = malloc(LINE_MAX)) == 0) {
209		(void) fprintf(stderr, Errmsg_nmem, strerror(errno));
210		return (1);
211	}
212
213	/*
214	 * Read the message identifier file and locate the required mesgid.
215	 */
216	rewind(fdmids);
217	while (fgets(buffer, LINE_MAX, fdmids) != NULL) {
218		if ((token = strstr(buffer, id)) == NULL)
219			continue;
220
221		/*
222		 * Establish individual strings for the mesgid, setid and domain
223		 * values.
224		 */
225		_mesgid = token;
226		while (!(isspace(*token)))
227			token++;
228		*token++ = 0;
229
230		while (isspace(*token))
231			token++;
232		_setid = token;
233		while (!(isspace(*token)))
234			token++;
235		*token++ = 0;
236
237		while (isspace(*token))
238			token++;
239		_domain = token;
240		while (!(isspace(*token)))
241			token++;
242		*token = 0;
243		break;
244	}
245
246	/*
247	 * Did we find a match?
248	 */
249	if ((_mesgid == 0) || (_setid == 0) || (_domain == 0)) {
250		(void) fprintf(stderr, "sgsmsg: file %s: line %d: mesgid %s: "
251		    "unable to process mesgid\n\t"
252		    "identifier does not exist in file %s\n",
253		    fldesc, line, id, flmids);
254		return (1);
255	}
256
257	/*
258	 * Have we been here before?
259	 */
260	if (mesgid) {
261		if (cflag == 1) {
262			/*
263			 * If we're being asked to process more than one mesgid
264			 * warn the user that only one mesgid can be used for
265			 * the catgets(3c) call.
266			 */
267			(void) fprintf(stderr, "sgsmsg: file %s: line %d: "
268			    "setid %s: warning: multiple mesgids "
269			    "encountered\n\t"
270			    "last setting used in messaging code\n",
271			    fldesc, line, id);
272		}
273	}
274
275	mesgid = _mesgid;
276	setid = _setid;
277	domain = _domain;
278
279	/*
280	 * Generate the message file output (insure output flag is enabled).
281	 */
282	if (prtmsgs != -1)
283		prtmsgs = 1;
284	if (fdmsgs && (prtmsgs == 1)) {
285		if (cflag == 1) {
286			if (fprintf(fdmsgs, "$quote \"\n$set %s\n",
287			    setid) < 0) {
288				(void) fprintf(stderr, Errmsg_wrte, flmsgs,
289				    strerror(errno));
290				return (1);
291			}
292		} else {
293			if (fprintf(fdmsgs, "domain\t\"%s\"\n", domain) < 0) {
294				(void) fprintf(stderr, Errmsg_wrte, flmsgs,
295				    strerror(errno));
296				return (1);
297			}
298		}
299	}
300
301	/*
302	 * For catgets(3c) output generate a setid definition in the message
303	 * definition file.
304	 */
305	if (fddefs && (cflag == 1) &&
306	    (fprintf(fddefs, "#define\t%s\t%s\n\n", mesgid, setid) < 0)) {
307		(void) fprintf(stderr, Errmsg_wrte, fldefs, strerror(errno));
308		return (1);
309	}
310
311	return (0);
312}
313
314/*
315 * Dump contents of String Table to standard out
316 */
317static void
318dump_stringtab(Str_tbl *stp)
319{
320	uint_t	cnt;
321
322	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) {
323		(void) printf("string table full size: %ld: uncompressed\n",
324		    stp->st_fullstrsize);
325		return;
326	}
327
328	(void) printf("string table full size: %ld compressed down to: %ld\n\n",
329	    stp->st_fullstrsize, stp->st_strsize);
330	(void) printf("string table compression information [%d buckets]:\n",
331	    stp->st_hbckcnt);
332
333	for (cnt = 0; cnt < stp->st_hbckcnt; cnt++) {
334		Str_hash	*sthash = stp->st_hashbcks[cnt];
335
336		if (sthash == 0)
337			continue;
338
339		(void) printf(" bucket: [%d]\n", cnt);
340
341		while (sthash) {
342			size_t	stroff = sthash->hi_mstr->sm_strlen -
343			    sthash->hi_strlen;
344
345			if (stroff == 0) {
346				(void) printf("  [%ld]: '%s'  <master>\n",
347				    sthash->hi_refcnt, sthash->hi_mstr->sm_str);
348			} else {
349				(void) printf("  [%ld]: '%s'  <suffix of: "
350				    "'%s'>\n", sthash->hi_refcnt,
351				    &sthash->hi_mstr->sm_str[stroff],
352				    sthash->hi_mstr->sm_str);
353			}
354			sthash = sthash->hi_next;
355		}
356	}
357}
358
359/*
360 * Initialize the message definition header file stream.
361 */
362static int
363init_defs(void)
364{
365	static char	guard[FILENAME_MAX + 6];
366	char		*optr;
367	const char	*iptr, *_ptr;
368
369	/*
370	 * Establish a header guard name using the files basename.
371	 */
372	for (iptr = 0, _ptr = fldefs; _ptr && (*_ptr != '\0'); _ptr++) {
373		if (*_ptr == '/')
374			iptr = _ptr + 1;
375	}
376	if (iptr == 0)
377		iptr = fldefs;
378
379	optr = guard;
380	for (*optr++ = '_'; iptr && (*iptr != '\0'); iptr++, optr++) {
381		if (*iptr == '.') {
382			*optr++ = '_';
383			*optr++ = 'D';
384			*optr++ = 'O';
385			*optr++ = 'T';
386			*optr = '_';
387		} else
388			*optr = toupper(*iptr);
389	}
390
391	if (fprintf(fddefs, "#ifndef\t%s\n#define\t%s\n\n", guard, guard) < 0) {
392		(void) fprintf(stderr, Errmsg_wrte, fldefs, strerror(errno));
393		return (1);
394	}
395
396	if (fprintf(fddefs, "#ifndef\t__lint\n\n") < 0) {
397		(void) fprintf(stderr, Errmsg_wrte, fldefs, strerror(errno));
398		return (1);
399	}
400
401	/*
402	 * add "typedef int	Msg;"
403	 */
404	if (fprintf(fddefs, "typedef int\tMsg;\n\n") < 0) {
405		(void) fprintf(stderr, Errmsg_wrte, fldefs, strerror(errno));
406		return (1);
407	}
408
409	/*
410	 * If the associated data array is global define a prototype.
411	 * Define a macro to access the array elements.
412	 */
413	if (lflag == 0) {
414		if (fprintf(fddefs, "extern\tconst char\t__%s[];\n\n",
415		    interface) < 0) {
416			(void) fprintf(stderr, Errmsg_wrte, fldefs,
417			    strerror(errno));
418			return (1);
419		}
420	}
421	if (fprintf(fddefs, "#define\tMSG_ORIG(x)\t&__%s[x]\n\n",
422	    interface) < 0) {
423		(void) fprintf(stderr, Errmsg_wrte, fldefs, strerror(errno));
424		return (1);
425	}
426
427	/*
428	 * Generate a prototype to access the associated data array.
429	 */
430	if (fprintf(fddefs, "extern\tconst char *\t_%s(Msg);\n\n",
431	    interface) < 0) {
432		(void) fprintf(stderr, Errmsg_wrte, fldefs, strerror(errno));
433		return (1);
434	}
435	if (fprintf(fddefs, "#define\tMSG_INTL(x)\t_%s(x)\n\n",
436	    interface) < 0) {
437		(void) fprintf(stderr, Errmsg_wrte, fldefs, strerror(errno));
438		return (1);
439	}
440
441	return (0);
442}
443
444
445/*
446 * Finish the message definition header file.
447 */
448static int
449fini_defs(void)
450{
451	if (fprintf(fddefs, "\n#else\t/* __lint */\n\n") < 0) {
452		(void) fprintf(stderr, Errmsg_wrte, fldefs, strerror(errno));
453		return (1);
454	}
455
456	/*
457	 * When __lint is defined, Msg is a char *.  This allows lint to
458	 * check our format strings against it's arguments.
459	 */
460	if (fprintf(fddefs, "\ntypedef char *\tMsg;\n\n") < 0) {
461		(void) fprintf(stderr, Errmsg_wrte, fldefs, strerror(errno));
462		return (1);
463	}
464
465	if (fprintf(fddefs, "extern\tconst char *\t_%s(Msg);\n\n",
466	    interface) < 0) {
467		(void) fprintf(stderr, Errmsg_wrte, fldefs, strerror(errno));
468		return (1);
469	}
470
471	if (lflag == 0) {
472		if (fprintf(fddefs, "extern\tconst char\t__%s[];\n\n",
473		    interface) < 0) {
474			(void) fprintf(stderr, Errmsg_wrte, fldefs,
475			    strerror(errno));
476			return (1);
477		}
478	}
479
480	if (fprintf(fddefs,
481	    "#define MSG_ORIG(x)\tx\n#define MSG_INTL(x)\tx\n") < 0) {
482		(void) fprintf(stderr, Errmsg_wrte, fldefs, strerror(errno));
483		return (1);
484	}
485
486	/*
487	 * Copy the temporary lint defs file into the new header.
488	 */
489	if (fdlint) {
490		long	size;
491		char	*buf;
492
493		size = ftell(fdlint);
494		(void) rewind(fdlint);
495
496		if ((buf = malloc(size)) == 0) {
497			(void) fprintf(stderr, Errmsg_nmem, strerror(errno));
498			return (1);
499		}
500		if (fread(buf, size, 1, fdlint) == 0) {
501			(void) fprintf(stderr, Errmsg_read, fllint,
502			    strerror(errno));
503			return (1);
504		}
505		if (fwrite(buf, size, 1, fddefs) == 0) {
506			(void) fprintf(stderr, Errmsg_wrte, fldefs,
507			    strerror(errno));
508			return (1);
509		}
510		(void) free(buf);
511	}
512
513	if (fprintf(fddefs, "\n#endif\t/* __lint */\n") < 0) {
514		(void) fprintf(stderr, Errmsg_wrte, fldefs, strerror(errno));
515		return (1);
516	}
517
518	if (fprintf(fddefs, "\n#endif\n") < 0) {
519		(void) fprintf(stderr, Errmsg_wrte, fldefs, strerror(errno));
520		return (1);
521	}
522
523	return (0);
524}
525
526/*
527 * The entire messaging file has been scanned - and all strings have been
528 * inserted into the string_table.  We can now walk the message queue
529 * and create the '#define <DEFN>' for each string - with the strings
530 * assigned offset into the string_table.
531 */
532static int
533output_defs(void)
534{
535	msg_string	*msg;
536	size_t		stbufsize;
537	char		*stbuf;
538
539	stbufsize = st_getstrtab_sz(stp);
540	if ((stbuf = malloc(stbufsize)) == 0) {
541		(void) fprintf(stderr, Errmsg_nmem, strerror(errno));
542		exit(1);
543	}
544	(void) st_setstrbuf(stp, stbuf, stbufsize);
545	for (msg = msg_head; msg; msg = msg->ms_next) {
546		size_t	stoff;
547		if ((st_setstring(stp, msg->ms_message, &stoff)) == -1) {
548			(void) fprintf(stderr, Errmsg_mnfn, msg->ms_message);
549			return (1);
550		}
551		if (fprintf(fddefs, "\n#define\t%s\t%ld\n",
552		    msg->ms_defn, stoff) < 0) {
553			(void) fprintf(stderr, Errmsg_wrte,
554			    fldefs, strerror(errno));
555			return (1);
556		}
557		if (fddefs && fprintf(fddefs, "#define\t%s_SIZE\t%d\n",
558		    msg->ms_defn, strlen(msg->ms_message)) < 0) {
559			(void) fprintf(stderr, Errmsg_wrte,
560			    fldefs, strerror(errno));
561			return (1);
562		}
563	}
564	return (0);
565}
566
567
568/*
569 * Finish off the data structure definition.
570 */
571static int
572output_data(void)
573{
574	size_t		stbufsize;
575	size_t		ndx;
576	size_t		column = 1;
577	const char	*stbuf;
578	const char	*fmtstr;
579
580	stbufsize = st_getstrtab_sz(stp);
581	stbuf = st_getstrbuf(stp);
582
583	assert(stbuf);
584
585	/*
586	 * Determine from the local flag whether the data declaration should
587	 * be static.
588	 */
589	if (lflag)
590		fmtstr = (const char *)"static const";
591	else
592		fmtstr = (const char *)"const";
593
594	if (fprintf(fddata, "\n%s char __%s[%ld] = { ",
595	    fmtstr, interface, stbufsize) < 0) {
596		(void) fprintf(stderr, Errmsg_wrte, fldata, strerror(errno));
597		return (1);
598	}
599
600	for (ndx = 0; ndx < (stbufsize - 1); ndx++) {
601		if (column == 1) {
602			if (fddata && fprintf(fddata,
603			    "\n/* %4ld */ 0x%.2x,", ndx,
604			    (unsigned char)stbuf[ndx]) < 0) {
605				(void) fprintf(stderr, Errmsg_wrte,
606				    fldata, strerror(errno));
607				return (1);
608			}
609		} else {
610			if (fddata && fprintf(fddata, "  0x%.2x,",
611			    (unsigned char)stbuf[ndx]) < 0) {
612				(void) fprintf(stderr, Errmsg_wrte,
613				    fldata, strerror(errno));
614				return (1);
615			}
616		}
617
618		if (column++ == 10)
619			column = 1;
620	}
621
622	if (column == 1)
623		fmtstr = "\n\t0x%.2x };\n";
624	else
625		fmtstr = "  0x%.2x };\n";
626
627	if (fprintf(fddata, fmtstr, (unsigned char)stbuf[stbufsize - 1]) < 0) {
628		(void) fprintf(stderr, Errmsg_wrte, fldata, strerror(errno));
629		return (1);
630	}
631
632	return (0);
633}
634
635static int
636file()
637{
638	char	buffer[LINE_MAX], * token;
639	uint_t	bufsize;
640	char	*token_buffer;
641	int	escape = 0;
642
643	if ((token_buffer = malloc(LINE_MAX)) == 0) {
644		(void) fprintf(stderr, Errmsg_nmem, strerror(errno));
645		return (1);
646	}
647	bufsize = LINE_MAX;
648
649	line = 1;
650
651	while ((token = fgets(buffer, LINE_MAX, fddesc)) != NULL) {
652		char	defn[PATH_MAX], * _defn, * str;
653		int	len;
654
655		switch (*token) {
656		case '#':
657		case '$':
658			if (escape) {
659				(void) fprintf(stderr, Errmsg_malt, fldesc,
660				    line);
661				return (1);
662			}
663
664			/*
665			 * If a msgid has been output a msgstr must follow
666			 * before we digest the new token.  A msgid is only set
667			 * if fdmsgs is in use.
668			 */
669			if (msgid) {
670				msgid = 0;
671				if (fprintf(fdmsgs, "msgstr\t\"\"\n") < 0) {
672					(void) fprintf(stderr, Errmsg_wrte,
673					    flmsgs, strerror(errno));
674					return (1);
675				}
676			}
677
678			/*
679			 * Pass lines directly through to the output message
680			 * file.
681			 */
682			if (fdmsgs && (prtmsgs == 1)) {
683				char	comment;
684
685				if (cflag == 0)
686					comment = '#';
687				else
688					comment = '$';
689
690				if (fprintf(fdmsgs, "%c%s", comment,
691				    ++token) < 0) {
692					(void) fprintf(stderr, Errmsg_wrte,
693					    flmsgs, strerror(errno));
694					return (1);
695				}
696			}
697			break;
698
699		case '@':
700			if (escape) {
701				(void) fprintf(stderr, Errmsg_malt, fldesc,
702				    line);
703				return (1);
704			}
705
706			/*
707			 * If a msgid has been output a msgstr must follow
708			 * before we digest the new token.
709			 */
710			if (msgid) {
711				msgid = 0;
712				if (fprintf(fdmsgs, "msgstr\t\"\"\n") < 0) {
713					(void) fprintf(stderr, Errmsg_wrte,
714					    flmsgs, strerror(errno));
715					return (1);
716				}
717			}
718
719			/*
720			 * Determine whether we have one or more tokens.
721			 */
722			token++;
723			while (isspace(*token))		/* rid any whitespace */
724				token++;
725			_defn = token;			/* definition start */
726			while (!(isspace(*token)))
727				token++;
728			*token++ = 0;
729
730			while (isspace(*token))		/* rid any whitespace */
731				token++;
732
733			/*
734			 * Determine whether the single token is one of the
735			 * reserved message output delimiters otherwise
736			 * translate it as a message identifier.
737			 */
738			if (*token == 0) {
739				if (strcmp(_defn, start) == 0)
740					prtmsgs = 1;
741				else if (strcmp(_defn, end) == 0)
742					prtmsgs = -1;
743				else if (getmesgid(_defn) == 1)
744					return (1);
745				break;
746			}
747
748			/*
749			 * Multiple tokens are translated by taking the first
750			 * token as the message definition, and the rest of the
751			 * line as the message itself.  A message line ending
752			 * with an escape ('\') is expected to be continued on
753			 * the next line.
754			 */
755			if (prtmsgs != -1)
756				prtmsgs = 1;
757			if (fdmsgs && (prtmsgs == 1)) {
758				/*
759				 * For catgets(3c) make sure a message
760				 * identifier has been established (this is
761				 * normally a domain for gettext(3i), but for
762				 * sgsmsg use this could be argued as being
763				 * redundent).  Also make sure that the message
764				 * definitions haven't exceeeded the maximum
765				 * value allowed by gencat(1) before generating
766				 * any message file entries.
767				 */
768				if (cflag == 1) {
769					if (setid == 0) {
770						(void) fprintf(stderr, "file "
771						    "%s: no message identifier "
772						    "has been established\n",
773						    fldesc);
774						return (1);
775					}
776					if (ptr	> NL_MSGMAX) {
777						(void) fprintf(stderr, "file "
778						    "%s: message definition "
779						    "(%d) exceeds allowable "
780						    "limit (NL_MSGMAX)\n",
781						    fldesc, ptr);
782						return (1);
783					}
784				}
785
786				/*
787				 * For catgets(3c) write the definition and the
788				 * message string to the message file.  For
789				 * gettext(3i) write the message string as a
790				 * mesgid - indicate a mesgid has been output
791				 * so that a msgstr can follow.
792				 */
793				if (cflag == 1) {
794					if (fprintf(fdmsgs, "%d\t%s", ptr,
795					    token) < 0) {
796						(void) fprintf(stderr,
797						    Errmsg_wrte, flmsgs,
798						    strerror(errno));
799						return (1);
800					}
801				} else {
802					if (fprintf(fdmsgs, "msgid\t\"") < 0) {
803						(void) fprintf(stderr,
804						    Errmsg_wrte, flmsgs,
805						    strerror(errno));
806						return (1);
807					}
808					msgid = 1;
809				}
810			}
811
812			/*
813			 * The message itself is a quoted string as this makes
814			 * embedding spaces at the start (or the end) of the
815			 * string very easy.
816			 */
817			if (*token != '"') {
818				(void) fprintf(stderr, Errmsg_malt, fldesc,
819				    line);
820				return (1);
821			}
822
823			(void) strcpy(defn, _defn);
824
825			/*
826			 * Write the tag to the lint definitions.
827			 */
828			if (fdlint) {
829				if (fprintf(fdlint, "\n#define\t%s\t",
830				    _defn) < 0) {
831					(void) fprintf(stderr, Errmsg_wrte,
832					    fllint, strerror(errno));
833					return (1);
834				}
835			}
836
837			len = 0;
838
839			/*
840			 * Write each character of the message string to the
841			 * data array.  Translate any escaped characters - use
842			 * the same specially recognized characters as defined
843			 * by gencat(1).
844			 */
845message:
846			if (*token == '"') {
847				if (fdlint &&
848				    (fprintf(fdlint, "%c", *token) < 0)) {
849					(void) fprintf(stderr, Errmsg_wrte,
850					    fllint, strerror(errno));
851					return (1);
852				}
853				token++;
854			}
855			while (*token) {
856				char	_token;
857
858				if ((*token == '\\') && (escape == 0)) {
859					escape = 1;
860					if (fdlint && (*(token + 1) != '\n') &&
861					    fprintf(fdlint, "%c", *token) < 0) {
862						(void) fprintf(stderr,
863						    Errmsg_wrte, fllint,
864						    strerror(errno));
865						return (1);
866					}
867					token++;
868					continue;
869				}
870				if (escape) {
871					if (*token == 'n')
872						_token = '\n';
873					else if (*token == 't')
874						_token = '\t';
875					else if (*token == 'v')
876						_token = '\v';
877					else if (*token == 'b')
878						_token = '\b';
879					else if (*token == 'f')
880						_token = '\f';
881					else if (*token == '\\')
882						_token = '\\';
883					else if (*token == '"')
884						_token = '"';
885					else if (*token == '\n')
886						break;
887					else
888						_token = *token;
889
890					if (fdmsgs && (prtmsgs == 1) &&
891					    (fprintf(fdmsgs, "\\") < 0)) {
892						(void) fprintf(stderr,
893						    Errmsg_wrte, flmsgs,
894						    strerror(errno));
895						return (1);
896					}
897				} else {
898					/*
899					 * If this is the trailing quote then
900					 * thats the last of the message string.
901					 * Eat up any remaining white space and
902					 * unless an escape character is found
903					 * terminate the data string with a 0.
904					 */
905					/* BEGIN CSTYLED */
906					if (*token == '"') {
907					    if (fdlint && (fprintf(fdlint,
908						"%c", *token) < 0)) {
909						(void) fprintf(stderr,
910						    Errmsg_wrte, fllint,
911						    strerror(errno));
912						return (1);
913					    }
914
915					    if (fdmsgs && (prtmsgs == 1) &&
916						(fprintf(fdmsgs, "%c",
917						*token) < 0)) {
918						(void) fprintf(stderr,
919						    Errmsg_wrte, flmsgs,
920						    strerror(errno));
921						return (1);
922					    }
923
924					    while (*++token) {
925						if (*token == '\n')
926							break;
927					    }
928					    _token = '\0';
929					} else
930					    _token = *token;
931					/* END CSTYLED */
932				}
933
934				if (fdmsgs && (prtmsgs == 1) &&
935				    (fprintf(fdmsgs, "%c", *token) < 0)) {
936					(void) fprintf(stderr, Errmsg_wrte,
937					    flmsgs, strerror(errno));
938					return (1);
939				}
940
941				if (fdlint && fprintf(fdlint,
942				    "%c", *token) < 0) {
943					(void) fprintf(stderr, Errmsg_wrte,
944					    fllint, strerror(errno));
945					return (1);
946				}
947
948				if (len >= bufsize) {
949					bufsize += LINE_MAX;
950					if ((token_buffer = realloc(
951					    token_buffer, bufsize)) == 0) {
952						(void) fprintf(stderr,
953						    Errmsg_nmem,
954						    strerror(errno));
955						return (1);
956					}
957				}
958				token_buffer[len] = _token;
959				ptr++, token++, len++;
960				escape = 0;
961
962				if (_token == '\0')
963					break;
964			}
965
966			/*
967			 * After the complete message string has been processed
968			 * (including its continuation beyond one line), create
969			 * a string size definition.
970			 */
971			if (escape == 0) {
972				const char *form = "#define\t%s_SIZE\t%d\n";
973
974				token_buffer[len] = '\0';
975
976				message_append(defn, token_buffer);
977
978				if (fdlint && fprintf(fdlint, form, defn,
979				    (len - 1)) < 0) {
980					(void) fprintf(stderr, Errmsg_wrte,
981					    fllint, strerror(errno));
982					return (1);
983				}
984			}
985			break;
986
987		default:
988			/*
989			 * Empty lines are passed through to the message file.
990			 */
991			while (isspace(*token))
992				token++;
993
994			if (*token == 0) {
995				if (msgid || (fdmsgs && (prtmsgs == 1))) {
996					/*
997					 * If a msgid has been output a msgstr
998					 * must follow before we digest the new
999					 * token.
1000					 */
1001					if (msgid) {
1002						msgid = 0;
1003						str = "msgstr\t\"\"\n\n";
1004					} else
1005						str = "\n";
1006
1007					if (fprintf(fdmsgs, str) < 0) {
1008						(void) fprintf(stderr,
1009						    Errmsg_wrte, flmsgs,
1010						    strerror(errno));
1011						return (1);
1012					}
1013				}
1014				break;
1015			}
1016
1017			/*
1018			 * If an escape is in effect then any tokens are taken
1019			 * to be message continuations.
1020			 */
1021			if (escape) {
1022				escape = 0;
1023				goto message;
1024			}
1025
1026			(void) fprintf(stderr, "file %s: line %d: invalid "
1027			    "input does not start with #, $ or @\n", fldesc,
1028			    line);
1029			return (1);
1030		}
1031		line++;
1032	}
1033
1034	free(token_buffer);
1035
1036	return (0);
1037}
1038
1039int
1040main(int argc, char ** argv)
1041{
1042	opterr = 0;
1043	while ((line = getopt(argc, argv, "cd:h:lm:n:i:v")) != EOF) {
1044		switch (line) {
1045		case 'c':			/* catgets instead of gettext */
1046			cflag = 1;
1047			break;
1048		case 'd':			/* new message data filename */
1049			fldata = optarg;	/*	(msg.c is default) */
1050			break;
1051		case 'h':			/* new message defs filename */
1052			fldefs = optarg;	/*	(msg.h is default) */
1053			break;
1054		case 'i':			/* input message ids from */
1055			flmids = optarg;	/*	from this file */
1056			break;
1057		case 'l':			/* define message data arrays */
1058			lflag = 1;		/*	to be local (static) */
1059			break;
1060		case 'm':			/* generate message database */
1061			flmsgs = optarg;	/*	to this file */
1062			break;
1063		case 'n':			/* new data array and func */
1064			interface = optarg;	/*	name (msg is default) */
1065			break;
1066		case 'v':
1067			vflag = 1;		/* set verbose flag */
1068			break;
1069		case '?':
1070			(void) fprintf(stderr, Errmsg_use, argv[0]);
1071			exit(1);
1072		default:
1073			break;
1074		}
1075	}
1076
1077	/*
1078	 * Validate the we have been given at least one input file.
1079	 */
1080	if ((argc - optind) < 1) {
1081		(void) fprintf(stderr, Errmsg_use);
1082		exit(1);
1083	}
1084
1085	/*
1086	 * Open all the required output files.
1087	 */
1088	if (fldefs) {
1089		if ((fddefs = fopen(fldefs, "w+")) == NULL) {
1090			(void) fprintf(stderr, Errmsg_opne, fldefs,
1091			    strerror(errno));
1092			return (1);
1093		}
1094	}
1095	if (fldata) {
1096		if (fldefs && (strcmp(fldefs, fldata) == 0))
1097			fddata = fddefs;
1098		else if ((fddata = fopen(fldata, "w+")) == NULL) {
1099			(void) fprintf(stderr, Errmsg_opne, fldata,
1100			    strerror(errno));
1101			return (1);
1102		}
1103	}
1104	if (fddefs && fddata) {
1105		(void) sprintf(fllint, "%s.%d", nmlint, (int)getpid());
1106		if ((fdlint = fopen(fllint, "w+")) == NULL) {
1107			(void) fprintf(stderr, Errmsg_opne, fllint,
1108			    strerror(errno));
1109			return (1);
1110		}
1111	}
1112	if (flmsgs) {
1113		if ((fdmsgs = fopen(flmsgs, "w+")) == NULL) {
1114			(void) fprintf(stderr, Errmsg_opne, flmsgs,
1115			    strerror(errno));
1116			return (1);
1117		}
1118	}
1119	if (flmids) {
1120		if ((fdmids = fopen(flmids, "r")) == NULL) {
1121			(void) fprintf(stderr, Errmsg_opne, flmids,
1122			    strerror(errno));
1123			return (1);
1124		}
1125	}
1126
1127
1128	/*
1129	 * Initialize the message definition and message data streams.
1130	 */
1131	if (fddefs) {
1132		if (init_defs())
1133			return (1);
1134	}
1135
1136	/*
1137	 * Read the input message file, and for each line process accordingly.
1138	 */
1139	for (; optind < argc; optind++) {
1140		int	err;
1141
1142		fldesc = argv[optind];
1143
1144		if ((fddesc = fopen(fldesc, "r")) == NULL) {
1145			(void) fprintf(stderr, Errmsg_opne, fldesc,
1146			    strerror(errno));
1147			return (1);
1148		}
1149		err = file();
1150		(void) fclose(fddesc);
1151
1152		if (err != 0)
1153			return (1);
1154	}
1155
1156	/*
1157	 * If a msgid has been output a msgstr must follow before we end the
1158	 * file.
1159	 */
1160	if (msgid) {
1161		msgid = 0;
1162		if (fprintf(fdmsgs, "msgstr\t\"\"\n") < 0) {
1163			(void) fprintf(stderr, Errmsg_wrte, flmsgs,
1164			    strerror(errno));
1165			return (1);
1166		}
1167	}
1168
1169	if (fdmids)
1170		(void) fclose(fdmids);
1171	if (fdmsgs)
1172		(void) fclose(fdmsgs);
1173
1174	if (fddefs) {
1175		if (output_defs())
1176			return (1);
1177	}
1178
1179	/*
1180	 * Finish off any generated data and header file.
1181	 */
1182	if (fldata) {
1183		if (output_data())
1184			return (1);
1185	}
1186	if (fddefs) {
1187		if (fini_defs())
1188			return (1);
1189	}
1190
1191	if (vflag)
1192		dump_stringtab(stp);
1193
1194	/*
1195	 * Close up everything and go home.
1196	 */
1197	if (fddata)
1198		(void) fclose(fddata);
1199	if (fddefs && (fddefs != fddata))
1200		(void) fclose(fddefs);
1201	if (fddefs && fddata) {
1202		(void) fclose(fdlint);
1203		(void) unlink(fllint);
1204	}
1205
1206	if (stp)
1207		st_destroy(stp);
1208
1209	return (0);
1210}
1211