1184054Slulf/*-
2186743Slulf * Copyright (c) 2008-2009, Ulf Lilleengen <lulf@FreeBSD.org>
3184054Slulf * All rights reserved.
4184054Slulf *
5184054Slulf * Redistribution and use in source and binary forms, with or without
6184054Slulf * modification, are permitted provided that the following conditions
7184054Slulf * are met:
8184054Slulf * 1. Redistributions of source code must retain the above copyright
9184054Slulf *    notice, this list of conditions and the following disclaimer.
10184054Slulf * 2. Redistributions in binary form must reproduce the above copyright
11184054Slulf *    notice, this list of conditions and the following disclaimer in the
12184054Slulf *    documentation and/or other materials provided with the distribution.
13184054Slulf *
14184054Slulf * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15184054Slulf * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16184054Slulf * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17184054Slulf * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18184054Slulf * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19184054Slulf * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20184054Slulf * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21184054Slulf * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22184054Slulf * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23184054Slulf * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24184054Slulf * SUCH DAMAGE.
25184054Slulf *
26184054Slulf * $FreeBSD$
27184054Slulf */
28184054Slulf
29185134Slulf#include <assert.h>
30185134Slulf#include <stdio.h>
31184054Slulf#include <stdlib.h>
32185134Slulf
33184054Slulf#include "misc.h"
34184054Slulf#include "queue.h"
35185134Slulf#include "rcsfile.h"
36185134Slulf#include "rcsparse.h"
37185134Slulf#include "rcstokenizer.h"
38184054Slulf
/*
 * This is an RCS parser that uses lex for tokenizing.  It checks that the
 * RCS syntax is correct while it constructs the RCS file that is used by
 * csup.
 */
43184054Slulf
44184054Slulfstatic void	asserttoken(yyscan_t *, int);
45184054Slulfstatic int	parse_admin(struct rcsfile *, yyscan_t *);
46184054Slulfstatic int	parse_deltas(struct rcsfile *, yyscan_t *, int);
47184054Slulfstatic int	parse_deltatexts(struct rcsfile *, yyscan_t *, int);
48185592Slulfstatic char	*duptext(yyscan_t *, int *);
49184054Slulf
50184054Slulfstruct string {
51184054Slulf	char *str;
52184054Slulf	STAILQ_ENTRY(string) next;
53184054Slulf};
54184054Slulf
55184054Slulfstatic void
56184054Slulfasserttoken(yyscan_t *sp, int token)
57184054Slulf{
58184054Slulf	int t;
59184054Slulf
60184054Slulf	t = token;
61184054Slulf	t = rcslex(*sp);
62184054Slulf	assert(t == token);
63184054Slulf}
64184054Slulf
65184054Slulfstatic char *
66185592Slulfduptext(yyscan_t *sp, int *arglen)
67184054Slulf{
68184054Slulf	char *tmp, *val;
69184054Slulf	int len;
70184054Slulf
71184054Slulf	tmp = rcsget_text(*sp);
72184054Slulf	len = rcsget_leng(*sp);
73185592Slulf	val = xmalloc(len + 1);
74185592Slulf	memcpy(val, tmp, len);
75185592Slulf	val[len] = '\0';
76185592Slulf	if (arglen != NULL)
77185592Slulf		*arglen = len;
78184054Slulf	return (val);
79184054Slulf}
80184054Slulf
81184054Slulf/*
82184054Slulf * Start up parser, and use the rcsfile hook to add objects.
83184054Slulf */
84184054Slulfint
85186700Slulfrcsparse_run(struct rcsfile *rf, FILE *infp, int ro)
86184054Slulf{
87184054Slulf	yyscan_t scanner;
88184054Slulf	char *desc;
89184054Slulf	int error, tok;
90184054Slulf
91184054Slulf	error = 0;
92184054Slulf	rcslex_init(&scanner);
93184054Slulf	rcsset_in(infp, scanner);
94184054Slulf	tok = parse_admin(rf, &scanner);
95184054Slulf	tok = parse_deltas(rf, &scanner, tok);
96184054Slulf	assert(tok == KEYWORD);
97184054Slulf	asserttoken(&scanner, STRING);
98185592Slulf	desc = duptext(&scanner, NULL);
99184054Slulf	rcsfile_setval(rf, RCSFILE_DESC, desc);
100184054Slulf	free(desc);
101184054Slulf	tok = rcslex(scanner);
102186700Slulf	/* Parse deltatexts if we need to edit. */
103186700Slulf	if (!ro) {
104186700Slulf		error = parse_deltatexts(rf, &scanner, tok);
105186700Slulf		if (error)
106186700Slulf			return (error);
107186700Slulf	}
108184054Slulf	rcslex_destroy(scanner);
109184054Slulf	return (0);
110184054Slulf}
111184054Slulf
112184054Slulf/*
113184054Slulf * Parse the admin part of a RCS file.
114184054Slulf */
115184054Slulfstatic int
116184054Slulfparse_admin(struct rcsfile *rf, yyscan_t *sp)
117184054Slulf{
118185134Slulf	char *branch, *comment, *expand, *head, *id, *revnum, *tag, *tmp;
119184054Slulf	int strict, token;
120184054Slulf
121184054Slulf	strict = 0;
122184054Slulf	branch = NULL;
123184054Slulf
124184054Slulf	/* head {num}; */
125184054Slulf	asserttoken(sp, KEYWORD);
126184054Slulf	asserttoken(sp, NUM);
127185592Slulf	head = duptext(sp, NULL);
128184054Slulf	rcsfile_setval(rf, RCSFILE_HEAD, head);
129184054Slulf	free(head);
130184054Slulf	asserttoken(sp, SEMIC);
131184054Slulf
132184054Slulf	/* { branch {num}; } */
133184054Slulf	token = rcslex(*sp);
134184054Slulf	if (token == KEYWORD_TWO) {
135184054Slulf		asserttoken(sp, NUM);
136185592Slulf		branch = duptext(sp, NULL);
137184054Slulf		rcsfile_setval(rf, RCSFILE_BRANCH, branch);
138184054Slulf		free(branch);
139184054Slulf		asserttoken(sp, SEMIC);
140184054Slulf		token = rcslex(*sp);
141184054Slulf	}
142184054Slulf
143184054Slulf	/* access {id]*; */
144184054Slulf	assert(token == KEYWORD);
145184054Slulf	token = rcslex(*sp);
146184054Slulf	while (token == ID) {
147185592Slulf		id = duptext(sp, NULL);
148184054Slulf		rcsfile_addaccess(rf, id);
149184054Slulf		free(id);
150184054Slulf		token = rcslex(*sp);
151184054Slulf	}
152184054Slulf	assert(token == SEMIC);
153184054Slulf
154184054Slulf	/* symbols {sym : num}*; */
155184054Slulf	asserttoken(sp, KEYWORD);
156184054Slulf	token = rcslex(*sp);
157184054Slulf	while (token == ID) {
158185592Slulf		tag = duptext(sp, NULL);
159184054Slulf		asserttoken(sp, COLON);
160184054Slulf		asserttoken(sp, NUM);
161185592Slulf		revnum = duptext(sp, NULL);
162184054Slulf		rcsfile_importtag(rf, tag, revnum);
163184054Slulf		free(tag);
164184054Slulf		free(revnum);
165184054Slulf		token = rcslex(*sp);
166184054Slulf	}
167184054Slulf	assert(token == SEMIC);
168184054Slulf
169184054Slulf	/* locks {id : num}*; */
170184054Slulf	asserttoken(sp, KEYWORD);
171184054Slulf	token = rcslex(*sp);
172184054Slulf	while (token == ID) {
173185134Slulf		/* XXX: locks field is skipped */
174184054Slulf		asserttoken(sp, COLON);
175184054Slulf		asserttoken(sp, NUM);
176184054Slulf		token = rcslex(*sp);
177184054Slulf	}
178184054Slulf	assert(token == SEMIC);
179184054Slulf	token = rcslex(*sp);
180184054Slulf	while (token == KEYWORD) {
181184054Slulf		tmp = rcsget_text(*sp);
182184054Slulf
183184054Slulf		/* {strict  ;} */
184184054Slulf		if (!strcmp(tmp, "strict")) {
185184054Slulf			rcsfile_setval(rf, RCSFILE_STRICT, tmp);
186184054Slulf			asserttoken(sp, SEMIC);
187184054Slulf		/* { comment {string}; } */
188184054Slulf		} else if (!strcmp(tmp, "comment")) {
189184054Slulf			token = rcslex(*sp);
190184054Slulf			if (token == STRING) {
191185592Slulf				comment = duptext(sp, NULL);
192184054Slulf				rcsfile_setval(rf, RCSFILE_COMMENT, comment);
193184054Slulf				free(comment);
194184054Slulf			}
195184054Slulf			asserttoken(sp, SEMIC);
196184054Slulf		/* { expand {string}; } */
197184054Slulf		} else if (!strcmp(tmp, "expand")) {
198184054Slulf			token = rcslex(*sp);
199184054Slulf			if (token == STRING) {
200185592Slulf				expand = duptext(sp, NULL);
201184054Slulf				rcsfile_setval(rf, RCSFILE_EXPAND, expand);
202184054Slulf				free(expand);
203184054Slulf			}
204184054Slulf			asserttoken(sp, SEMIC);
205184054Slulf		}
206184054Slulf		/* {newphrase }* */
207184054Slulf		token = rcslex(*sp);
208184054Slulf		while (token == ID) {
209184054Slulf			token = rcslex(*sp);
210185134Slulf			/* XXX: newphrases ignored */
211184054Slulf			while (token == ID || token == NUM || token == STRING ||
212184054Slulf			    token == COLON) {
213184054Slulf				token = rcslex(*sp);
214184054Slulf			}
215184054Slulf			asserttoken(sp, SEMIC);
216184054Slulf			token = rcslex(*sp);
217184054Slulf		}
218184054Slulf	}
219184054Slulf	return (token);
220184054Slulf}
221184054Slulf
222184054Slulf/*
223184054Slulf * Parse RCS deltas.
224184054Slulf */
225184054Slulfstatic int
226184054Slulfparse_deltas(struct rcsfile *rf, yyscan_t *sp, int token)
227184054Slulf{
228184054Slulf	STAILQ_HEAD(, string) branchlist;
229184054Slulf	char *revnum, *revdate, *author, *state, *next;
230184054Slulf
231184054Slulf	/* In case we don't have deltas. */
232184054Slulf	if (token != NUM)
233184054Slulf		return (token);
234184054Slulf	do {
235184054Slulf		next = NULL;
236184054Slulf		state = NULL;
237184054Slulf
238184054Slulf		/* num */
239184054Slulf		assert(token == NUM);
240185592Slulf		revnum = duptext(sp, NULL);
241184054Slulf		/* date num; */
242184054Slulf		asserttoken(sp, KEYWORD);
243184054Slulf		asserttoken(sp, NUM);
244185592Slulf		revdate = duptext(sp, NULL);
245184054Slulf		asserttoken(sp, SEMIC);
246184054Slulf		/* author id; */
247184054Slulf		asserttoken(sp, KEYWORD);
248184054Slulf		asserttoken(sp, ID);
249185592Slulf		author = duptext(sp, NULL);
250184054Slulf		asserttoken(sp, SEMIC);
251184054Slulf		/* state {id}; */
252184054Slulf		asserttoken(sp, KEYWORD);
253184054Slulf		token = rcslex(*sp);
254184054Slulf		if (token == ID) {
255185592Slulf			state = duptext(sp, NULL);
256184054Slulf			token = rcslex(*sp);
257184054Slulf		}
258184054Slulf		assert(token == SEMIC);
259184054Slulf		/* branches {num}*; */
260184054Slulf		asserttoken(sp, KEYWORD);
261184054Slulf		token = rcslex(*sp);
262184054Slulf		STAILQ_INIT(&branchlist);
263184054Slulf		while (token == NUM)
264184054Slulf			token = rcslex(*sp);
265184054Slulf		assert(token == SEMIC);
266184054Slulf		/* next {num}; */
267184054Slulf		asserttoken(sp, KEYWORD);
268184054Slulf		token = rcslex(*sp);
269184054Slulf		if (token == NUM) {
270185592Slulf			next = duptext(sp, NULL);
271184054Slulf			token = rcslex(*sp);
272184054Slulf		}
273184054Slulf		assert(token == SEMIC);
274184054Slulf		/* {newphrase }* */
275184054Slulf		token = rcslex(*sp);
276184054Slulf		while (token == ID) {
277184054Slulf			token = rcslex(*sp);
278185134Slulf			/* XXX: newphrases ignored. */
279184054Slulf			while (token == ID || token == NUM || token == STRING ||
280184054Slulf			    token == COLON) {
281184054Slulf				token = rcslex(*sp);
282184054Slulf			}
283184054Slulf			asserttoken(sp, SEMIC);
284184054Slulf			token = rcslex(*sp);
285184054Slulf		}
286184054Slulf		rcsfile_importdelta(rf, revnum, revdate, author, state, next);
287184054Slulf		free(revnum);
288184054Slulf		free(revdate);
289184054Slulf		free(author);
290184054Slulf		if (state != NULL)
291184054Slulf			free(state);
292184054Slulf		if (next != NULL)
293184054Slulf			free(next);
294184054Slulf	} while (token == NUM);
295184054Slulf
296184054Slulf	return (token);
297184054Slulf}
298184054Slulf
299184054Slulf/*
300184054Slulf * Parse RCS deltatexts.
301184054Slulf */
302184054Slulfstatic int
303184054Slulfparse_deltatexts(struct rcsfile *rf, yyscan_t *sp, int token)
304184054Slulf{
305184054Slulf	struct delta *d;
306185134Slulf	char *log, *revnum, *text;
307185592Slulf	int error, len;
308184054Slulf
309184054Slulf	error = 0;
310184054Slulf	/* In case we don't have deltatexts. */
311185134Slulf	if (token != NUM)
312190422Slulf		return (-1);
313184054Slulf	do {
314184054Slulf		/* num */
315184054Slulf		assert(token == NUM);
316185592Slulf		revnum = duptext(sp, NULL);
317184054Slulf		/* Get delta we're adding text to. */
318184054Slulf		d = rcsfile_getdelta(rf, revnum);
319184054Slulf		free(revnum);
320184054Slulf
321213300Sjhb		/*
322213300Sjhb		 * XXX: The RCS file is corrupt, but lie and say it is ok.
323213300Sjhb		 * If it is actually broken, then the MD5 mismatch will
324213300Sjhb		 * trigger a fixup.
325213300Sjhb		 */
326213300Sjhb		if (d == NULL)
327213300Sjhb			return (0);
328213300Sjhb
329184054Slulf		/* log string */
330184054Slulf		asserttoken(sp, KEYWORD);
331184054Slulf		asserttoken(sp, STRING);
332185592Slulf		log = duptext(sp, &len);
333185592Slulf		error = rcsdelta_addlog(d, log, len);
334184054Slulf		free(log);
335184054Slulf		if (error)
336184054Slulf			return (-1);
337184054Slulf		/* { newphrase }* */
338184054Slulf		token = rcslex(*sp);
339184054Slulf		while (token == ID) {
340184054Slulf			token = rcslex(*sp);
341185134Slulf			/* XXX: newphrases ignored. */
342184054Slulf			while (token == ID || token == NUM || token == STRING ||
343184054Slulf			    token == COLON) {
344184054Slulf				token = rcslex(*sp);
345184054Slulf			}
346184054Slulf			asserttoken(sp, SEMIC);
347184054Slulf			token = rcslex(*sp);
348184054Slulf		}
349184054Slulf		/* text string */
350184054Slulf		assert(token == KEYWORD);
351184054Slulf		asserttoken(sp, STRING);
352185592Slulf		text = duptext(sp, &len);
353185592Slulf		error = rcsdelta_addtext(d, text, len);
354185134Slulf		/*
355184054Slulf		 * If this happens, something is wrong with the RCS file, and it
356184054Slulf		 * should be resent.
357184054Slulf		 */
358184054Slulf		free(text);
359184054Slulf		if (error)
360184054Slulf			return (-1);
361184054Slulf		token = rcslex(*sp);
362184054Slulf	} while (token == NUM);
363184054Slulf
364184054Slulf	return (0);
365184054Slulf}
366