1184054Slulf/*- 2186743Slulf * Copyright (c) 2008-2009, Ulf Lilleengen <lulf@FreeBSD.org> 3184054Slulf * All rights reserved. 4184054Slulf * 5184054Slulf * Redistribution and use in source and binary forms, with or without 6184054Slulf * modification, are permitted provided that the following conditions 7184054Slulf * are met: 8184054Slulf * 1. Redistributions of source code must retain the above copyright 9184054Slulf * notice, this list of conditions and the following disclaimer. 10184054Slulf * 2. Redistributions in binary form must reproduce the above copyright 11184054Slulf * notice, this list of conditions and the following disclaimer in the 12184054Slulf * documentation and/or other materials provided with the distribution. 13184054Slulf * 14184054Slulf * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15184054Slulf * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16184054Slulf * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17184054Slulf * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18184054Slulf * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19184054Slulf * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20184054Slulf * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21184054Slulf * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22184054Slulf * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23184054Slulf * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24184054Slulf * SUCH DAMAGE. 25184054Slulf * 26184054Slulf * $FreeBSD$ 27184054Slulf */ 28184054Slulf 29185134Slulf#include <assert.h> 30185134Slulf#include <stdio.h> 31184054Slulf#include <stdlib.h> 32185134Slulf 33184054Slulf#include "misc.h" 34184054Slulf#include "queue.h" 35185134Slulf#include "rcsfile.h" 36185134Slulf#include "rcsparse.h" 37185134Slulf#include "rcstokenizer.h" 38184054Slulf 39184054Slulf/* 40184054Slulf * This is an RCS-parser using lex for tokenizing and makes sure the RCS syntax 41184054Slulf * is correct as it constructs an RCS file that is used by csup. 42184054Slulf */ 43184054Slulf 44184054Slulfstatic void asserttoken(yyscan_t *, int); 45184054Slulfstatic int parse_admin(struct rcsfile *, yyscan_t *); 46184054Slulfstatic int parse_deltas(struct rcsfile *, yyscan_t *, int); 47184054Slulfstatic int parse_deltatexts(struct rcsfile *, yyscan_t *, int); 48185592Slulfstatic char *duptext(yyscan_t *, int *); 49184054Slulf 50184054Slulfstruct string { 51184054Slulf char *str; 52184054Slulf STAILQ_ENTRY(string) next; 53184054Slulf}; 54184054Slulf 55184054Slulfstatic void 56184054Slulfasserttoken(yyscan_t *sp, int token) 57184054Slulf{ 58184054Slulf int t; 59184054Slulf 60184054Slulf t = token; 61184054Slulf t = rcslex(*sp); 62184054Slulf assert(t == token); 63184054Slulf} 64184054Slulf 65184054Slulfstatic char * 66185592Slulfduptext(yyscan_t *sp, int *arglen) 67184054Slulf{ 68184054Slulf char *tmp, *val; 69184054Slulf int len; 70184054Slulf 71184054Slulf tmp = rcsget_text(*sp); 72184054Slulf len = rcsget_leng(*sp); 73185592Slulf val = xmalloc(len + 1); 74185592Slulf memcpy(val, tmp, len); 75185592Slulf val[len] = '\0'; 76185592Slulf if (arglen != NULL) 77185592Slulf *arglen = len; 78184054Slulf return (val); 79184054Slulf} 80184054Slulf 81184054Slulf/* 82184054Slulf * Start up parser, and use the rcsfile hook to add objects. 83184054Slulf */ 84184054Slulfint 85186700Slulfrcsparse_run(struct rcsfile *rf, FILE *infp, int ro) 86184054Slulf{ 87184054Slulf yyscan_t scanner; 88184054Slulf char *desc; 89184054Slulf int error, tok; 90184054Slulf 91184054Slulf error = 0; 92184054Slulf rcslex_init(&scanner); 93184054Slulf rcsset_in(infp, scanner); 94184054Slulf tok = parse_admin(rf, &scanner); 95184054Slulf tok = parse_deltas(rf, &scanner, tok); 96184054Slulf assert(tok == KEYWORD); 97184054Slulf asserttoken(&scanner, STRING); 98185592Slulf desc = duptext(&scanner, NULL); 99184054Slulf rcsfile_setval(rf, RCSFILE_DESC, desc); 100184054Slulf free(desc); 101184054Slulf tok = rcslex(scanner); 102186700Slulf /* Parse deltatexts if we need to edit. */ 103186700Slulf if (!ro) { 104186700Slulf error = parse_deltatexts(rf, &scanner, tok); 105186700Slulf if (error) 106186700Slulf return (error); 107186700Slulf } 108184054Slulf rcslex_destroy(scanner); 109184054Slulf return (0); 110184054Slulf} 111184054Slulf 112184054Slulf/* 113184054Slulf * Parse the admin part of a RCS file. 114184054Slulf */ 115184054Slulfstatic int 116184054Slulfparse_admin(struct rcsfile *rf, yyscan_t *sp) 117184054Slulf{ 118185134Slulf char *branch, *comment, *expand, *head, *id, *revnum, *tag, *tmp; 119184054Slulf int strict, token; 120184054Slulf 121184054Slulf strict = 0; 122184054Slulf branch = NULL; 123184054Slulf 124184054Slulf /* head {num}; */ 125184054Slulf asserttoken(sp, KEYWORD); 126184054Slulf asserttoken(sp, NUM); 127185592Slulf head = duptext(sp, NULL); 128184054Slulf rcsfile_setval(rf, RCSFILE_HEAD, head); 129184054Slulf free(head); 130184054Slulf asserttoken(sp, SEMIC); 131184054Slulf 132184054Slulf /* { branch {num}; } */ 133184054Slulf token = rcslex(*sp); 134184054Slulf if (token == KEYWORD_TWO) { 135184054Slulf asserttoken(sp, NUM); 136185592Slulf branch = duptext(sp, NULL); 137184054Slulf rcsfile_setval(rf, RCSFILE_BRANCH, branch); 138184054Slulf free(branch); 139184054Slulf asserttoken(sp, SEMIC); 140184054Slulf token = rcslex(*sp); 141184054Slulf } 142184054Slulf 143184054Slulf /* access {id]*; */ 144184054Slulf assert(token == KEYWORD); 145184054Slulf token = rcslex(*sp); 146184054Slulf while (token == ID) { 147185592Slulf id = duptext(sp, NULL); 148184054Slulf rcsfile_addaccess(rf, id); 149184054Slulf free(id); 150184054Slulf token = rcslex(*sp); 151184054Slulf } 152184054Slulf assert(token == SEMIC); 153184054Slulf 154184054Slulf /* symbols {sym : num}*; */ 155184054Slulf asserttoken(sp, KEYWORD); 156184054Slulf token = rcslex(*sp); 157184054Slulf while (token == ID) { 158185592Slulf tag = duptext(sp, NULL); 159184054Slulf asserttoken(sp, COLON); 160184054Slulf asserttoken(sp, NUM); 161185592Slulf revnum = duptext(sp, NULL); 162184054Slulf rcsfile_importtag(rf, tag, revnum); 163184054Slulf free(tag); 164184054Slulf free(revnum); 165184054Slulf token = rcslex(*sp); 166184054Slulf } 167184054Slulf assert(token == SEMIC); 168184054Slulf 169184054Slulf /* locks {id : num}*; */ 170184054Slulf asserttoken(sp, KEYWORD); 171184054Slulf token = rcslex(*sp); 172184054Slulf while (token == ID) { 173185134Slulf /* XXX: locks field is skipped */ 174184054Slulf asserttoken(sp, COLON); 175184054Slulf asserttoken(sp, NUM); 176184054Slulf token = rcslex(*sp); 177184054Slulf } 178184054Slulf assert(token == SEMIC); 179184054Slulf token = rcslex(*sp); 180184054Slulf while (token == KEYWORD) { 181184054Slulf tmp = rcsget_text(*sp); 182184054Slulf 183184054Slulf /* {strict ;} */ 184184054Slulf if (!strcmp(tmp, "strict")) { 185184054Slulf rcsfile_setval(rf, RCSFILE_STRICT, tmp); 186184054Slulf asserttoken(sp, SEMIC); 187184054Slulf /* { comment {string}; } */ 188184054Slulf } else if (!strcmp(tmp, "comment")) { 189184054Slulf token = rcslex(*sp); 190184054Slulf if (token == STRING) { 191185592Slulf comment = duptext(sp, NULL); 192184054Slulf rcsfile_setval(rf, RCSFILE_COMMENT, comment); 193184054Slulf free(comment); 194184054Slulf } 195184054Slulf asserttoken(sp, SEMIC); 196184054Slulf /* { expand {string}; } */ 197184054Slulf } else if (!strcmp(tmp, "expand")) { 198184054Slulf token = rcslex(*sp); 199184054Slulf if (token == STRING) { 200185592Slulf expand = duptext(sp, NULL); 201184054Slulf rcsfile_setval(rf, RCSFILE_EXPAND, expand); 202184054Slulf free(expand); 203184054Slulf } 204184054Slulf asserttoken(sp, SEMIC); 205184054Slulf } 206184054Slulf /* {newphrase }* */ 207184054Slulf token = rcslex(*sp); 208184054Slulf while (token == ID) { 209184054Slulf token = rcslex(*sp); 210185134Slulf /* XXX: newphrases ignored */ 211184054Slulf while (token == ID || token == NUM || token == STRING || 212184054Slulf token == COLON) { 213184054Slulf token = rcslex(*sp); 214184054Slulf } 215184054Slulf asserttoken(sp, SEMIC); 216184054Slulf token = rcslex(*sp); 217184054Slulf } 218184054Slulf } 219184054Slulf return (token); 220184054Slulf} 221184054Slulf 222184054Slulf/* 223184054Slulf * Parse RCS deltas. 224184054Slulf */ 225184054Slulfstatic int 226184054Slulfparse_deltas(struct rcsfile *rf, yyscan_t *sp, int token) 227184054Slulf{ 228184054Slulf STAILQ_HEAD(, string) branchlist; 229184054Slulf char *revnum, *revdate, *author, *state, *next; 230184054Slulf 231184054Slulf /* In case we don't have deltas. */ 232184054Slulf if (token != NUM) 233184054Slulf return (token); 234184054Slulf do { 235184054Slulf next = NULL; 236184054Slulf state = NULL; 237184054Slulf 238184054Slulf /* num */ 239184054Slulf assert(token == NUM); 240185592Slulf revnum = duptext(sp, NULL); 241184054Slulf /* date num; */ 242184054Slulf asserttoken(sp, KEYWORD); 243184054Slulf asserttoken(sp, NUM); 244185592Slulf revdate = duptext(sp, NULL); 245184054Slulf asserttoken(sp, SEMIC); 246184054Slulf /* author id; */ 247184054Slulf asserttoken(sp, KEYWORD); 248184054Slulf asserttoken(sp, ID); 249185592Slulf author = duptext(sp, NULL); 250184054Slulf asserttoken(sp, SEMIC); 251184054Slulf /* state {id}; */ 252184054Slulf asserttoken(sp, KEYWORD); 253184054Slulf token = rcslex(*sp); 254184054Slulf if (token == ID) { 255185592Slulf state = duptext(sp, NULL); 256184054Slulf token = rcslex(*sp); 257184054Slulf } 258184054Slulf assert(token == SEMIC); 259184054Slulf /* branches {num}*; */ 260184054Slulf asserttoken(sp, KEYWORD); 261184054Slulf token = rcslex(*sp); 262184054Slulf STAILQ_INIT(&branchlist); 263184054Slulf while (token == NUM) 264184054Slulf token = rcslex(*sp); 265184054Slulf assert(token == SEMIC); 266184054Slulf /* next {num}; */ 267184054Slulf asserttoken(sp, KEYWORD); 268184054Slulf token = rcslex(*sp); 269184054Slulf if (token == NUM) { 270185592Slulf next = duptext(sp, NULL); 271184054Slulf token = rcslex(*sp); 272184054Slulf } 273184054Slulf assert(token == SEMIC); 274184054Slulf /* {newphrase }* */ 275184054Slulf token = rcslex(*sp); 276184054Slulf while (token == ID) { 277184054Slulf token = rcslex(*sp); 278185134Slulf /* XXX: newphrases ignored. */ 279184054Slulf while (token == ID || token == NUM || token == STRING || 280184054Slulf token == COLON) { 281184054Slulf token = rcslex(*sp); 282184054Slulf } 283184054Slulf asserttoken(sp, SEMIC); 284184054Slulf token = rcslex(*sp); 285184054Slulf } 286184054Slulf rcsfile_importdelta(rf, revnum, revdate, author, state, next); 287184054Slulf free(revnum); 288184054Slulf free(revdate); 289184054Slulf free(author); 290184054Slulf if (state != NULL) 291184054Slulf free(state); 292184054Slulf if (next != NULL) 293184054Slulf free(next); 294184054Slulf } while (token == NUM); 295184054Slulf 296184054Slulf return (token); 297184054Slulf} 298184054Slulf 299184054Slulf/* 300184054Slulf * Parse RCS deltatexts. 301184054Slulf */ 302184054Slulfstatic int 303184054Slulfparse_deltatexts(struct rcsfile *rf, yyscan_t *sp, int token) 304184054Slulf{ 305184054Slulf struct delta *d; 306185134Slulf char *log, *revnum, *text; 307185592Slulf int error, len; 308184054Slulf 309184054Slulf error = 0; 310184054Slulf /* In case we don't have deltatexts. */ 311185134Slulf if (token != NUM) 312190422Slulf return (-1); 313184054Slulf do { 314184054Slulf /* num */ 315184054Slulf assert(token == NUM); 316185592Slulf revnum = duptext(sp, NULL); 317184054Slulf /* Get delta we're adding text to. */ 318184054Slulf d = rcsfile_getdelta(rf, revnum); 319184054Slulf free(revnum); 320184054Slulf 321213300Sjhb /* 322213300Sjhb * XXX: The RCS file is corrupt, but lie and say it is ok. 323213300Sjhb * If it is actually broken, then the MD5 mismatch will 324213300Sjhb * trigger a fixup. 325213300Sjhb */ 326213300Sjhb if (d == NULL) 327213300Sjhb return (0); 328213300Sjhb 329184054Slulf /* log string */ 330184054Slulf asserttoken(sp, KEYWORD); 331184054Slulf asserttoken(sp, STRING); 332185592Slulf log = duptext(sp, &len); 333185592Slulf error = rcsdelta_addlog(d, log, len); 334184054Slulf free(log); 335184054Slulf if (error) 336184054Slulf return (-1); 337184054Slulf /* { newphrase }* */ 338184054Slulf token = rcslex(*sp); 339184054Slulf while (token == ID) { 340184054Slulf token = rcslex(*sp); 341185134Slulf /* XXX: newphrases ignored. */ 342184054Slulf while (token == ID || token == NUM || token == STRING || 343184054Slulf token == COLON) { 344184054Slulf token = rcslex(*sp); 345184054Slulf } 346184054Slulf asserttoken(sp, SEMIC); 347184054Slulf token = rcslex(*sp); 348184054Slulf } 349184054Slulf /* text string */ 350184054Slulf assert(token == KEYWORD); 351184054Slulf asserttoken(sp, STRING); 352185592Slulf text = duptext(sp, &len); 353185592Slulf error = rcsdelta_addtext(d, text, len); 354185134Slulf /* 355184054Slulf * If this happens, something is wrong with the RCS file, and it 356184054Slulf * should be resent. 357184054Slulf */ 358184054Slulf free(text); 359184054Slulf if (error) 360184054Slulf return (-1); 361184054Slulf token = rcslex(*sp); 362184054Slulf } while (token == NUM); 363184054Slulf 364184054Slulf return (0); 365184054Slulf} 366