1/*	$NetBSD: for.c,v 1.179 2024/04/01 12:33:27 rillig Exp $	*/
2
3/*
4 * Copyright (c) 1992, The Regents of the University of California.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32/*
33 * Handling of .for/.endfor loops in a makefile.
34 *
35 * For loops have the form:
36 *
37 *	.for <varname...> in <value...>
38 *	# the body
39 *	.endfor
40 *
41 * When a .for line is parsed, the following lines are copied to the body of
42 * the .for loop, until the corresponding .endfor line is reached.  In this
43 * phase, the body is not yet evaluated.  This also applies to any nested
44 * .for loops.
45 *
46 * After reaching the .endfor, the values from the .for line are grouped
47 * according to the number of variables.  For each such group, the unexpanded
48 * body is scanned for expressions, and those that match the
49 * variable names are replaced with expressions of the form ${:U...}.  After
50 * that, the body is treated like a file from an .include directive.
51 *
52 * Interface:
53 *	For_Eval	Evaluate the loop in the passed line.
54 *
55 *	For_Run		Run accumulated loop
56 */
57
58#include "make.h"
59
60/*	"@(#)for.c	8.1 (Berkeley) 6/6/93"	*/
61MAKE_RCSID("$NetBSD: for.c,v 1.179 2024/04/01 12:33:27 rillig Exp $");
62
63
/*
 * The state of a single .for loop, from its parsed head up to the iteration
 * that is currently being run.
 */
typedef struct ForLoop {
	/*
	 * Names of the iteration variables; each name is allocated
	 * separately and freed in ForLoop_Free.
	 */
	Vector /* of 'char *' */ vars;
	/*
	 * The words to iterate over; ForLoop_ParseItems only accepts a
	 * count that is a multiple of vars.len.
	 */
	SubstringWords items;
	/*
	 * The unexpanded lines between .for and .endfor, as collected by
	 * For_Accum, each followed by a newline.
	 */
	Buffer body;
	/*
	 * Index into items.words of the first item of the next iteration;
	 * equal to items.len when the loop is finished.
	 */
	unsigned int nextItem;
} ForLoop;
70
71
/*
 * The loop whose body is currently being accumulated.  Set by For_Eval when
 * a .for directive is found, taken over and reset to NULL by For_Run.
 */
static ForLoop *accumFor;
73
74
/*
 * Advance *pp past any mixture of whitespace characters and
 * backslash-newline sequences.  See LK_FOR_BODY.
 */
static void
skip_whitespace_or_line_continuation(const char **pp)
{
	const char *cur = *pp;

	/* A backslash is not whitespace, so it tells the two cases apart. */
	while (ch_isspace(*cur) || (cur[0] == '\\' && cur[1] == '\n'))
		cur += cur[0] == '\\' ? 2 : 1;

	*pp = cur;
}
90
91static ForLoop *
92ForLoop_New(void)
93{
94	ForLoop *f = bmake_malloc(sizeof *f);
95
96	Vector_Init(&f->vars, sizeof(char *));
97	SubstringWords_Init(&f->items);
98	Buf_Init(&f->body);
99	f->nextItem = 0;
100
101	return f;
102}
103
104void
105ForLoop_Free(ForLoop *f)
106{
107	while (f->vars.len > 0)
108		free(*(char **)Vector_Pop(&f->vars));
109	Vector_Done(&f->vars);
110
111	SubstringWords_Free(f->items);
112	Buf_Done(&f->body);
113
114	free(f);
115}
116
117char *
118ForLoop_Details(const ForLoop *f)
119{
120	size_t i, n;
121	const char **vars;
122	const Substring *items;
123	Buffer buf;
124
125	n = f->vars.len;
126	vars = f->vars.items;
127	assert(f->nextItem >= n);
128	items = f->items.words + f->nextItem - n;
129
130	Buf_Init(&buf);
131	for (i = 0; i < n; i++) {
132		if (i > 0)
133			Buf_AddStr(&buf, ", ");
134		Buf_AddStr(&buf, vars[i]);
135		Buf_AddStr(&buf, " = ");
136		Buf_AddRange(&buf, items[i].start, items[i].end);
137	}
138	return Buf_DoneData(&buf);
139}
140
/*
 * Whether the character may occur in the name of an iteration variable.
 * The characters that delimit or start an expression, as well as ':' and
 * the backslash, are rejected.
 */
static bool
IsValidInVarname(char c)
{
	switch (c) {
	case '$':
	case ':':
	case '\\':
	case '(':
	case ')':
	case '{':
	case '}':
		return false;
	default:
		return true;
	}
}
147
148static void
149ForLoop_ParseVarnames(ForLoop *f, const char **pp)
150{
151	const char *p = *pp;
152
153	for (;;) {
154		size_t len;
155
156		cpp_skip_whitespace(&p);
157		if (*p == '\0') {
158			Parse_Error(PARSE_FATAL, "missing `in' in for");
159			f->vars.len = 0;
160			return;
161		}
162
163		for (len = 0; p[len] != '\0' && !ch_isspace(p[len]); len++) {
164			if (!IsValidInVarname(p[len])) {
165				Parse_Error(PARSE_FATAL,
166				    "invalid character '%c' "
167				    "in .for loop variable name",
168				    p[len]);
169				f->vars.len = 0;
170				return;
171			}
172		}
173
174		if (len == 2 && p[0] == 'i' && p[1] == 'n') {
175			p += 2;
176			break;
177		}
178
179		*(char **)Vector_Push(&f->vars) = bmake_strldup(p, len);
180		p += len;
181	}
182
183	if (f->vars.len == 0) {
184		Parse_Error(PARSE_FATAL, "no iteration variables in for");
185		return;
186	}
187
188	*pp = p;
189}
190
191static bool
192ForLoop_ParseItems(ForLoop *f, const char *p)
193{
194	char *items;
195
196	cpp_skip_whitespace(&p);
197
198	items = Var_Subst(p, SCOPE_GLOBAL, VARE_WANTRES);
199	/* TODO: handle errors */
200
201	f->items = Substring_Words(items, false);
202	free(items);
203
204	if (f->items.len == 1 && Substring_IsEmpty(f->items.words[0]))
205		f->items.len = 0;	/* .for var in ${:U} */
206
207	if (f->items.len % f->vars.len != 0) {
208		Parse_Error(PARSE_FATAL,
209		    "Wrong number of words (%u) in .for "
210		    "substitution list with %u variables",
211		    (unsigned)f->items.len, (unsigned)f->vars.len);
212		return false;
213	}
214
215	return true;
216}
217
/* Whether p starts with the word 'for', followed by whitespace. */
static bool
IsFor(const char *p)
{
	if (p[0] != 'f' || p[1] != 'o' || p[2] != 'r')
		return false;
	return ch_isspace(p[3]);
}
223
/*
 * Whether p starts with the word 'endfor', followed by whitespace or the
 * end of the string.
 */
static bool
IsEndfor(const char *p)
{
	/* Compare the first character directly before calling strncmp. */
	if (p[0] != 'e' || strncmp(p, "endfor", 6) != 0)
		return false;
	return p[6] == '\0' || ch_isspace(p[6]);
}
230
231/*
232 * Evaluate the for loop in the passed line. The line looks like this:
233 *	.for <varname...> in <value...>
234 *
235 * Results:
236 *	0	not a .for directive
237 *	1	found a .for directive
238 *	-1	erroneous .for directive
239 */
240int
241For_Eval(const char *line)
242{
243	const char *p;
244	ForLoop *f;
245
246	p = line + 1;		/* skip the '.' */
247	skip_whitespace_or_line_continuation(&p);
248
249	if (IsFor(p)) {
250		p += 3;
251
252		f = ForLoop_New();
253		ForLoop_ParseVarnames(f, &p);
254		if (f->vars.len > 0 && !ForLoop_ParseItems(f, p))
255			f->items.len = 0;	/* don't iterate */
256
257		accumFor = f;
258		return 1;
259	} else if (IsEndfor(p)) {
260		Parse_Error(PARSE_FATAL, "for-less endfor");
261		return -1;
262	} else
263		return 0;
264}
265
266/*
267 * Add another line to the .for loop that is being built up.
268 * Returns false when the matching .endfor is reached.
269 */
270bool
271For_Accum(const char *line, int *forLevel)
272{
273	const char *p = line;
274
275	if (*p == '.') {
276		p++;
277		skip_whitespace_or_line_continuation(&p);
278
279		if (IsEndfor(p)) {
280			DEBUG1(FOR, "For: end for %d\n", *forLevel);
281			if (--*forLevel == 0)
282				return false;
283		} else if (IsFor(p)) {
284			(*forLevel)++;
285			DEBUG1(FOR, "For: new loop %d\n", *forLevel);
286		}
287	}
288
289	Buf_AddStr(&accumFor->body, line);
290	Buf_AddByte(&accumFor->body, '\n');
291	return true;
292}
293
/*
 * When the body of a '.for i' loop is prepared for an iteration, each
 * occurrence of $i in the body is replaced with ${:U...}, inserting the
 * value of the item.  If this item contains a '$', it may be the start of an
 * expression, which is then copied verbatim.
 *
 * Determine the length of such an expression, given the range [s, e) that
 * follows the '$'.  The result is 1 for a single-character variable, the
 * length including both delimiters for $(...) or ${...}, or 0 if the '$'
 * needs to be escaped instead, which is the case at the end of the item or
 * if the closing delimiter is missing.
 *
 * The scan is rather naive, it only counts the delimiters, ignoring escape
 * characters and funny delimiters in modifiers like ':S}from}to}'.
 */
static size_t
ExprLen(const char *s, const char *e)
{
	char opening, closing;
	const char *cur;
	int nesting = 1;

	if (s == e)
		return 0;	/* just escape the '$' */

	opening = s[0];
	switch (opening) {
	case '(':
		closing = ')';
		break;
	case '{':
		closing = '}';
		break;
	default:
		return 1;	/* Single char variable */
	}

	for (cur = s + 1; cur != e; cur++) {
		if (*cur == opening) {
			nesting++;
		} else if (*cur == closing) {
			nesting--;
			if (nesting == 0)
				return (size_t)(cur - s) + 1;
		}
	}

	/* Expression end not found, escape the $ */
	return 0;
}
331
332/*
333 * The .for loop substitutes the items as ${:U<value>...}, which means
334 * that characters that break this syntax must be backslash-escaped.
335 */
336static bool
337NeedsEscapes(Substring value, char endc)
338{
339	const char *p;
340
341	for (p = value.start; p != value.end; p++) {
342		if (*p == ':' || *p == '$' || *p == '\\' || *p == endc ||
343		    *p == '\n')
344			return true;
345	}
346	return false;
347}
348
349/*
350 * While expanding the body of a .for loop, write the item as a ${:U...}
351 * expression, escaping characters as needed.  The result is later unescaped
352 * by ApplyModifier_Defined.
353 */
354static void
355AddEscaped(Buffer *cmds, Substring item, char endc)
356{
357	const char *p;
358	char ch;
359
360	if (!NeedsEscapes(item, endc)) {
361		Buf_AddRange(cmds, item.start, item.end);
362		return;
363	}
364
365	for (p = item.start; p != item.end;) {
366		ch = *p;
367		if (ch == '$') {
368			size_t len = ExprLen(p + 1, item.end);
369			if (len != 0) {
370				/*
371				 * XXX: Should a '\' be added here?
372				 * See directive-for-escape.mk, ExprLen.
373				 */
374				Buf_AddBytes(cmds, p, 1 + len);
375				p += 1 + len;
376				continue;
377			}
378			Buf_AddByte(cmds, '\\');
379		} else if (ch == ':' || ch == '\\' || ch == endc)
380			Buf_AddByte(cmds, '\\');
381		else if (ch == '\n') {
382			Parse_Error(PARSE_FATAL, "newline in .for value");
383			ch = ' ';	/* prevent newline injection */
384		}
385		Buf_AddByte(cmds, ch);
386		p++;
387	}
388}
389
390/*
391 * While expanding the body of a .for loop, replace the variable name of an
392 * expression like ${i} or ${i:...} or $(i) or $(i:...) with ":Uvalue".
393 */
394static void
395ForLoop_SubstVarLong(ForLoop *f, unsigned int firstItem, Buffer *body,
396		     const char **pp, char endc, const char **inout_mark)
397{
398	size_t i;
399	const char *start = *pp;
400	const char **varnames = Vector_Get(&f->vars, 0);
401
402	for (i = 0; i < f->vars.len; i++) {
403		const char *p = start;
404
405		if (!cpp_skip_string(&p, varnames[i]))
406			continue;
407		/* XXX: why test for backslash here? */
408		if (*p != ':' && *p != endc && *p != '\\')
409			continue;
410
411		/*
412		 * Found a variable match.  Skip over the variable name and
413		 * instead add ':U<value>' to the current body.
414		 */
415		Buf_AddRange(body, *inout_mark, start);
416		Buf_AddStr(body, ":U");
417		AddEscaped(body, f->items.words[firstItem + i], endc);
418
419		*inout_mark = p;
420		*pp = p;
421		return;
422	}
423}
424
425/*
426 * While expanding the body of a .for loop, replace single-character
427 * expressions like $i with their ${:U...} expansion.
428 */
429static void
430ForLoop_SubstVarShort(ForLoop *f, unsigned int firstItem, Buffer *body,
431		      const char *p, const char **inout_mark)
432{
433	char ch = *p;
434	const char **vars;
435	size_t i;
436
437	/* Skip $$ and stupid ones. */
438	if (ch == '}' || ch == ')' || ch == ':' || ch == '$')
439		return;
440
441	vars = Vector_Get(&f->vars, 0);
442	for (i = 0; i < f->vars.len; i++) {
443		const char *varname = vars[i];
444		if (varname[0] == ch && varname[1] == '\0')
445			goto found;
446	}
447	return;
448
449found:
450	Buf_AddRange(body, *inout_mark, p);
451	*inout_mark = p + 1;
452
453	/* Replace $<ch> with ${:U<value>} */
454	Buf_AddStr(body, "{:U");
455	AddEscaped(body, f->items.words[firstItem + i], '}');
456	Buf_AddByte(body, '}');
457}
458
459/*
460 * Compute the body for the current iteration by copying the unexpanded body,
461 * replacing the expressions for the iteration variables on the way.
462 *
463 * Using expressions ensures that the .for loop can't generate
464 * syntax, and that the later parsing will still see an expression.
465 * This code assumes that the variable with the empty name is never defined,
466 * see unit-tests/varname-empty.mk.
467 *
468 * The detection of substitutions of the loop control variables is naive.
469 * Many of the modifiers use '\$' instead of '$$' to escape '$', so it is
470 * possible to contrive a makefile where an unwanted substitution happens.
471 * See unit-tests/directive-for-escape.mk.
472 */
473static void
474ForLoop_SubstBody(ForLoop *f, unsigned int firstItem, Buffer *body)
475{
476	const char *p, *end;
477	const char *mark;	/* where the last substitution left off */
478
479	Buf_Clear(body);
480
481	mark = f->body.data;
482	end = f->body.data + f->body.len;
483	for (p = mark; (p = strchr(p, '$')) != NULL;) {
484		if (p[1] == '{' || p[1] == '(') {
485			char endc = p[1] == '{' ? '}' : ')';
486			p += 2;
487			ForLoop_SubstVarLong(f, firstItem, body,
488			    &p, endc, &mark);
489		} else {
490			ForLoop_SubstVarShort(f, firstItem, body,
491			    p + 1, &mark);
492			p += 2;
493		}
494	}
495
496	Buf_AddRange(body, mark, end);
497}
498
499/*
500 * Compute the body for the current iteration by copying the unexpanded body,
501 * replacing the expressions for the iteration variables on the way.
502 */
503bool
504For_NextIteration(ForLoop *f, Buffer *body)
505{
506	if (f->nextItem == f->items.len)
507		return false;
508
509	f->nextItem += (unsigned int)f->vars.len;
510	ForLoop_SubstBody(f, f->nextItem - (unsigned int)f->vars.len, body);
511	if (DEBUG(FOR)) {
512		char *details = ForLoop_Details(f);
513		debug_printf("For: loop body with %s:\n%s",
514		    details, body->data);
515		free(details);
516	}
517	return true;
518}
519
520/* Break out of the .for loop. */
521void
522For_Break(ForLoop *f)
523{
524	f->nextItem = (unsigned int)f->items.len;
525}
526
527/* Run the .for loop, imitating the actions of an include file. */
528void
529For_Run(unsigned headLineno, unsigned bodyReadLines)
530{
531	Buffer buf;
532	ForLoop *f = accumFor;
533	accumFor = NULL;
534
535	if (f->items.len > 0) {
536		Buf_Init(&buf);
537		Parse_PushInput(NULL, headLineno, bodyReadLines, buf, f);
538	} else
539		ForLoop_Free(f);
540}
541