1/*-
2 * Copyright (c) 1988, 1989, 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * Copyright (c) 1989 by Berkeley Softworks
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Adam de Boor.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)str.c	5.8 (Berkeley) 6/1/90
39 */
40
41#include <sys/cdefs.h>
42__FBSDID("$FreeBSD$");
43
44#include <stdlib.h>
45#include <string.h>
46
47#include "buf.h"
48#include "str.h"
49#include "util.h"
50
51/**
52 * Initialize the argument array object.  The array is initially
53 * eight positions, and will be expanded as necessary.  The first
54 * position is set to NULL since everything ignores it.  We allocate
55 * (size + 1) since we need space for the terminating NULL.  The
56 * buffer is set to NULL, since no common buffer is allocated yet.
57 */
58void
59ArgArray_Init(ArgArray *aa)
60{
61
62	aa->size = 8;
63	aa->argv = emalloc((aa->size + 1) * sizeof(char *));
64	aa->argc = 0;
65	aa->argv[aa->argc++] = NULL;
66	aa->len = 0;
67	aa->buffer = NULL;
68}
69
70/**
71 * Cleanup the memory allocated for in the argument array object.
72 */
73void
74ArgArray_Done(ArgArray *aa)
75{
76
77	if (aa->buffer == NULL) {
78		int	i;
79		/* args are individually allocated */
80		for (i = 0; i < aa->argc; ++i) {
81			if (aa->argv[i]) {
82				free(aa->argv[i]);
83				aa->argv[i] = NULL;
84			}
85		}
86	} else {
87		/* args are part of a single allocation */
88		free(aa->buffer);
89		aa->buffer = NULL;
90	}
91	free(aa->argv);
92	aa->argv = NULL;
93	aa->argc = 0;
94	aa->size = 0;
95}
96
97/*-
98 * str_concat --
99 *	concatenate the two strings, inserting a space or slash between them.
100 *
101 * returns --
102 *	the resulting string in allocated space.
103 */
104char *
105str_concat(const char *s1, const char *s2, int flags)
106{
107	int len1, len2;
108	char *result;
109
110	/* get the length of both strings */
111	len1 = strlen(s1);
112	len2 = strlen(s2);
113
114	/* allocate length plus separator plus EOS */
115	result = emalloc(len1 + len2 + 2);
116
117	/* copy first string into place */
118	memcpy(result, s1, len1);
119
120	/* add separator character */
121	if (flags & STR_ADDSPACE) {
122		result[len1] = ' ';
123		++len1;
124	} else if (flags & STR_ADDSLASH) {
125		result[len1] = '/';
126		++len1;
127	}
128
129	/* copy second string plus EOS into place */
130	memcpy(result + len1, s2, len2 + 1);
131
132	return (result);
133}
134
/**
 * Fracture a string into an array of words (as delineated by tabs or
 * spaces) taking quotation marks into account.  Leading tabs/spaces
 * are ignored.
 *
 * aa is initialized here with ArgArray_Init().  The words are built in a
 * private copy of str that is kept in aa->buffer; the entries stored in
 * aa->argv (starting at index 1, index 0 being the NULL placeholder set
 * by ArgArray_Init()) point into that copy, and the slot following the
 * last word is set to NULL.
 *
 * Blanks and newlines inside single or double quotes do not split words.
 * If expand is true, the quote characters that open and close a quoted
 * section are dropped and backslash sequences are translated (\b, \f,
 * \n, \r and \t become the corresponding control characters, any other
 * escaped character stands for itself).  If expand is false, quotes and
 * backslash sequences are copied through unchanged.
 *
 * Scanning ends at the end of str, or at an unquoted newline that
 * terminates a word.  Because the word start is primed to the beginning
 * of the buffer, an input that is empty after the leading blanks yields
 * a single empty word.
 */
void
brk_string(ArgArray *aa, const char str[], Boolean expand)
{
	char	inquote;	/* active quote character, '\0' if none */
	char	*start;		/* start of the current word, NULL between words */
	char	*arg;		/* write position in aa->buffer */

	/* skip leading space chars. */
	for (; *str == ' ' || *str == '\t'; ++str)
		continue;

	ArgArray_Init(aa);

	/*
	 * The output is never longer than the input, so a copy of the
	 * (already trimmed) input is large enough to hold all words.
	 */
	aa->buffer = estrdup(str);

	arg = aa->buffer;
	start = arg;
	inquote = '\0';

	/*
	 * copy the string; at the same time, parse backslashes,
	 * quotes and build the argument list.
	 */
	for (;;) {
		switch (str[0]) {
		case '"':
		case '\'':
			if (inquote == '\0') {
				/* opening quote: dropped when expanding */
				inquote = str[0];
				if (expand)
					break;
				if (start == NULL)
					start = arg;
			} else if (inquote == str[0]) {
				/* matching closing quote: dropped when expanding */
				inquote = '\0';
				/* Don't miss "" or '' */
				if (start == NULL)
					start = arg;
				if (expand)
					break;
			} else {
				/* other type of quote found */
				if (start == NULL)
					start = arg;
			}
			/* quote character is kept literally */
			*arg++ = str[0];
			break;
		case ' ':
		case '\t':
		case '\n':
			if (inquote) {
				/* quoted white space belongs to the word */
				if (start == NULL)
					start = arg;
				*arg++ = str[0];
				break;
			}
			/* white space between words is simply skipped */
			if (start == NULL)
				break;
			/* FALLTHROUGH */
		case '\0':
			/*
			 * end of a token -- make sure there's enough argv
			 * space and save off a pointer.
			 */
			if (aa->argc == aa->size) {
				aa->size *= 2;		/* ramp up fast */
				aa->argv = erealloc(aa->argv,
				    (aa->size + 1) * sizeof(char *));
			}

			*arg++ = '\0';
			if (start == NULL) {
				/* input ended between words: just terminate argv */
				aa->argv[aa->argc] = start;
				return;
			}
			if (str[0] == '\n' || str[0] == '\0') {
				/* last word: store it and terminate argv */
				aa->argv[aa->argc++] = start;
				aa->argv[aa->argc] = NULL;
				return;
			} else {
				/* store the word and go look for the next one */
				aa->argv[aa->argc++] = start;
				start = NULL;
				break;
			}
		case '\\':
			if (start == NULL)
				start = arg;
			if (expand) {
				/* translate the escape; ++str consumes the escaped char */
				switch (str[1]) {
				case '\0':
				case '\n':
					/* hmmm; fix it up as best we can */
					*arg++ = '\\';
					break;
				case 'b':
					*arg++ = '\b';
					++str;
					break;
				case 'f':
					*arg++ = '\f';
					++str;
					break;
				case 'n':
					*arg++ = '\n';
					++str;
					break;
				case 'r':
					*arg++ = '\r';
					++str;
					break;
				case 't':
					*arg++ = '\t';
					++str;
					break;
				default:
					*arg++ = str[1];
					++str;
					break;
				}
			} else {
				/* keep the backslash and the character after it, if any */
				*arg++ = str[0];
				if (str[1] != '\0') {
					++str;
					*arg++ = str[0];
				}
			}
			break;
		default:
			/* ordinary character: starts or continues a word */
			if (start == NULL)
				start = arg;
			*arg++ = str[0];
			break;
		}
		++str;
	}
}
276
/*
 * Quote a string for appending it to MAKEFLAGS.  Posix leaves the kind
 * of quoting implementation-defined; it only has to guarantee that
 * parsing the contents of MAKEFLAGS in a sub-make yields the same
 * options, option arguments and macro definitions as in the calling
 * make.
 *
 * Every blank is prefixed with a backslash.  Blanks are taken to be
 * exactly space and tab (the blanks of the POSIX locale); isblank() is
 * deliberately not used so that makes running with different locale
 * settings can still communicate.  All other characters are copied
 * unchanged.
 *
 * NOTE(review): a literal backslash is NOT escaped by this code, although
 * earlier commentary said it must be; MAKEFLAGS_break() likewise treats a
 * backslash specially only when a blank follows it.
 *
 * Returns the quoted string in space obtained from emalloc().
 */
char *
MAKEFLAGS_quote(const char *str)
{
	const char *in;
	char *quoted, *out;

	/* worst case: every character needs a backslash in front of it */
	quoted = emalloc(strlen(str) * 2 + 1);
	out = quoted;

	for (in = str; *in != '\0'; in++) {
		if (*in == ' ' || *in == '\t')
			*out++ = '\\';
		*out++ = *in;
	}
	*out = '\0';

	return (quoted);
}
314
315void
316MAKEFLAGS_break(ArgArray *aa, const char str[])
317{
318	char	*arg;
319	char	*start;
320
321	ArgArray_Init(aa);
322
323	aa->buffer = strdup(str);
324
325	arg = aa->buffer;
326	start = NULL;
327
328	for (;;) {
329		switch (str[0]) {
330		case ' ':
331		case '\t':
332			/* word separator */
333			if (start == NULL) {
334				/* not in a word */
335				str++;
336				continue;
337			}
338			/* FALLTHRU */
339		case '\0':
340			if (aa->argc == aa->size) {
341				aa->size *= 2;
342				aa->argv = erealloc(aa->argv,
343 				    (aa->size + 1) * sizeof(char *));
344			}
345
346			*arg++ = '\0';
347			if (start == NULL) {
348				aa->argv[aa->argc] = start;
349				return;
350			}
351			if (str[0] == '\0') {
352				aa->argv[aa->argc++] = start;
353				aa->argv[aa->argc] = NULL;
354				return;
355			} else {
356				aa->argv[aa->argc++] = start;
357				start = NULL;
358				str++;
359				continue;
360			}
361
362		case '\\':
363			if (str[1] == ' ' || str[1] == '\t')
364				str++;
365			break;
366
367		default:
368			break;
369		}
370		if (start == NULL)
371			start = arg;
372		*arg++ = *str++;
373	}
374}
375
/*
 * Str_Match --
 *
 * See if a particular string matches a particular pattern.
 *
 * Results: Non-zero is returned if string matches pattern, 0 otherwise. The
 * matching operation permits the following special characters in the
 * pattern: *?\[] (see the man page for details on what these mean).
 *
 *	*	matches any substring, including the empty one; a run of
 *		consecutive '*' behaves like a single '*'
 *	?	matches any single character
 *	[...]	matches one character out of a list and/or ranges ("a-z",
 *		in either order); a set that is missing its closing ']'
 *		is treated as if it ended at the end of the pattern
 *	\x	matches the character x literally
 *
 * Side effects: None.
 */
int
Str_Match(const char *string, const char *pattern)
{
	char c2;

	for (;;) {
		/*
		 * See if we're at the end of both the pattern and the
		 * string.  If so, we succeeded.  If we're at the end of the
		 * pattern but not at the end of the string, we failed.
		 */
		if (*pattern == '\0')
			return (*string == '\0');
		if (*string == '\0' && *pattern != '*')
			return (0);
		/*
		 * Check for a "*" as the next pattern character.  It matches
		 * any substring.  We handle this by calling ourselves
		 * recursively for each postfix of string, until either we
		 * match or we reach the end of the string.
		 */
		if (*pattern == '*') {
			/*
			 * Collapse a run of stars: they are equivalent to
			 * one, and otherwise "**" would fail to match the
			 * empty string because the loop below never tries
			 * the empty postfix.
			 */
			while (*pattern == '*')
				++pattern;
			if (*pattern == '\0')
				return (1);
			for (; *string != '\0'; ++string) {
				if (Str_Match(string, pattern))
					return (1);
			}
			return (0);
		}
		/*
		 * Check for a "?" as the next pattern character.  It matches
		 * any single character.
		 */
		if (*pattern == '?')
			goto thisCharOK;
		/*
		 * Check for a "[" as the next pattern character.  It is
		 * followed by a list of characters that are acceptable, or
		 * by a range (two characters separated by "-").
		 */
		if (*pattern == '[') {
			++pattern;
			for (;;) {
				/* ran out of candidates: no match */
				if (*pattern == ']' || *pattern == '\0')
					return (0);
				if (*pattern == *string)
					break;
				if (pattern[1] == '-') {
					c2 = pattern[2];
					if (c2 == '\0')
						return (0);
					/* range may be given low-high or high-low */
					if (*pattern <= *string &&
					    c2 >= *string)
						break;
					if (*pattern >= *string &&
					    c2 <= *string)
						break;
					pattern += 2;
				}
				++pattern;
			}
			/* matched: skip the remainder of the set */
			while (*pattern != ']' && *pattern != '\0')
				++pattern;
			/*
			 * Unterminated set: step back so that the increment
			 * at thisCharOK lands on the terminating NUL instead
			 * of running past the end of the pattern.
			 */
			if (*pattern == '\0')
				--pattern;
			goto thisCharOK;
		}
		/*
		 * If the next pattern character is a backslash, just strip
		 * it off so we do exact matching on the character that
		 * follows.
		 */
		if (*pattern == '\\') {
			++pattern;
			if (*pattern == '\0')
				return (0);
		}
		/*
		 * There's no special character.  Just make sure that the
		 * next characters of each string match.
		 */
		if (*pattern != *string)
			return (0);
thisCharOK:
		++pattern;
		++string;
	}
}
474
475
/**
 * Str_SYSVMatch
 *	Match word against a System V style pattern in which a single
 *	'%' acts as the wildcard.
 *
 *	The text in front of the '%' must be a prefix of word and the
 *	text after it must be a suffix of what remains.  A pattern
 *	without '%' is compared against the tails of word.
 *
 * Results:
 *	A pointer into word where the matched (wildcard) part begins, or
 *	NULL if there is no match.  On a successful match the number of
 *	characters matched is stored in *len.  An empty word never
 *	matches (and sets *len to 0); an empty pattern matches the whole
 *	word.
 */
const char *
Str_SYSVMatch(const char *word, const char *pattern, int *len)
{
	const char *percent;	/* position of '%' in pattern, if any */
	const char *tail;	/* part of pattern still to be matched */
	const char *stem;	/* where the wildcard part starts in word */
	const char *cur;

	if (word[0] == '\0') {
		/* Zero-length word cannot be matched against */
		*len = 0;
		return (NULL);
	}

	if (pattern[0] == '\0') {
		/* Null pattern is the whole string */
		*len = (int)strlen(word);
		return (word);
	}

	tail = pattern;
	stem = word;

	percent = strchr(pattern, '%');
	if (percent != NULL) {
		/* walk over the literal prefix in front of the '%' */
		while (tail != percent && *stem != '\0' && *stem == *tail) {
			stem++;
			tail++;
		}
		if (tail != percent)
			return (NULL);	/* prefix differs: no match */

		/* step over the '%' itself */
		tail++;
		if (*tail == '\0') {
			/* nothing follows: the rest of word is the match */
			*len = (int)strlen(stem);
			return (stem);
		}
	}

	/* find the position in word where the pattern tail fits exactly */
	for (cur = stem; ; cur++) {
		if (strcmp(tail, cur) == 0) {
			*len = (int)(cur - stem);
			return (stem);
		}
		if (*cur == '\0')
			break;
	}

	return (NULL);
}
531
532
533/**
534 * Str_SYSVSubst
535 *	Substitute '%' on the pattern with len characters from src.
536 *	If the pattern does not contain a '%' prepend len characters
537 *	from src.
538 *
539 * Side Effects:
540 *	Places result on buf
541 */
542void
543Str_SYSVSubst(Buffer *buf, const char *pat, const char *src, int len)
544{
545	const char *m;
546
547	if ((m = strchr(pat, '%')) != NULL) {
548		/* Copy the prefix */
549		Buf_AppendRange(buf, pat, m);
550		/* skip the % */
551		pat = m + 1;
552	}
553
554	/* Copy the pattern */
555	Buf_AddBytes(buf, len, (const Byte *)src);
556
557	/* append the rest */
558	Buf_Append(buf, pat);
559}
560