str.c revision 321964
1/*	$NetBSD: str.c,v 1.38 2017/04/21 22:15:44 sjg Exp $	*/
2
3/*-
4 * Copyright (c) 1988, 1989, 1990, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Adam de Boor.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35/*-
36 * Copyright (c) 1989 by Berkeley Softworks
37 * All rights reserved.
38 *
39 * This code is derived from software contributed to Berkeley by
40 * Adam de Boor.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 *    notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 *    notice, this list of conditions and the following disclaimer in the
49 *    documentation and/or other materials provided with the distribution.
50 * 3. All advertising materials mentioning features or use of this software
51 *    must display the following acknowledgement:
52 *	This product includes software developed by the University of
53 *	California, Berkeley and its contributors.
54 * 4. Neither the name of the University nor the names of its contributors
55 *    may be used to endorse or promote products derived from this software
56 *    without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 */
70
71#ifndef MAKE_NATIVE
72static char rcsid[] = "$NetBSD: str.c,v 1.38 2017/04/21 22:15:44 sjg Exp $";
73#else
74#include <sys/cdefs.h>
75#ifndef lint
76#if 0
77static char     sccsid[] = "@(#)str.c	5.8 (Berkeley) 6/1/90";
78#else
79__RCSID("$NetBSD: str.c,v 1.38 2017/04/21 22:15:44 sjg Exp $");
80#endif
81#endif				/* not lint */
82#endif
83
84#include "make.h"
85
86/*-
87 * str_concat --
88 *	concatenate the two strings, inserting a space or slash between them,
89 *	freeing them if requested.
90 *
91 * returns --
92 *	the resulting string in allocated space.
93 */
94char *
95str_concat(const char *s1, const char *s2, int flags)
96{
97	int len1, len2;
98	char *result;
99
100	/* get the length of both strings */
101	len1 = strlen(s1);
102	len2 = strlen(s2);
103
104	/* allocate length plus separator plus EOS */
105	result = bmake_malloc((unsigned int)(len1 + len2 + 2));
106
107	/* copy first string into place */
108	memcpy(result, s1, len1);
109
110	/* add separator character */
111	if (flags & STR_ADDSPACE) {
112		result[len1] = ' ';
113		++len1;
114	} else if (flags & STR_ADDSLASH) {
115		result[len1] = '/';
116		++len1;
117	}
118
119	/* copy second string plus EOS into place */
120	memcpy(result + len1, s2, len2 + 1);
121
122	return(result);
123}
124
125/*-
126 * brk_string --
127 *	Fracture a string into an array of words (as delineated by tabs or
128 *	spaces) taking quotation marks into account.  Leading tabs/spaces
129 *	are ignored.
130 *
131 * If expand is TRUE, quotes are removed and escape sequences
132 *  such as \r, \t, etc... are expanded.
133 *
134 * returns --
135 *	Pointer to the array of pointers to the words.
136 *      Memory containing the actual words in *buffer.
137 *		Both of these must be free'd by the caller.
138 *      Number of words in *store_argc.
139 */
140char **
141brk_string(const char *str, int *store_argc, Boolean expand, char **buffer)
142{
143	int argc, ch;
144	char inquote, *start, *t;
145	const char *p;
146	int len;
147	int argmax = 50, curlen = 0;
148    	char **argv;
149
150	/* skip leading space chars. */
151	for (; *str == ' ' || *str == '\t'; ++str)
152		continue;
153
154	/* allocate room for a copy of the string */
155	if ((len = strlen(str) + 1) > curlen)
156		*buffer = bmake_malloc(curlen = len);
157
158	/*
159	 * initial argmax based on len
160	 */
161	argmax = MAX((len / 5), 50);
162	argv = bmake_malloc((argmax + 1) * sizeof(char *));
163
164	/*
165	 * copy the string; at the same time, parse backslashes,
166	 * quotes and build the argument list.
167	 */
168	argc = 0;
169	inquote = '\0';
170	for (p = str, start = t = *buffer;; ++p) {
171		switch(ch = *p) {
172		case '"':
173		case '\'':
174			if (inquote) {
175				if (inquote == ch)
176					inquote = '\0';
177				else
178					break;
179			}
180			else {
181				inquote = (char) ch;
182				/* Don't miss "" or '' */
183				if (start == NULL && p[1] == inquote) {
184					if (!expand) {
185						start = t;
186						*t++ = ch;
187					} else
188						start = t + 1;
189					p++;
190					inquote = '\0';
191					break;
192				}
193			}
194			if (!expand) {
195				if (!start)
196					start = t;
197				*t++ = ch;
198			}
199			continue;
200		case ' ':
201		case '\t':
202		case '\n':
203			if (inquote)
204				break;
205			if (!start)
206				continue;
207			/* FALLTHROUGH */
208		case '\0':
209			/*
210			 * end of a token -- make sure there's enough argv
211			 * space and save off a pointer.
212			 */
213			if (!start)
214			    goto done;
215
216			*t++ = '\0';
217			if (argc == argmax) {
218				argmax *= 2;		/* ramp up fast */
219				argv = (char **)bmake_realloc(argv,
220				    (argmax + 1) * sizeof(char *));
221			}
222			argv[argc++] = start;
223			start = NULL;
224			if (ch == '\n' || ch == '\0') {
225				if (expand && inquote) {
226					free(argv);
227					free(*buffer);
228					*buffer = NULL;
229					return NULL;
230				}
231				goto done;
232			}
233			continue;
234		case '\\':
235			if (!expand) {
236				if (!start)
237					start = t;
238				*t++ = '\\';
239				if (*(p+1) == '\0') /* catch '\' at end of line */
240					continue;
241				ch = *++p;
242				break;
243			}
244
245			switch (ch = *++p) {
246			case '\0':
247			case '\n':
248				/* hmmm; fix it up as best we can */
249				ch = '\\';
250				--p;
251				break;
252			case 'b':
253				ch = '\b';
254				break;
255			case 'f':
256				ch = '\f';
257				break;
258			case 'n':
259				ch = '\n';
260				break;
261			case 'r':
262				ch = '\r';
263				break;
264			case 't':
265				ch = '\t';
266				break;
267			}
268			break;
269		}
270		if (!start)
271			start = t;
272		*t++ = (char) ch;
273	}
274done:	argv[argc] = NULL;
275	*store_argc = argc;
276	return(argv);
277}
278
/*
 * Str_FindSubstring -- Locate the first occurrence of one string in another.
 *
 * Input:
 *	string		String to search.
 *	substring	Substring to find in string.
 *
 * Results: A pointer to the first place in string where substring occurs,
 * or NULL if it does not occur.  The comparison is an exact
 * character-for-character one; no wildcards or special characters are
 * recognized.  An empty substring never matches.
 *
 * Side effects: None.
 */
char *
Str_FindSubstring(const char *string, const char *substring)
{
	/*
	 * strstr() reports a match at the start of the haystack for an
	 * empty needle, whereas this function has always answered "not
	 * found" in that case, so handle it up front.
	 */
	if (*substring == '\0')
		return NULL;

	return strstr(string, substring);
}
318
/*
 * Str_Match --
 *
 * See if a particular string matches a particular pattern.
 *
 * Input:
 *	string		String to test.
 *	pattern		Pattern to test it against.
 *
 * Results: Non-zero is returned if string matches pattern, 0 otherwise. The
 * matching operation permits the following special characters in the
 * pattern:
 *	*	matches any sequence of characters, including none
 *	?	matches any single character
 *	[...]	matches a single character out of the listed ones; "a-b"
 *		denotes a range (the bounds may be given in either order)
 *		and a leading '^' inverts the list
 *	\x	matches the character x literally
 *
 * XXX this function does not detect or report malformed patterns.  A '['
 * list that is not closed before the end of the pattern is treated as if
 * it ended there.
 *
 * Side effects: None.
 */
int
Str_Match(const char *string, const char *pattern)
{
	char c2;

	for (;;) {
		/*
		 * See if we're at the end of both the pattern and the
		 * string. If so, we succeeded.  If we're at the end of the
		 * pattern but not at the end of the string, we failed.
		 */
		if (*pattern == 0)
			return(!*string);
		if (*string == 0 && *pattern != '*')
			return(0);
		/*
		 * Check for a "*" as the next pattern character.  It matches
		 * any substring.  We handle this by calling ourselves
		 * recursively for each postfix of string, until either we
		 * match or we reach the end of the string.
		 */
		if (*pattern == '*') {
			/*
			 * A run of stars means the same as a single one.
			 * Collapsing it lets "a**" match "a" and avoids
			 * pointless nested recursion.
			 */
			do {
				pattern += 1;
			} while (*pattern == '*');
			if (*pattern == 0)
				return(1);
			while (*string != 0) {
				if (Str_Match(string, pattern))
					return(1);
				++string;
			}
			return(0);
		}
		/*
		 * Check for a "?" as the next pattern character.  It matches
		 * any single character.
		 */
		if (*pattern == '?')
			goto thisCharOK;
		/*
		 * Check for a "[" as the next pattern character.  It is
		 * followed by a list of characters that are acceptable, or
		 * by a range (two characters separated by "-").
		 */
		if (*pattern == '[') {
			int nomatch;

			++pattern;
			if (*pattern == '^') {
				++pattern;
				nomatch = 1;
			} else
				nomatch = 0;
			for (;;) {
				/* end of list: nothing in it matched */
				if ((*pattern == ']') || (*pattern == 0)) {
					if (nomatch)
						break;
					return(0);
				}
				if (*pattern == *string)
					break;
				if (pattern[1] == '-') {
					c2 = pattern[2];
					if (c2 == 0)
						return(nomatch);
					if ((*pattern <= *string) &&
					    (c2 >= *string))
						break;
					if ((*pattern >= *string) &&
					    (c2 <= *string))
						break;
					pattern += 2;
				}
				++pattern;
			}
			/* an inverted list fails if a member matched */
			if (nomatch && (*pattern != ']') && (*pattern != 0))
				return 0;
			while ((*pattern != ']') && (*pattern != 0))
				++pattern;
			/*
			 * Unterminated list: step back so that the increment
			 * at thisCharOK leaves us on the terminating NUL
			 * instead of running past the end of the pattern.
			 */
			if (*pattern == 0)
				--pattern;
			goto thisCharOK;
		}
		/*
		 * If the next pattern character is '\', just strip off the
		 * '\' so we do exact matching on the character that follows.
		 */
		if (*pattern == '\\') {
			++pattern;
			if (*pattern == 0)
				return(0);
		}
		/*
		 * There's no special character.  Just make sure that the
		 * next characters of each string match.
		 */
		if (*pattern != *string)
			return(0);
thisCharOK:	++pattern;
		++string;
	}
}
431
432
/*-
 *-----------------------------------------------------------------------
 * Str_SYSVMatch --
 *	Match a word against a System V style pattern, in which a '%'
 *	stands for an arbitrary run of characters.
 *
 * Input:
 *	word		Word to examine
 *	pattern		Pattern to examine against
 *	len		Receives the number of characters matched
 *
 * Results:
 *	A pointer into word where the matched part begins, or NULL if the
 *	word does not match.  On success *len holds the length of the
 *	matched part; on failure *len is not written.
 *
 *	An empty pattern matches the whole word.  Text before the '%'
 *	must be a prefix of the word and text after it a suffix; a
 *	pattern without '%' only has to be a suffix of the word.
 *
 * Side Effects:
 *	None
 *
 *-----------------------------------------------------------------------
 */
char *
Str_SYSVMatch(const char *word, const char *pattern, int *len)
{
    const char *percent;
    const char *anchor;
    const char *tail;

    /* Null pattern is the whole string */
    if (pattern[0] == '\0') {
	*len = strlen(word);
	return UNCONST(word);
    }

    percent = strchr(pattern, '%');
    if (percent != NULL) {
	/* consume the literal prefix from both the word and the pattern */
	while (pattern != percent && *word != '\0' && *word == *pattern) {
	    word++;
	    pattern++;
	}

	/* stopped before reaching the '%': the prefix differs */
	if (pattern != percent)
	    return NULL;

	/* step over the '%' */
	pattern++;
	if (*pattern == '\0') {
	    /* nothing follows the '%': the rest of the word matches */
	    *len = strlen(word);
	    return UNCONST(word);
	}
    }

    /*
     * What is left of the pattern has to equal the end of the word;
     * try every possible tail, the empty one included.
     */
    anchor = word;
    for (tail = word;; tail++) {
	if (strcmp(pattern, tail) == 0) {
	    *len = tail - anchor;
	    return UNCONST(anchor);
	}
	if (*tail == '\0')
	    return NULL;
    }
}
492
493
494/*-
495 *-----------------------------------------------------------------------
496 * Str_SYSVSubst --
497 *	Substitute '%' on the pattern with len characters from src.
498 *	If the pattern does not contain a '%' prepend len characters
499 *	from src.
500 *
501 * Results:
502 *	None
503 *
504 * Side Effects:
505 *	Places result on buf
506 *
507 *-----------------------------------------------------------------------
508 */
509void
510Str_SYSVSubst(Buffer *buf, char *pat, char *src, int len)
511{
512    char *m;
513
514    if ((m = strchr(pat, '%')) != NULL) {
515	/* Copy the prefix */
516	Buf_AddBytes(buf, m - pat, pat);
517	/* skip the % */
518	pat = m + 1;
519    }
520
521    /* Copy the pattern */
522    Buf_AddBytes(buf, len, src);
523
524    /* append the rest */
525    Buf_AddBytes(buf, strlen(pat), pat);
526}
527