1236769Sobrien/* $NetBSD: str.c,v 1.34 2012/03/03 23:16:47 dholland Exp $ */ 2236769Sobrien 3236769Sobrien/*- 4236769Sobrien * Copyright (c) 1988, 1989, 1990, 1993 5236769Sobrien * The Regents of the University of California. All rights reserved. 6236769Sobrien * 7236769Sobrien * This code is derived from software contributed to Berkeley by 8236769Sobrien * Adam de Boor. 9236769Sobrien * 10236769Sobrien * Redistribution and use in source and binary forms, with or without 11236769Sobrien * modification, are permitted provided that the following conditions 12236769Sobrien * are met: 13236769Sobrien * 1. Redistributions of source code must retain the above copyright 14236769Sobrien * notice, this list of conditions and the following disclaimer. 15236769Sobrien * 2. Redistributions in binary form must reproduce the above copyright 16236769Sobrien * notice, this list of conditions and the following disclaimer in the 17236769Sobrien * documentation and/or other materials provided with the distribution. 18236769Sobrien * 3. Neither the name of the University nor the names of its contributors 19236769Sobrien * may be used to endorse or promote products derived from this software 20236769Sobrien * without specific prior written permission. 21236769Sobrien * 22236769Sobrien * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23236769Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24236769Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25236769Sobrien * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26236769Sobrien * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27236769Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28236769Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29236769Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30236769Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31236769Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32236769Sobrien * SUCH DAMAGE. 33236769Sobrien */ 34236769Sobrien 35236769Sobrien/*- 36236769Sobrien * Copyright (c) 1989 by Berkeley Softworks 37236769Sobrien * All rights reserved. 38236769Sobrien * 39236769Sobrien * This code is derived from software contributed to Berkeley by 40236769Sobrien * Adam de Boor. 41236769Sobrien * 42236769Sobrien * Redistribution and use in source and binary forms, with or without 43236769Sobrien * modification, are permitted provided that the following conditions 44236769Sobrien * are met: 45236769Sobrien * 1. Redistributions of source code must retain the above copyright 46236769Sobrien * notice, this list of conditions and the following disclaimer. 47236769Sobrien * 2. Redistributions in binary form must reproduce the above copyright 48236769Sobrien * notice, this list of conditions and the following disclaimer in the 49236769Sobrien * documentation and/or other materials provided with the distribution. 50236769Sobrien * 3. All advertising materials mentioning features or use of this software 51236769Sobrien * must display the following acknowledgement: 52236769Sobrien * This product includes software developed by the University of 53236769Sobrien * California, Berkeley and its contributors. 54236769Sobrien * 4. Neither the name of the University nor the names of its contributors 55236769Sobrien * may be used to endorse or promote products derived from this software 56236769Sobrien * without specific prior written permission. 57236769Sobrien * 58236769Sobrien * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 59236769Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 60236769Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 61236769Sobrien * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 62236769Sobrien * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 63236769Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 64236769Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65236769Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66236769Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67236769Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68236769Sobrien * SUCH DAMAGE. 69236769Sobrien */ 70236769Sobrien 71236769Sobrien#ifndef MAKE_NATIVE 72236769Sobrienstatic char rcsid[] = "$NetBSD: str.c,v 1.34 2012/03/03 23:16:47 dholland Exp $"; 73236769Sobrien#else 74236769Sobrien#include <sys/cdefs.h> 75236769Sobrien#ifndef lint 76236769Sobrien#if 0 77236769Sobrienstatic char sccsid[] = "@(#)str.c 5.8 (Berkeley) 6/1/90"; 78236769Sobrien#else 79236769Sobrien__RCSID("$NetBSD: str.c,v 1.34 2012/03/03 23:16:47 dholland Exp $"); 80236769Sobrien#endif 81236769Sobrien#endif /* not lint */ 82236769Sobrien#endif 83236769Sobrien 84236769Sobrien#include "make.h" 85236769Sobrien 86236769Sobrien/*- 87236769Sobrien * str_concat -- 88236769Sobrien * concatenate the two strings, inserting a space or slash between them, 89236769Sobrien * freeing them if requested. 90236769Sobrien * 91236769Sobrien * returns -- 92236769Sobrien * the resulting string in allocated space. 93236769Sobrien */ 94236769Sobrienchar * 95236769Sobrienstr_concat(const char *s1, const char *s2, int flags) 96236769Sobrien{ 97236769Sobrien int len1, len2; 98236769Sobrien char *result; 99236769Sobrien 100236769Sobrien /* get the length of both strings */ 101236769Sobrien len1 = strlen(s1); 102236769Sobrien len2 = strlen(s2); 103236769Sobrien 104236769Sobrien /* allocate length plus separator plus EOS */ 105236769Sobrien result = bmake_malloc((u_int)(len1 + len2 + 2)); 106236769Sobrien 107236769Sobrien /* copy first string into place */ 108236769Sobrien memcpy(result, s1, len1); 109236769Sobrien 110236769Sobrien /* add separator character */ 111236769Sobrien if (flags & STR_ADDSPACE) { 112236769Sobrien result[len1] = ' '; 113236769Sobrien ++len1; 114236769Sobrien } else if (flags & STR_ADDSLASH) { 115236769Sobrien result[len1] = '/'; 116236769Sobrien ++len1; 117236769Sobrien } 118236769Sobrien 119236769Sobrien /* copy second string plus EOS into place */ 120236769Sobrien memcpy(result + len1, s2, len2 + 1); 121236769Sobrien 122236769Sobrien return(result); 123236769Sobrien} 124236769Sobrien 125236769Sobrien/*- 126236769Sobrien * brk_string -- 127236769Sobrien * Fracture a string into an array of words (as delineated by tabs or 128236769Sobrien * spaces) taking quotation marks into account. Leading tabs/spaces 129236769Sobrien * are ignored. 130236769Sobrien * 131236769Sobrien * If expand is TRUE, quotes are removed and escape sequences 132236769Sobrien * such as \r, \t, etc... are expanded. 133236769Sobrien * 134236769Sobrien * returns -- 135236769Sobrien * Pointer to the array of pointers to the words. 136236769Sobrien * Memory containing the actual words in *buffer. 137236769Sobrien * Both of these must be free'd by the caller. 138236769Sobrien * Number of words in *store_argc. 139236769Sobrien */ 140236769Sobrienchar ** 141236769Sobrienbrk_string(const char *str, int *store_argc, Boolean expand, char **buffer) 142236769Sobrien{ 143236769Sobrien int argc, ch; 144236769Sobrien char inquote, *start, *t; 145236769Sobrien const char *p; 146236769Sobrien int len; 147236769Sobrien int argmax = 50, curlen = 0; 148236769Sobrien char **argv = bmake_malloc((argmax + 1) * sizeof(char *)); 149236769Sobrien 150236769Sobrien /* skip leading space chars. */ 151236769Sobrien for (; *str == ' ' || *str == '\t'; ++str) 152236769Sobrien continue; 153236769Sobrien 154236769Sobrien /* allocate room for a copy of the string */ 155236769Sobrien if ((len = strlen(str) + 1) > curlen) 156236769Sobrien *buffer = bmake_malloc(curlen = len); 157236769Sobrien 158236769Sobrien /* 159236769Sobrien * copy the string; at the same time, parse backslashes, 160236769Sobrien * quotes and build the argument list. 161236769Sobrien */ 162236769Sobrien argc = 0; 163236769Sobrien inquote = '\0'; 164236769Sobrien for (p = str, start = t = *buffer;; ++p) { 165236769Sobrien switch(ch = *p) { 166236769Sobrien case '"': 167236769Sobrien case '\'': 168236769Sobrien if (inquote) { 169236769Sobrien if (inquote == ch) 170236769Sobrien inquote = '\0'; 171236769Sobrien else 172236769Sobrien break; 173236769Sobrien } 174236769Sobrien else { 175236769Sobrien inquote = (char) ch; 176236769Sobrien /* Don't miss "" or '' */ 177236769Sobrien if (start == NULL && p[1] == inquote) { 178236769Sobrien if (!expand) { 179236769Sobrien start = t; 180236769Sobrien *t++ = ch; 181236769Sobrien } else 182236769Sobrien start = t + 1; 183236769Sobrien p++; 184236769Sobrien inquote = '\0'; 185236769Sobrien break; 186236769Sobrien } 187236769Sobrien } 188236769Sobrien if (!expand) { 189236769Sobrien if (!start) 190236769Sobrien start = t; 191236769Sobrien *t++ = ch; 192236769Sobrien } 193236769Sobrien continue; 194236769Sobrien case ' ': 195236769Sobrien case '\t': 196236769Sobrien case '\n': 197236769Sobrien if (inquote) 198236769Sobrien break; 199236769Sobrien if (!start) 200236769Sobrien continue; 201236769Sobrien /* FALLTHROUGH */ 202236769Sobrien case '\0': 203236769Sobrien /* 204236769Sobrien * end of a token -- make sure there's enough argv 205236769Sobrien * space and save off a pointer. 206236769Sobrien */ 207236769Sobrien if (!start) 208236769Sobrien goto done; 209236769Sobrien 210236769Sobrien *t++ = '\0'; 211236769Sobrien if (argc == argmax) { 212236769Sobrien argmax *= 2; /* ramp up fast */ 213236769Sobrien argv = (char **)bmake_realloc(argv, 214236769Sobrien (argmax + 1) * sizeof(char *)); 215236769Sobrien } 216236769Sobrien argv[argc++] = start; 217236769Sobrien start = NULL; 218236769Sobrien if (ch == '\n' || ch == '\0') { 219236769Sobrien if (expand && inquote) { 220236769Sobrien free(argv); 221236769Sobrien free(*buffer); 222236769Sobrien *buffer = NULL; 223236769Sobrien return NULL; 224236769Sobrien } 225236769Sobrien goto done; 226236769Sobrien } 227236769Sobrien continue; 228236769Sobrien case '\\': 229236769Sobrien if (!expand) { 230236769Sobrien if (!start) 231236769Sobrien start = t; 232236769Sobrien *t++ = '\\'; 233236769Sobrien if (*(p+1) == '\0') /* catch '\' at end of line */ 234236769Sobrien continue; 235236769Sobrien ch = *++p; 236236769Sobrien break; 237236769Sobrien } 238236769Sobrien 239236769Sobrien switch (ch = *++p) { 240236769Sobrien case '\0': 241236769Sobrien case '\n': 242236769Sobrien /* hmmm; fix it up as best we can */ 243236769Sobrien ch = '\\'; 244236769Sobrien --p; 245236769Sobrien break; 246236769Sobrien case 'b': 247236769Sobrien ch = '\b'; 248236769Sobrien break; 249236769Sobrien case 'f': 250236769Sobrien ch = '\f'; 251236769Sobrien break; 252236769Sobrien case 'n': 253236769Sobrien ch = '\n'; 254236769Sobrien break; 255236769Sobrien case 'r': 256236769Sobrien ch = '\r'; 257236769Sobrien break; 258236769Sobrien case 't': 259236769Sobrien ch = '\t'; 260236769Sobrien break; 261236769Sobrien } 262236769Sobrien break; 263236769Sobrien } 264236769Sobrien if (!start) 265236769Sobrien start = t; 266236769Sobrien *t++ = (char) ch; 267236769Sobrien } 268236769Sobriendone: argv[argc] = NULL; 269236769Sobrien *store_argc = argc; 270236769Sobrien return(argv); 271236769Sobrien} 272236769Sobrien 273236769Sobrien/* 274236769Sobrien * Str_FindSubstring -- See if a string contains a particular substring. 275236769Sobrien * 276236769Sobrien * Input: 277236769Sobrien * string String to search. 278236769Sobrien * substring Substring to find in string. 279236769Sobrien * 280236769Sobrien * Results: If string contains substring, the return value is the location of 281236769Sobrien * the first matching instance of substring in string. If string doesn't 282236769Sobrien * contain substring, the return value is NULL. Matching is done on an exact 283236769Sobrien * character-for-character basis with no wildcards or special characters. 284236769Sobrien * 285236769Sobrien * Side effects: None. 286236769Sobrien */ 287236769Sobrienchar * 288236769SobrienStr_FindSubstring(const char *string, const char *substring) 289236769Sobrien{ 290236769Sobrien const char *a, *b; 291236769Sobrien 292236769Sobrien /* 293236769Sobrien * First scan quickly through the two strings looking for a single- 294236769Sobrien * character match. When it's found, then compare the rest of the 295236769Sobrien * substring. 296236769Sobrien */ 297236769Sobrien 298236769Sobrien for (b = substring; *string != 0; string += 1) { 299236769Sobrien if (*string != *b) 300236769Sobrien continue; 301236769Sobrien a = string; 302236769Sobrien for (;;) { 303236769Sobrien if (*b == 0) 304236769Sobrien return UNCONST(string); 305236769Sobrien if (*a++ != *b++) 306236769Sobrien break; 307236769Sobrien } 308236769Sobrien b = substring; 309236769Sobrien } 310236769Sobrien return NULL; 311236769Sobrien} 312236769Sobrien 313236769Sobrien/* 314236769Sobrien * Str_Match -- 315236769Sobrien * 316236769Sobrien * See if a particular string matches a particular pattern. 317236769Sobrien * 318236769Sobrien * Results: Non-zero is returned if string matches pattern, 0 otherwise. The 319236769Sobrien * matching operation permits the following special characters in the 320236769Sobrien * pattern: *?\[] (see the man page for details on what these mean). 321236769Sobrien * 322236769Sobrien * XXX this function does not detect or report malformed patterns. 323236769Sobrien * 324236769Sobrien * Side effects: None. 325236769Sobrien */ 326236769Sobrienint 327236769SobrienStr_Match(const char *string, const char *pattern) 328236769Sobrien{ 329236769Sobrien char c2; 330236769Sobrien 331236769Sobrien for (;;) { 332236769Sobrien /* 333236769Sobrien * See if we're at the end of both the pattern and the 334236769Sobrien * string. If, we succeeded. If we're at the end of the 335236769Sobrien * pattern but not at the end of the string, we failed. 336236769Sobrien */ 337236769Sobrien if (*pattern == 0) 338236769Sobrien return(!*string); 339236769Sobrien if (*string == 0 && *pattern != '*') 340236769Sobrien return(0); 341236769Sobrien /* 342236769Sobrien * Check for a "*" as the next pattern character. It matches 343236769Sobrien * any substring. We handle this by calling ourselves 344236769Sobrien * recursively for each postfix of string, until either we 345236769Sobrien * match or we reach the end of the string. 346236769Sobrien */ 347236769Sobrien if (*pattern == '*') { 348236769Sobrien pattern += 1; 349236769Sobrien if (*pattern == 0) 350236769Sobrien return(1); 351236769Sobrien while (*string != 0) { 352236769Sobrien if (Str_Match(string, pattern)) 353236769Sobrien return(1); 354236769Sobrien ++string; 355236769Sobrien } 356236769Sobrien return(0); 357236769Sobrien } 358236769Sobrien /* 359236769Sobrien * Check for a "?" as the next pattern character. It matches 360236769Sobrien * any single character. 361236769Sobrien */ 362236769Sobrien if (*pattern == '?') 363236769Sobrien goto thisCharOK; 364236769Sobrien /* 365236769Sobrien * Check for a "[" as the next pattern character. It is 366236769Sobrien * followed by a list of characters that are acceptable, or 367236769Sobrien * by a range (two characters separated by "-"). 368236769Sobrien */ 369236769Sobrien if (*pattern == '[') { 370236769Sobrien ++pattern; 371236769Sobrien for (;;) { 372236769Sobrien if ((*pattern == ']') || (*pattern == 0)) 373236769Sobrien return(0); 374236769Sobrien if (*pattern == *string) 375236769Sobrien break; 376236769Sobrien if (pattern[1] == '-') { 377236769Sobrien c2 = pattern[2]; 378236769Sobrien if (c2 == 0) 379236769Sobrien return(0); 380236769Sobrien if ((*pattern <= *string) && 381236769Sobrien (c2 >= *string)) 382236769Sobrien break; 383236769Sobrien if ((*pattern >= *string) && 384236769Sobrien (c2 <= *string)) 385236769Sobrien break; 386236769Sobrien pattern += 2; 387236769Sobrien } 388236769Sobrien ++pattern; 389236769Sobrien } 390236769Sobrien while ((*pattern != ']') && (*pattern != 0)) 391236769Sobrien ++pattern; 392236769Sobrien goto thisCharOK; 393236769Sobrien } 394236769Sobrien /* 395236769Sobrien * If the next pattern character is '/', just strip off the 396236769Sobrien * '/' so we do exact matching on the character that follows. 397236769Sobrien */ 398236769Sobrien if (*pattern == '\\') { 399236769Sobrien ++pattern; 400236769Sobrien if (*pattern == 0) 401236769Sobrien return(0); 402236769Sobrien } 403236769Sobrien /* 404236769Sobrien * There's no special character. Just make sure that the 405236769Sobrien * next characters of each string match. 406236769Sobrien */ 407236769Sobrien if (*pattern != *string) 408236769Sobrien return(0); 409236769SobrienthisCharOK: ++pattern; 410236769Sobrien ++string; 411236769Sobrien } 412236769Sobrien} 413236769Sobrien 414236769Sobrien 415236769Sobrien/*- 416236769Sobrien *----------------------------------------------------------------------- 417236769Sobrien * Str_SYSVMatch -- 418236769Sobrien * Check word against pattern for a match (% is wild), 419236769Sobrien * 420236769Sobrien * Input: 421236769Sobrien * word Word to examine 422236769Sobrien * pattern Pattern to examine against 423236769Sobrien * len Number of characters to substitute 424236769Sobrien * 425236769Sobrien * Results: 426236769Sobrien * Returns the beginning position of a match or null. The number 427236769Sobrien * of characters matched is returned in len. 428236769Sobrien * 429236769Sobrien * Side Effects: 430236769Sobrien * None 431236769Sobrien * 432236769Sobrien *----------------------------------------------------------------------- 433236769Sobrien */ 434236769Sobrienchar * 435236769SobrienStr_SYSVMatch(const char *word, const char *pattern, int *len) 436236769Sobrien{ 437236769Sobrien const char *p = pattern; 438236769Sobrien const char *w = word; 439236769Sobrien const char *m; 440236769Sobrien 441236769Sobrien if (*p == '\0') { 442236769Sobrien /* Null pattern is the whole string */ 443236769Sobrien *len = strlen(w); 444236769Sobrien return UNCONST(w); 445236769Sobrien } 446236769Sobrien 447236769Sobrien if ((m = strchr(p, '%')) != NULL) { 448236769Sobrien /* check that the prefix matches */ 449236769Sobrien for (; p != m && *w && *w == *p; w++, p++) 450236769Sobrien continue; 451236769Sobrien 452236769Sobrien if (p != m) 453236769Sobrien return NULL; /* No match */ 454236769Sobrien 455236769Sobrien if (*++p == '\0') { 456236769Sobrien /* No more pattern, return the rest of the string */ 457236769Sobrien *len = strlen(w); 458236769Sobrien return UNCONST(w); 459236769Sobrien } 460236769Sobrien } 461236769Sobrien 462236769Sobrien m = w; 463236769Sobrien 464236769Sobrien /* Find a matching tail */ 465236769Sobrien do 466236769Sobrien if (strcmp(p, w) == 0) { 467236769Sobrien *len = w - m; 468236769Sobrien return UNCONST(m); 469236769Sobrien } 470236769Sobrien while (*w++ != '\0'); 471236769Sobrien 472236769Sobrien return NULL; 473236769Sobrien} 474236769Sobrien 475236769Sobrien 476236769Sobrien/*- 477236769Sobrien *----------------------------------------------------------------------- 478236769Sobrien * Str_SYSVSubst -- 479236769Sobrien * Substitute '%' on the pattern with len characters from src. 480236769Sobrien * If the pattern does not contain a '%' prepend len characters 481236769Sobrien * from src. 482236769Sobrien * 483236769Sobrien * Results: 484236769Sobrien * None 485236769Sobrien * 486236769Sobrien * Side Effects: 487236769Sobrien * Places result on buf 488236769Sobrien * 489236769Sobrien *----------------------------------------------------------------------- 490236769Sobrien */ 491236769Sobrienvoid 492236769SobrienStr_SYSVSubst(Buffer *buf, char *pat, char *src, int len) 493236769Sobrien{ 494236769Sobrien char *m; 495236769Sobrien 496236769Sobrien if ((m = strchr(pat, '%')) != NULL) { 497236769Sobrien /* Copy the prefix */ 498236769Sobrien Buf_AddBytes(buf, m - pat, pat); 499236769Sobrien /* skip the % */ 500236769Sobrien pat = m + 1; 501236769Sobrien } 502236769Sobrien 503236769Sobrien /* Copy the pattern */ 504236769Sobrien Buf_AddBytes(buf, len, src); 505236769Sobrien 506236769Sobrien /* append the rest */ 507236769Sobrien Buf_AddBytes(buf, strlen(pat), pat); 508236769Sobrien} 509