1/*	$NetBSD: str.h,v 1.19 2024/01/05 21:56:55 rillig Exp $	*/
2
3/*
4 Copyright (c) 2021 Roland Illig <rillig@NetBSD.org>
5 All rights reserved.
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
9 are met:
10
11 1. Redistributions of source code must retain the above copyright
12    notice, this list of conditions and the following disclaimer.
13 2. Redistributions in binary form must reproduce the above copyright
14    notice, this list of conditions and the following disclaimer in the
15    documentation and/or other materials provided with the distribution.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
21 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
28 */
29
30
/*
 * Memory-efficient string handling.
 */
34
35
/*
 * A read-only string that may need to be freed after use.
 *
 * The content is always accessed through str; freeIt records whether any
 * memory has to be released when the string is no longer needed.
 */
struct FStr {
	const char *str;	/* the string content */
	void *freeIt;		/* memory passed to free() by FStr_Done, or NULL */
};
typedef struct FStr FStr;
41
/*
 * A read-only range of a character array, NOT null-terminated.
 *
 * The range covers the characters from start up to, but not including, end.
 */
struct Substring {
	const char *start;	/* first character of the range */
	const char *end;	/* one past the last character of the range */
};
typedef struct Substring Substring;
47
/*
 * Builds a string, only allocating memory if the string is different from the
 * expected string.
 *
 * As long as every added character equals the corresponding character of
 * 'expected', no memory is allocated and only 'len' advances.
 */
struct LazyBuf {
	char *data;		/* own storage, or NULL while nothing differs */
	size_t len;		/* number of characters added so far */
	size_t cap;		/* allocated size of data; 0 while data is NULL */
	const char *expected;	/* the string that the content is compared to */
};
typedef struct LazyBuf LazyBuf;
58
/*
 * The result of splitting a string into words, each word being a
 * pointer to char.
 *
 * Words_Free releases both 'words' and 'freeIt'.
 */
struct Words {
	char **words;		/* the array of words */
	size_t len;		/* the number of words */
	void *freeIt;		/* additional memory released by Words_Free */
};
typedef struct Words Words;
65
66/* The result of splitting a string into words. */
67typedef struct SubstringWords {
68	Substring *words;
69	size_t len;
70	void *freeIt;
71} SubstringWords;
72
/* The result returned by Str_Match. */
struct StrMatchResult {
	const char *error;	/* NOTE(review): presumably NULL unless the
				 * match failed with an error; confirm in
				 * the definition of Str_Match */
	bool matched;
};
typedef struct StrMatchResult StrMatchResult;
77
78
79/* Return a string that is the sole owner of str. */
80MAKE_INLINE FStr
81FStr_InitOwn(char *str)
82{
83	FStr fstr;
84	fstr.str = str;
85	fstr.freeIt = str;
86	return fstr;
87}
88
89/* Return a string that refers to the shared str. */
90MAKE_INLINE FStr
91FStr_InitRefer(const char *str)
92{
93	FStr fstr;
94	fstr.str = str;
95	fstr.freeIt = NULL;
96	return fstr;
97}
98
99MAKE_INLINE void
100FStr_Done(FStr *fstr)
101{
102	free(fstr->freeIt);
103#ifdef CLEANUP
104	fstr->str = NULL;
105	fstr->freeIt = NULL;
106#endif
107}
108
109
110MAKE_STATIC Substring
111Substring_Init(const char *start, const char *end)
112{
113	Substring sub;
114
115	sub.start = start;
116	sub.end = end;
117	return sub;
118}
119
120MAKE_INLINE Substring
121Substring_InitStr(const char *str)
122{
123	return Substring_Init(str, str + strlen(str));
124}
125
126MAKE_STATIC size_t
127Substring_Length(Substring sub)
128{
129	return (size_t)(sub.end - sub.start);
130}
131
132MAKE_STATIC bool
133Substring_IsEmpty(Substring sub)
134{
135	return sub.start == sub.end;
136}
137
138MAKE_INLINE bool
139Substring_Equals(Substring sub, const char *str)
140{
141	size_t len = strlen(str);
142	return Substring_Length(sub) == len &&
143	       memcmp(sub.start, str, len) == 0;
144}
145
146MAKE_INLINE bool
147Substring_Eq(Substring sub, Substring str)
148{
149	size_t len = Substring_Length(sub);
150	return len == Substring_Length(str) &&
151	       memcmp(sub.start, str.start, len) == 0;
152}
153
154MAKE_STATIC bool
155Substring_HasPrefix(Substring sub, Substring prefix)
156{
157	return Substring_Length(sub) >= Substring_Length(prefix) &&
158	       memcmp(sub.start, prefix.start, Substring_Length(prefix)) == 0;
159}
160
161MAKE_STATIC bool
162Substring_HasSuffix(Substring sub, Substring suffix)
163{
164	size_t suffixLen = Substring_Length(suffix);
165	return Substring_Length(sub) >= suffixLen &&
166	       memcmp(sub.end - suffixLen, suffix.start, suffixLen) == 0;
167}
168
169/* Returns an independent, null-terminated copy of the substring. */
170MAKE_STATIC FStr
171Substring_Str(Substring sub)
172{
173	if (Substring_IsEmpty(sub))
174		return FStr_InitRefer("");
175	return FStr_InitOwn(bmake_strsedup(sub.start, sub.end));
176}
177
178MAKE_STATIC const char *
179Substring_SkipFirst(Substring sub, char ch)
180{
181	const char *p;
182
183	for (p = sub.start; p != sub.end; p++)
184		if (*p == ch)
185			return p + 1;
186	return sub.start;
187}
188
189MAKE_STATIC const char *
190Substring_FindLast(Substring sub, char ch)
191{
192	const char *p;
193
194	for (p = sub.end; p != sub.start; p--)
195		if (p[-1] == ch)
196			return p - 1;
197	return NULL;
198}
199
200MAKE_STATIC Substring
201Substring_Dirname(Substring pathname)
202{
203	const char *p;
204
205	for (p = pathname.end; p != pathname.start; p--)
206		if (p[-1] == '/')
207			return Substring_Init(pathname.start, p - 1);
208	return Substring_InitStr(".");
209}
210
211MAKE_STATIC Substring
212Substring_Basename(Substring pathname)
213{
214	const char *p;
215
216	for (p = pathname.end; p != pathname.start; p--)
217		if (p[-1] == '/')
218			return Substring_Init(p, pathname.end);
219	return pathname;
220}
221
222
223MAKE_STATIC void
224LazyBuf_Init(LazyBuf *buf, const char *expected)
225{
226	buf->data = NULL;
227	buf->len = 0;
228	buf->cap = 0;
229	buf->expected = expected;
230}
231
232MAKE_INLINE void
233LazyBuf_Done(LazyBuf *buf)
234{
235	free(buf->data);
236}
237
238MAKE_STATIC void
239LazyBuf_Add(LazyBuf *buf, char ch)
240{
241
242	if (buf->data != NULL) {
243		if (buf->len == buf->cap) {
244			buf->cap *= 2;
245			buf->data = bmake_realloc(buf->data, buf->cap);
246		}
247		buf->data[buf->len++] = ch;
248
249	} else if (ch == buf->expected[buf->len]) {
250		buf->len++;
251		return;
252
253	} else {
254		buf->cap = buf->len + 16;
255		buf->data = bmake_malloc(buf->cap);
256		memcpy(buf->data, buf->expected, buf->len);
257		buf->data[buf->len++] = ch;
258	}
259}
260
261MAKE_STATIC void
262LazyBuf_AddStr(LazyBuf *buf, const char *str)
263{
264	const char *p;
265
266	for (p = str; *p != '\0'; p++)
267		LazyBuf_Add(buf, *p);
268}
269
270MAKE_INLINE void
271LazyBuf_AddSubstring(LazyBuf *buf, Substring sub)
272{
273	const char *p;
274
275	for (p = sub.start; p != sub.end; p++)
276		LazyBuf_Add(buf, *p);
277}
278
279MAKE_STATIC Substring
280LazyBuf_Get(const LazyBuf *buf)
281{
282	const char *start = buf->data != NULL ? buf->data : buf->expected;
283	return Substring_Init(start, start + buf->len);
284}
285
286/*
287 * Returns the content of the buffer as a newly allocated string.
288 *
289 * See LazyBuf_Get to avoid unnecessary memory allocations.
290 */
291MAKE_STATIC FStr
292LazyBuf_DoneGet(LazyBuf *buf)
293{
294	if (buf->data != NULL) {
295		LazyBuf_Add(buf, '\0');
296		return FStr_InitOwn(buf->data);
297	}
298	return Substring_Str(LazyBuf_Get(buf));
299}
300
301
/*
 * Declared here, defined outside this header.  The Words result is returned
 * by value; Words_Free releases its 'words' and 'freeIt' members.
 *
 * NOTE(review): presumably splits the given string into words; the meaning
 * of the bool parameter is not visible here -- confirm in the definition.
 */
Words Str_Words(const char *, bool);
303
304MAKE_INLINE void
305Words_Free(Words w)
306{
307	free(w.words);
308	free(w.freeIt);
309}
310
311
/*
 * Declared here, defined outside this header.  The SubstringWords result is
 * returned by value; SubstringWords_Free releases its 'words' and 'freeIt'
 * members.
 *
 * NOTE(review): presumably splits the given string into words; the meaning
 * of the bool parameter is not visible here -- confirm in the definition.
 */
SubstringWords Substring_Words(const char *, bool);
313
314MAKE_INLINE void
315SubstringWords_Init(SubstringWords *w)
316{
317	w->words = NULL;
318	w->len = 0;
319	w->freeIt = NULL;
320}
321
322MAKE_INLINE void
323SubstringWords_Free(SubstringWords w)
324{
325	free(w.words);
326	free(w.freeIt);
327}
328
329
/*
 * Declared here, defined outside this header.
 *
 * NOTE(review): presumably these return the concatenation of their two
 * resp. three arguments; whether the result is newly allocated and has to
 * be freed by the caller is not visible here -- confirm in the definition.
 */
char *str_concat2(const char *, const char *);
char *str_concat3(const char *, const char *, const char *);
332
/*
 * Declared here, defined outside this header.  The result carries an error
 * message pointer and a 'matched' flag.
 *
 * NOTE(review): which of the two arguments is the string and which is the
 * pattern is not visible here -- confirm in the definition.
 */
StrMatchResult Str_Match(const char *, const char *);
334
/*
 * Declared here, defined outside this header.
 *
 * NOTE(review): presumably Str_Intern returns a shared, canonical copy of
 * the given string, and Str_Intern_Init and Str_Intern_End set up and tear
 * down the underlying storage; the lifetime of the returned pointer is not
 * visible here -- confirm in the definitions.
 */
void Str_Intern_Init(void);
void Str_Intern_End(void);
const char *Str_Intern(const char *);
338