1228753Smm/*-
2232153Smm * Copyright (c) 2003-2010 Tim Kientzle
3228753Smm * All rights reserved.
4228753Smm *
5228753Smm * Redistribution and use in source and binary forms, with or without
6228753Smm * modification, are permitted provided that the following conditions
7228753Smm * are met:
8228753Smm * 1. Redistributions of source code must retain the above copyright
9228753Smm *    notice, this list of conditions and the following disclaimer.
10228753Smm * 2. Redistributions in binary form must reproduce the above copyright
11228753Smm *    notice, this list of conditions and the following disclaimer in the
12228753Smm *    documentation and/or other materials provided with the distribution.
13228753Smm *
14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24228753Smm *
25228763Smm * $FreeBSD: stable/10/contrib/libarchive/libarchive/archive_string.h 368708 2020-12-16 22:25:40Z mm $
26228753Smm *
27228753Smm */
28228753Smm
29358090Smm#ifndef ARCHIVE_STRING_H_INCLUDED
30358090Smm#define ARCHIVE_STRING_H_INCLUDED
31358090Smm
32228753Smm#ifndef __LIBARCHIVE_BUILD
33232153Smm#ifndef __LIBARCHIVE_TEST
34228753Smm#error This header is only to be used internally to libarchive.
35228753Smm#endif
36232153Smm#endif
37228753Smm
38228753Smm#include <stdarg.h>
39228753Smm#ifdef HAVE_STDLIB_H
40228753Smm#include <stdlib.h>  /* required for wchar_t on some systems */
41228753Smm#endif
42228753Smm#ifdef HAVE_STRING_H
43228753Smm#include <string.h>
44228753Smm#endif
45228753Smm#ifdef HAVE_WCHAR_H
46228753Smm#include <wchar.h>
47228753Smm#endif
48228753Smm
49228753Smm#include "archive.h"
50228753Smm
51228753Smm/*
52232153Smm * Basic resizable/reusable string support similar to Java's "StringBuffer."
53228753Smm *
54228753Smm * Unlike sbuf(9), the buffers here are fully reusable and track the
55228753Smm * length throughout.
56228753Smm */
57228753Smm
/*
 * Resizable narrow (char) string: a storage pointer, the current string
 * length, and the size of the allocation behind the pointer.
 */
58228753Smmstruct archive_string {
59228753Smm	char	*s;  /* Pointer to the storage; NULL after archive_string_init() */
60232153Smm	size_t	 length; /* Length of 's' in characters; archive_string_empty() resets it to 0 */
61232153Smm	size_t	 buffer_length; /* Length of malloc-ed storage in bytes. */
62228753Smm};
63228753Smm
/*
 * Wide-character (wchar_t) counterpart of struct archive_string, with the
 * same three members.  Note the units: 'length' counts characters while
 * 'buffer_length' counts bytes.
 */
64232153Smmstruct archive_wstring {
65232153Smm	wchar_t	*s;  /* Pointer to the storage */
66232153Smm	size_t	 length; /* Length of 's' in characters (wchar_t units, not bytes) */
67232153Smm	size_t	 buffer_length; /* Length of malloc-ed storage in bytes. */
68232153Smm};
69232153Smm
/* String conversion object.  Forward declaration only: its members are not
 * defined in this header, so it is used here strictly through pointers. */
70232153Smmstruct archive_string_conv;
71232153Smm
72228753Smm/* Initialize an archive_string object on the stack or elsewhere.
 * Sets the storage pointer to NULL and both lengths to 0; nothing is
 * allocated.  The argument is evaluated three times, so it must not have
 * side effects.  Only the members 's', 'length' and 'buffer_length' are
 * touched, all of which struct archive_wstring also has. */
73228753Smm#define	archive_string_init(a)	\
74228753Smm	do { (a)->s = NULL; (a)->length = 0; (a)->buffer_length = 0; } while(0)
75228753Smm
76228753Smm/* Append a C char to an archive_string, resizing as necessary.
 * NOTE(review): the returned pointer is presumably the string that was
 * passed in, or NULL if resizing failed -- confirm against the
 * implementation. */
77228753Smmstruct archive_string *
78232153Smmarchive_strappend_char(struct archive_string *, char);
79228753Smm
80232153Smm/* Append a wchar_t to an archive_wstring; the wide-character counterpart
 * of archive_strappend_char().
 * NOTE(review): presumably resizes as necessary like the narrow version --
 * confirm against the implementation. */
81232153Smmstruct archive_wstring *
82232153Smmarchive_wstrappend_wchar(struct archive_wstring *, wchar_t);
83228753Smm
84313571Smm/* Append a raw array to an archive_string, resizing as necessary.
 * NOTE(review): the size_t argument is presumably the number of bytes to
 * take from the array -- confirm against the implementation. */
85313571Smmstruct archive_string *
86313571Smmarchive_array_append(struct archive_string *, const char *, size_t);
87313571Smm
88232153Smm/* Convert a Unicode string to current locale and append the result. */
89232153Smm/* Returns -1 if conversion fails. */
/* NOTE(review): the size_t argument is presumably the number of wide
 * characters to convert -- confirm against the implementation. */
90232153Smmint
91232153Smmarchive_string_append_from_wcs(struct archive_string *, const wchar_t *, size_t);
92228753Smm
93228753Smm
94232153Smm/* Create a string conversion object.
95232153Smm * Return NULL and set an error message if the conversion is not supported
96232153Smm * on the platform.
 * The "to" and "from" variants share one signature.
 * NOTE(review): the const char * is presumably the charset name and the
 * int a flag; neither is named in the prototypes -- confirm. */
97232153Smmstruct archive_string_conv *
98232153Smmarchive_string_conversion_to_charset(struct archive *, const char *, int);
99232153Smmstruct archive_string_conv *
100232153Smmarchive_string_conversion_from_charset(struct archive *, const char *, int);
101232153Smm/* Create the default string conversion object for reading/writing an archive.
102232153Smm * Return NULL if the conversion is unneeded.
103232153Smm * Note: On non-Windows platforms this always returns NULL, so callers
 * must treat a NULL result as "no conversion" rather than as an error.
104232153Smm */
105232153Smmstruct archive_string_conv *
106232153Smmarchive_string_default_conversion_for_read(struct archive *);
107232153Smmstruct archive_string_conv *
108232153Smmarchive_string_default_conversion_for_write(struct archive *);
109232153Smm/* Dispose of a string conversion object.
 * NOTE(review): the parameter is the struct archive, not a
 * struct archive_string_conv; presumably this releases the conversion
 * objects associated with that archive -- confirm against the
 * implementation. */
110228753Smmvoid
111232153Smmarchive_string_conversion_free(struct archive *);
/* NOTE(review): presumably returns the name of the charset the conversion
 * object was created for -- confirm. */
112232153Smmconst char *
113232153Smmarchive_string_conversion_charset_name(struct archive_string_conv *);
/* Set an option on a conversion object.
 * NOTE(review): the int presumably takes the SCONV_SET_OPT_* values
 * defined just below -- confirm. */
114232153Smmvoid
115232153Smmarchive_string_conversion_set_opt(struct archive_string_conv *, int);
/* The SCONV_SET_OPT_* values are 1, 2 and 4, i.e. one distinct bit each. */
116232153Smm#define SCONV_SET_OPT_UTF8_LIBARCHIVE2X	1
117238856Smm#define SCONV_SET_OPT_NORMALIZATION_C	2
118238856Smm#define SCONV_SET_OPT_NORMALIZATION_D	4
119228753Smm
120232153Smm
121232153Smm/* Copy a buffer (pointer plus length) into an archive_string in locale
 * conversion, using the given string conversion object.  The source is a
 * const void * and a size_t, not an archive_string; archive_strcpy_l()
 * passes strlen() of the source as that size_t.
122311042Smm * Return -1 if conversion fails. */
123232153Smmint
124238856Smmarchive_strncpy_l(struct archive_string *, const void *, size_t,
125232153Smm    struct archive_string_conv *);
126232153Smm
127232153Smm/* Append a buffer (pointer plus length) to an archive_string in locale
 * conversion, using the given string conversion object.
 * NOTE(review): "append" is inferred from the name, by analogy with
 * archive_strncat(); this prototype previously carried the same "Copy"
 * description as archive_strncpy_l() -- confirm.
128311042Smm * Return -1 if conversion fails. */
129232153Smmint
130238856Smmarchive_strncat_l(struct archive_string *, const void *, size_t,
131232153Smm    struct archive_string_conv *);
132232153Smm
133232153Smm
134232153Smm/* Copy one archive_string to another.
 * Each macro resets dest->length to 0 and then concatenates src onto dest.
 * Only 'length' is reset by the macro itself; dest's storage pointer and
 * buffer_length are left for the concat function to manage.
 * 'dest' is evaluated twice, so it must not have side effects.  The
 * expression has type void because the concat functions return void. */
135232153Smm#define	archive_string_copy(dest, src) \
136232153Smm	((dest)->length = 0, archive_string_concat((dest), (src)))
137232153Smm#define	archive_wstring_copy(dest, src) \
138232153Smm	((dest)->length = 0, archive_wstring_concat((dest), (src)))
139232153Smm
140228753Smm/* Concatenate one archive_string to another: src is added to dest.
 * Both functions return void, so no failure is reported to the caller
 * through the return value.  'src' is not const-qualified in either
 * prototype. */
141232153Smmvoid archive_string_concat(struct archive_string *dest, struct archive_string *src);
142232153Smmvoid archive_wstring_concat(struct archive_wstring *dest, struct archive_wstring *src);
143228753Smm
144228753Smm/* Ensure that the underlying buffer is at least as large as the request.
 * NOTE(review): the units of the size_t request (bytes vs characters,
 * which differ for the wchar_t variant) and the meaning of the returned
 * pointer (presumably NULL on allocation failure) are not visible in this
 * header -- confirm against the implementation. */
145228753Smmstruct archive_string *
146232153Smmarchive_string_ensure(struct archive_string *, size_t);
147232153Smmstruct archive_wstring *
148232153Smmarchive_wstring_ensure(struct archive_wstring *, size_t);
149228753Smm
150228753Smm/* Append C string, which may lack trailing \0. */
151228753Smm/* The source is declared void * here because this gets used with
152228753Smm * "signed char *", "unsigned char *" and "char *" arguments.
153228753Smm * Declaring it "char *" as with some of the other functions just
154228753Smm * leads to a lot of extra casts. */
/* The wide variant below takes a typed "const wchar_t *" source instead
 * of "const void *". */
155228753Smmstruct archive_string *
156232153Smmarchive_strncat(struct archive_string *, const void *, size_t);
157232153Smmstruct archive_wstring *
158232153Smmarchive_wstrncat(struct archive_wstring *, const wchar_t *, size_t);
159228753Smm
160228753Smm/* Append a C string to an archive_string, resizing as necessary.
 * No length is passed, unlike archive_strncat()/archive_wstrncat().
 * The narrow variant takes its source as "const void *", the wide variant
 * as "const wchar_t *". */
161232153Smmstruct archive_string *
162232153Smmarchive_strcat(struct archive_string *, const void *);
163232153Smmstruct archive_wstring *
164232153Smmarchive_wstrcat(struct archive_wstring *, const wchar_t *);
165228753Smm
166228753Smm/* Copy a C string to an archive_string, resizing as necessary.
 * A NULL source is passed through with a length of 0; otherwise the
 * length is taken with strlen() (wcslen() for the wide variant).
 * 'p' appears more than once in each expansion, so it must not have side
 * effects.
 * archive_strcpy_l() forwards 'lo' as the struct archive_string_conv *
 * argument of archive_strncpy_l(). */
167228753Smm#define	archive_strcpy(as,p) \
168232153Smm	archive_strncpy((as), (p), ((p) == NULL ? 0 : strlen(p)))
169232153Smm#define	archive_wstrcpy(as,p) \
170232153Smm	archive_wstrncpy((as), (p), ((p) == NULL ? 0 : wcslen(p)))
171238856Smm#define	archive_strcpy_l(as,p,lo) \
172238856Smm	archive_strncpy_l((as), (p), ((p) == NULL ? 0 : strlen(p)), (lo))
173228753Smm
174228753Smm/* Copy a C string to an archive_string with limit, resizing as necessary.
 * Each macro resets the destination's length to 0 and then appends with
 * archive_strncat()/archive_wstrncat(); the expression's value is that
 * call's return value.  'as' is evaluated twice, so it must not have side
 * effects. */
175228753Smm#define	archive_strncpy(as,p,l) \
176228753Smm	((as)->length=0, archive_strncat((as), (p), (l)))
177232153Smm#define	archive_wstrncpy(as,p,l) \
178232153Smm	((as)->length = 0, archive_wstrncat((as), (p), (l)))
179228753Smm
180228753Smm/* Return length of string.
 * Reads the stored 'length' member directly, with no scan of the storage.
 * Because it only names that member, it applies equally to an
 * archive_wstring. */
181228753Smm#define	archive_strlen(a) ((a)->length)
182228753Smm
183228753Smm/* Set string length to zero.
 * Only 'length' is assigned: the storage pointer and buffer_length are
 * left untouched, so the allocation stays available for reuse.  The two
 * macros have identical expansions. */
184228753Smm#define	archive_string_empty(a) ((a)->length = 0)
185232153Smm#define	archive_wstring_empty(a) ((a)->length = 0)
186228753Smm
187228753Smm/* Release any allocated storage resources.
 * NOTE(review): since archive_string_init() is documented for objects on
 * the stack, this presumably releases only the storage behind 's' and not
 * the struct itself -- confirm against the implementation. */
188232153Smmvoid	archive_string_free(struct archive_string *);
189232153Smmvoid	archive_wstring_free(struct archive_wstring *);
190228753Smm
191228753Smm/* Like 'vsprintf', but resizes the underlying string as necessary. */
192232153Smm/* Note: This only implements a small subset of standard printf functionality. */
/* Both functions return void, so nothing is reported to the caller through
 * the return value.  In the __LA_PRINTF annotations the format string is
 * argument 2; the variadic arguments start at argument 3, and the va_list
 * form gives 0 in that position.
 * NOTE(review): __LA_PRINTF is not defined in this header; presumably it
 * comes from "archive.h" and expands to a printf-format checking
 * attribute -- confirm. */
193232153Smmvoid	archive_string_vsprintf(struct archive_string *, const char *,
194228753Smm	    va_list) __LA_PRINTF(2, 0);
195232153Smmvoid	archive_string_sprintf(struct archive_string *, const char *, ...)
196228753Smm	    __LA_PRINTF(2, 3);
197228753Smm
198232153Smm/* Translates from MBS to Unicode and appends the result to 'dest'. */
199232153Smm/* Returns non-zero if conversion failed in any way. */
/* Note the different failure convention from
 * archive_string_append_from_wcs(), which is documented as returning -1.
 * NOTE(review): the size_t argument is presumably the length of the MBS
 * input -- confirm against the implementation. */
200232153Smmint archive_wstring_append_from_mbs(struct archive_wstring *dest,
201232153Smm    const char *, size_t);
202228753Smm
203228753Smm
204232153Smm/* A "multistring" can hold Unicode, UTF8, or MBS versions of
205232153Smm * the string.  If you set and read the same version, no translation
206232153Smm * is done.  If you set and read different versions, the library
207232153Smm * will attempt to transparently convert.
208232153Smm */
209232153Smmstruct archive_mstring {
210232153Smm	struct archive_string aes_mbs;	/* MBS version; valid when AES_SET_MBS is set */
211232153Smm	struct archive_string aes_utf8;	/* UTF8 version; valid when AES_SET_UTF8 is set */
212232153Smm	struct archive_wstring aes_wcs;	/* wchar_t version; valid when AES_SET_WCS is set */
	/* No AES_SET_* bit below corresponds to this member.
	 * NOTE(review): presumably scratch storage for a converted MBS
	 * result -- confirm against the implementation. */
213232153Smm	struct archive_string aes_mbs_in_locale;
214232153Smm	/* Bitmap of which of the above are valid.  Because we're lazy
215232153Smm	 * about malloc-ing and reusing the underlying storage, we
216232153Smm	 * can't rely on NULL pointers to indicate whether a string
217232153Smm	 * has been set.  The AES_SET_* values are single bits (1, 2, 4). */
218232153Smm	int aes_set;
219232153Smm#define	AES_SET_MBS 1
220232153Smm#define	AES_SET_UTF8 2
221232153Smm#define	AES_SET_WCS 4
222232153Smm};
223232153Smm
/* Operations on struct archive_mstring.
 * NOTE(review): none of these prototypes documents its return value; the
 * int results are presumably 0 on success and non-zero/negative on failure
 * -- confirm against the implementation. */
/* NOTE(review): "clean" presumably releases the storage held by the
 * member strings -- confirm. */
224232153Smmvoid	archive_mstring_clean(struct archive_mstring *);
/* Copy src into dest; returns void.  'src' is not const-qualified. */
225232153Smmvoid	archive_mstring_copy(struct archive_mstring *dest, struct archive_mstring *src);
/* Getters: each takes the archive, the multistring, and an output pointer
 * that receives a const string of the requested kind (char for mbs/utf8,
 * wchar_t for wcs).  The _l variant additionally takes a size_t output
 * and a string conversion object. */
226232153Smmint archive_mstring_get_mbs(struct archive *, struct archive_mstring *, const char **);
227232153Smmint archive_mstring_get_utf8(struct archive *, struct archive_mstring *, const char **);
228232153Smmint archive_mstring_get_wcs(struct archive *, struct archive_mstring *, const wchar_t **);
229368708Smmint	archive_mstring_get_mbs_l(struct archive *, struct archive_mstring *, const char **,
230232153Smm	    size_t *, struct archive_string_conv *);
/* Setters: each stores one representation (mbs, utf8 or wcs) into the
 * multistring.  The _len variants take an explicit size_t length, and the
 * _l variant also takes a string conversion object. */
231232153Smmint	archive_mstring_copy_mbs(struct archive_mstring *, const char *mbs);
232232153Smmint	archive_mstring_copy_mbs_len(struct archive_mstring *, const char *mbs,
233232153Smm	    size_t);
234232153Smmint	archive_mstring_copy_utf8(struct archive_mstring *, const char *utf8);
235232153Smmint	archive_mstring_copy_wcs(struct archive_mstring *, const wchar_t *wcs);
236232153Smmint	archive_mstring_copy_wcs_len(struct archive_mstring *,
237232153Smm	    const wchar_t *wcs, size_t);
238232153Smmint	archive_mstring_copy_mbs_len_l(struct archive_mstring *,
239232153Smm	    const char *mbs, size_t, struct archive_string_conv *);
/* Unlike the copy_* setters above, this one also takes the archive.
 * NOTE(review): presumably it sets the UTF8 form and refreshes the other
 * representations from it -- confirm. */
240232153Smmint     archive_mstring_update_utf8(struct archive *, struct archive_mstring *aes, const char *utf8);
241232153Smm
242232153Smm
243228753Smm#endif
244