1/*
2 * Copyright (c) 1993, 1994, 1995, 1996, 1997, 1998
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the Computer Systems
16 *	Engineering Group at Lawrence Berkeley Laboratory.
17 * 4. Neither the name of the University nor of the Laboratory may be used
18 *    to endorse or promote products derived from this software without
19 *    specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34/*
35 * Utilities for message formatting used both by libpcap and rpcapd.
36 */
37
38#ifdef HAVE_CONFIG_H
39#include <config.h>
40#endif
41
42#include "ftmacros.h"
43
44#include <stddef.h>
45#include <stdarg.h>
46#include <stdio.h>
47#include <string.h>
48#include <errno.h>
49
50#include "pcap-int.h"
51
52#include "portability.h"
53
54#include "fmtutils.h"
55
56#ifdef _WIN32
57#include "charconv.h"
58#endif
59
60/*
61 * Set the encoding.
62 */
63#ifdef _WIN32
64/*
65 * True if we shouold use UTF-8.
66 */
67static int use_utf_8;
68
69void
70pcap_fmt_set_encoding(unsigned int opts)
71{
72	if (opts == PCAP_CHAR_ENC_UTF_8)
73		use_utf_8 = 1;
74}
75#else
76void
77pcap_fmt_set_encoding(unsigned int opts _U_)
78{
79	/*
80	 * Nothing to do here.
81	 */
82}
83#endif
84
85#ifdef _WIN32
/*
 * Convert a null-terminated UTF-16LE string to UTF-8, putting it into
 * a buffer starting at the specified location and stopping if we go
 * past the specified size.  This will only put out complete UTF-8
 * sequences.
 *
 * We do this ourselves because Microsoft doesn't offer a "convert and
 * stop at a UTF-8 character boundary if we run out of space" routine.
 *
 * utf_16 is the null-terminated input; utf_8 is the output buffer and
 * utf_8_len its size in bytes, including room for the terminating '\0'.
 *
 * Returns a pointer to the terminating '\0' written into the output
 * buffer.  If utf_8_len is 0, nothing is written and utf_8 itself is
 * returned.
 *
 * Ill-formed input is replaced by U+FFFD REPLACEMENT CHARACTER:
 *
 *   - a leading surrogate at the end of the string;
 *   - a leading surrogate followed by something other than a trailing
 *     surrogate (the following code unit is consumed as well);
 *   - a trailing surrogate with no preceding leading surrogate.
 */
#define IS_LEADING_SURROGATE(c) \
	((c) >= 0xd800 && (c) < 0xdc00)
#define IS_TRAILING_SURROGATE(c) \
	((c) >= 0xdc00 && (c) < 0xe000)
#define SURROGATE_VALUE(leading, trailing) \
	(((((leading) - 0xd800) << 10) | ((trailing) - 0xdc00)) + 0x10000)
#define REPLACEMENT_CHARACTER	0x0FFFD

static char *
utf_16le_to_utf_8_truncated(const wchar_t *utf_16, char *utf_8,
    size_t utf_8_len)
{
	wchar_t c, c2;
	uint32_t uc;

	if (utf_8_len == 0) {
		/*
		 * Not even enough room for a trailing '\0'.
		 * Don't put anything into the buffer.
		 */
		return (utf_8);
	}

	while ((c = *utf_16++) != '\0') {
		if (IS_LEADING_SURROGATE(c)) {
			/*
			 * Leading surrogate.  Must be followed by
			 * a trailing surrogate.
			 */
			c2 = *utf_16;
			if (c2 == '\0') {
				/*
				 * Oops, string ends with a lead
				 * surrogate.  Try to drop in
				 * a REPLACEMENT CHARACTER, and
				 * don't move the string pointer,
				 * so on the next trip through
				 * the loop we grab the terminating
				 * '\0' and quit.
				 */
				uc = REPLACEMENT_CHARACTER;
			} else {
				/*
				 * OK, we can consume this 2-octet
				 * value.
				 */
				utf_16++;
				if (IS_TRAILING_SURROGATE(c2)) {
					/*
					 * Trailing surrogate.
					 * This calculation will,
					 * for c being a leading
					 * surrogate and c2 being
					 * a trailing surrogate,
					 * produce a value between
					 * 0x10000 and 0x10ffff,
					 * so it's always going to be
					 * a valid Unicode code point.
					 */
					uc = SURROGATE_VALUE(c, c2);
				} else {
					/*
					 * Not a trailing surrogate;
					 * try to drop in a
					 * REPLACEMENT CHARACTER.
					 */
					uc = REPLACEMENT_CHARACTER;
				}
			}
		} else {
			/*
			 * Not a leading surrogate.
			 */
			if (IS_TRAILING_SURROGATE(c)) {
				/*
				 * Trailing surrogate without
				 * a preceding leading surrogate.
				 * Try to drop in a REPLACEMENT
				 * CHARACTER.
				 */
				uc = REPLACEMENT_CHARACTER;
			} else {
				/*
				 * This is a valid BMP character;
				 * drop it in.
				 */
				uc = c;
			}
		}

		/*
		 * OK, uc is a valid Unicode character; how
		 * many bytes worth of UTF-8 does it require?
		 *
		 * In every case we insist on room for the whole
		 * sequence *plus* a trailing '\0', and we charge
		 * utf_8_len for exactly the bytes we wrote.
		 */
		if (uc < 0x0080) {
			/* 1 byte. */
			if (utf_8_len < 2) {
				/*
				 * Not enough room for that byte
				 * plus a trailing '\0'.
				 */
				break;
			}
			*utf_8++ = (char)uc;
			utf_8_len--;
		} else if (uc < 0x0800) {
			/* 2 bytes. */
			if (utf_8_len < 3) {
				/*
				 * Not enough room for those bytes
				 * plus a trailing '\0'.
				 */
				break;
			}
			*utf_8++ = (char)(((uc >> 6) & 0x3F) | 0xC0);
			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
			utf_8_len -= 2;
		} else if (uc < 0x010000) {
			/* 3 bytes. */
			if (utf_8_len < 4) {
				/*
				 * Not enough room for those bytes
				 * plus a trailing '\0'.
				 */
				break;
			}
			*utf_8++ = (char)(((uc >> 12) & 0x0F) | 0xE0);
			*utf_8++ = (char)(((uc >> 6) & 0x3F) | 0x80);
			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
			utf_8_len -= 3;
		} else {
			/* 4 bytes. */
			if (utf_8_len < 5) {
				/*
				 * Not enough room for those bytes
				 * plus a trailing '\0'.
				 */
				break;
			}
			/*
			 * The lead byte of a 4-byte sequence carries
			 * 3 payload bits (11110xxx); code points from
			 * 0x100000 up need the top one, so the mask
			 * must be 0x07.
			 */
			*utf_8++ = (char)(((uc >> 18) & 0x07) | 0xF0);
			*utf_8++ = (char)(((uc >> 12) & 0x3F) | 0x80);
			*utf_8++ = (char)(((uc >> 6) & 0x3F) | 0x80);
			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
			/*
			 * Four bytes were written, so four bytes are
			 * gone; undercounting here would let a later
			 * write run off the end of the buffer.
			 */
			utf_8_len -= 4;
		}
	}

	/*
	 * OK, we have enough room for (at least) a trailing '\0'.
	 * (We started out with enough room, thanks to the test
	 * for a zero-length buffer at the beginning, and if
	 * there wasn't enough room for any character we wanted
	 * to put into the buffer *plus* a trailing '\0',
	 * we'd have quit before putting it into the buffer,
	 * and thus would have left enough room for the trailing
	 * '\0'.)
	 *
	 * Drop it in.
	 */
	*utf_8 = '\0';

	/*
	 * Return a pointer to the terminating '\0', in case we
	 * want to drop something in after that.
	 */
	return (utf_8);
}
262#endif /* _WIN32 */
263
/*
 * Build an error message in errbuf from a printf-style format and
 * its arguments, followed by text describing the errno value errnum.
 *
 * This is the variadic front end; it packages its arguments into a
 * va_list and hands the work to pcap_vfmt_errmsg_for_errno().
 */
void
pcap_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
    const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	pcap_vfmt_errmsg_for_errno(errbuf, errbuflen, errnum, fmt, args);
	va_end(args);
}
278
/*
 * va_list version of pcap_fmt_errmsg_for_errno().
 *
 * Formats fmt/ap into errbuf (at most errbuflen bytes, including the
 * terminating '\0'), then, if there is room, appends ": " followed by
 * a description of the errno value errnum.  If there isn't room for
 * at least the ": " the formatted message is left as it is.
 *
 * If errbuflen is 0 nothing at all is written.
 */
void
pcap_vfmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
    const char *fmt, va_list ap)
{
	size_t msglen;
	char *p;
	size_t errbuflen_remaining;

	if (errbuflen == 0) {
		/*
		 * No room for even a terminating '\0'.  vsnprintf()
		 * wouldn't store anything, so errbuf wouldn't be
		 * guaranteed to hold a string for strlen() to measure.
		 */
		return;
	}

	(void)vsnprintf(errbuf, errbuflen, fmt, ap);
	msglen = strlen(errbuf);

	/*
	 * Do we have enough space to append ": "?
	 * Including the terminating '\0', that's 3 bytes.
	 */
	if (msglen + 3 > errbuflen) {
		/* No - just give them what we've produced. */
		return;
	}
	p = errbuf + msglen;
	errbuflen_remaining = errbuflen - msglen;
	*p++ = ':';
	*p++ = ' ';
	*p = '\0';
	errbuflen_remaining -= 2;

	/*
	 * Now append the string for the error code.
	 */
#if defined(HAVE__WCSERROR_S)
	/*
	 * We have a Windows-style _wcserror_s().
	 * Generate a UTF-16LE error message.
	 */
	wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
	errno_t err = _wcserror_s(utf_16_errbuf, PCAP_ERRBUF_SIZE, errnum);
	if (err != 0) {
		/*
		 * It doesn't appear to be documented anywhere obvious
		 * what the error returns from _wcserror_s().
		 */
		snprintf(p, errbuflen_remaining, "Error %d", errnum);
		return;
	}

	/*
	 * Now convert it from UTF-16LE to UTF-8, dropping it in the
	 * remaining space in the buffer, and truncating it - cleanly,
	 * on a UTF-8 character boundary - if it doesn't fit.
	 */
	utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);

	/*
	 * Now, if we're not in UTF-8 mode, convert errbuf to the
	 * local code page.
	 */
	if (!use_utf_8)
		utf_8_to_acp_truncated(errbuf);
#elif defined(HAVE_GNU_STRERROR_R)
	/*
	 * We have a GNU-style strerror_r(), which is *not* guaranteed to
	 * do anything to the buffer handed to it, and which returns a
	 * pointer to the error string, which may or may not be in
	 * the buffer.
	 *
	 * It is, however, guaranteed to succeed.
	 */
	char strerror_buf[PCAP_ERRBUF_SIZE];
	char *errstring = strerror_r(errnum, strerror_buf, PCAP_ERRBUF_SIZE);
	snprintf(p, errbuflen_remaining, "%s", errstring);
#elif defined(HAVE_POSIX_STRERROR_R)
	/*
	 * We have a POSIX-style strerror_r(), which is guaranteed to fill
	 * in the buffer, but is not guaranteed to succeed.
	 */
	int err = strerror_r(errnum, p, errbuflen_remaining);
	if (err == EINVAL) {
		/*
		 * UNIX 03 says this isn't guaranteed to produce a
		 * fallback error message.
		 */
		snprintf(p, errbuflen_remaining, "Unknown error: %d",
		    errnum);
	} else if (err == ERANGE) {
		/*
		 * UNIX 03 says this isn't guaranteed to produce a
		 * fallback error message.
		 */
		snprintf(p, errbuflen_remaining,
		    "Message for error %d is too long", errnum);
	}
#else
	/*
	 * We have neither _wcserror_s() nor strerror_r(), so we're
	 * stuck with using pcap_strerror().
	 */
	snprintf(p, errbuflen_remaining, "%s", pcap_strerror(errnum));
#endif
}
378
379#ifdef _WIN32
380/*
381 * Generate an error message based on a format, arguments, and a
382 * Win32 error, with a message for the Win32 error after the formatted output.
383 */
384void
385pcap_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
386    const char *fmt, ...)
387{
388	va_list ap;
389
390	va_start(ap, fmt);
391	pcap_vfmt_errmsg_for_win32_err(errbuf, errbuflen, errnum, fmt, ap);
392	va_end(ap);
393}
394
395void
396pcap_vfmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
397    const char *fmt, va_list ap)
398{
399	size_t msglen;
400	char *p;
401	size_t errbuflen_remaining;
402	DWORD retval;
403	wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
404	size_t utf_8_len;
405
406	vsnprintf(errbuf, errbuflen, fmt, ap);
407	msglen = strlen(errbuf);
408
409	/*
410	 * Do we have enough space to append ": "?
411	 * Including the terminating '\0', that's 3 bytes.
412	 */
413	if (msglen + 3 > errbuflen) {
414		/* No - just give them what we've produced. */
415		return;
416	}
417	p = errbuf + msglen;
418	errbuflen_remaining = errbuflen - msglen;
419	*p++ = ':';
420	*p++ = ' ';
421	*p = '\0';
422	msglen += 2;
423	errbuflen_remaining -= 2;
424
425	/*
426	 * Now append the string for the error code.
427	 *
428	 * XXX - what language ID to use?
429	 *
430	 * For UN*Xes, pcap_strerror() may or may not return localized
431	 * strings.
432	 *
433	 * We currently don't have localized messages for libpcap, but
434	 * we might want to do so.  On the other hand, if most of these
435	 * messages are going to be read by libpcap developers and
436	 * perhaps by developers of libpcap-based applications, English
437	 * might be a better choice, so the developer doesn't have to
438	 * get the message translated if it's in a language they don't
439	 * happen to understand.
440	 */
441	retval = FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_MAX_WIDTH_MASK,
442	    NULL, errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
443	    utf_16_errbuf, PCAP_ERRBUF_SIZE, NULL);
444	if (retval == 0) {
445		/*
446		 * Failed.
447		 */
448		snprintf(p, errbuflen_remaining,
449		    "Couldn't get error message for error (%lu)", errnum);
450		return;
451	}
452
453	/*
454	 * Now convert it from UTF-16LE to UTF-8.
455	 */
456	p = utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);
457
458	/*
459	 * Now append the error number, if it fits.
460	 */
461	utf_8_len = p - errbuf;
462	errbuflen_remaining -= utf_8_len;
463	if (utf_8_len == 0) {
464		/* The message was empty. */
465		snprintf(p, errbuflen_remaining, "(%lu)", errnum);
466	} else
467		snprintf(p, errbuflen_remaining, " (%lu)", errnum);
468
469	/*
470	 * Now, if we're not in UTF-8 mode, convert errbuf to the
471	 * local code page.
472	 */
473	if (!use_utf_8)
474		utf_8_to_acp_truncated(errbuf);
475}
476#endif
477