1130803Smarcel/* Character set conversion support for GDB.
2130803Smarcel   Copyright 2001 Free Software Foundation, Inc.
3130803Smarcel
4130803Smarcel   This file is part of GDB.
5130803Smarcel
6130803Smarcel   This program is free software; you can redistribute it and/or modify
7130803Smarcel   it under the terms of the GNU General Public License as published by
8130803Smarcel   the Free Software Foundation; either version 2 of the License, or
9130803Smarcel   (at your option) any later version.
10130803Smarcel
11130803Smarcel   This program is distributed in the hope that it will be useful,
12130803Smarcel   but WITHOUT ANY WARRANTY; without even the implied warranty of
13130803Smarcel   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14130803Smarcel   GNU General Public License for more details.
15130803Smarcel
16130803Smarcel   You should have received a copy of the GNU General Public License
17130803Smarcel   along with this program; if not, write to the Free Software
18130803Smarcel   Foundation, Inc., 59 Temple Place - Suite 330,
19130803Smarcel   Boston, MA 02111-1307, USA.  */
20130803Smarcel
21130803Smarcel#ifndef CHARSET_H
22130803Smarcel#define CHARSET_H
23130803Smarcel
24130803Smarcel
25130803Smarcel/* If the target program uses a different character set than the host,
26130803Smarcel   GDB has some support for translating between the two; GDB converts
27130803Smarcel   characters and strings to the host character set before displaying
28130803Smarcel   them, and converts characters and strings appearing in expressions
29130803Smarcel   entered by the user to the target character set.
30130803Smarcel
31130803Smarcel   At the moment, GDB only supports single-byte, stateless character
32130803Smarcel   sets.  This includes the ISO-8859 family (ASCII extended with
33130803Smarcel   accented characters, and (I think) Cyrillic, for European
34130803Smarcel   languages), and the EBCDIC family (used on IBM's mainframes).
35130803Smarcel   Unfortunately, it excludes many Asian scripts, the fixed- and
36130803Smarcel   variable-width Unicode encodings, and other desirable things.
37130803Smarcel   Patches are welcome!  (For example, it would be nice if the Java
38130803Smarcel   string support could simply get absorbed into some more general
39130803Smarcel   multi-byte encoding support.)
40130803Smarcel
41130803Smarcel   Furthermore, GDB's code pretty much assumes that the host character
42130803Smarcel   set is some superset of ASCII; there are plenty of ('0' + n)
43130803Smarcel   expressions and the like.
44130803Smarcel
45130803Smarcel   When the `iconv' library routine supports a character set meeting
46130803Smarcel   the requirements above, it's easy to plug an entry into GDB's table
47130803Smarcel   that uses iconv to handle the details.  */
48130803Smarcel
49130803Smarcel/* Return the name of the current host/target character set.  The
50130803Smarcel   result is owned by the charset module; the caller should not free
51130803Smarcel   it.  */
52130803Smarcelconst char *host_charset (void);
53130803Smarcelconst char *target_charset (void);
54130803Smarcel
55130803Smarcel/* In general, the set of C backslash escapes (\n, \f) is specific to
56130803Smarcel   the character set.  Not all character sets will have form feed
57130803Smarcel   characters, for example.
58130803Smarcel
59130803Smarcel   The following functions allow GDB to parse and print control
60130803Smarcel   characters in a character-set-independent way.  They are both
61130803Smarcel   language-specific (to C and C++) and character-set-specific.
62130803Smarcel   Putting them here is a compromise.  */
63130803Smarcel
64130803Smarcel
65130803Smarcel/* If the target character TARGET_CHAR has a backslash escape in the
66130803Smarcel   C language (i.e., a character like 'n' or 't'), return the host
67130803Smarcel   character string that should follow the backslash.  Otherwise,
68130803Smarcel   return zero.
69130803Smarcel
70130803Smarcel   When this function returns non-zero, the string it returns is
71130803Smarcel   statically allocated; the caller is not responsible for freeing it.  */
72130803Smarcelconst char *c_target_char_has_backslash_escape (int target_char);
73130803Smarcel
74130803Smarcel
75130803Smarcel/* If the host character HOST_CHAR is a valid backslash escape in the
76130803Smarcel   C language for the target character set, return non-zero, and set
77130803Smarcel   *TARGET_CHAR to the target character the backslash escape represents.
78130803Smarcel   Otherwise, return zero.  */
79130803Smarcelint c_parse_backslash (int host_char, int *target_char);
80130803Smarcel
81130803Smarcel
82130803Smarcel/* Return non-zero if the host character HOST_CHAR can be printed
83130803Smarcel   literally --- that is, if it can be readably printed as itself in a
84130803Smarcel   character or string constant.  Return zero if it should be printed
85130803Smarcel   using some kind of numeric escape, like '\031' in C, '^(25)' in
86130803Smarcel   Chill, or #25 in Pascal.  */
87130803Smarcelint host_char_print_literally (int host_char);
88130803Smarcel
89130803Smarcel
90130803Smarcel/* If the host character HOST_CHAR has an equivalent in the target
91130803Smarcel   character set, set *TARGET_CHAR to that equivalent, and return
92130803Smarcel   non-zero.  Otherwise, return zero.  */
93130803Smarcelint host_char_to_target (int host_char, int *target_char);
94130803Smarcel
95130803Smarcel
96130803Smarcel/* If the target character TARGET_CHAR has an equivalent in the host
97130803Smarcel   character set, set *HOST_CHAR to that equivalent, and return
98130803Smarcel   non-zero.  Otherwise, return zero.  */
99130803Smarcelint target_char_to_host (int target_char, int *host_char);
100130803Smarcel
101130803Smarcel
102130803Smarcel/* If the target character TARGET_CHAR has a corresponding control
103130803Smarcel   character (also in the target character set), set *TARGET_CTRL_CHAR
104130803Smarcel   to the control character, and return non-zero.  Otherwise, return
105130803Smarcel   zero.  */
106130803Smarcelint target_char_to_control_char (int target_char, int *target_ctrl_char);
107130803Smarcel
108130803Smarcel
109130803Smarcel#endif /* CHARSET_H */
110