/* Character set conversion support for GDB.
   Copyright 2001 Free Software Foundation, Inc.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */

#ifndef CHARSET_H
#define CHARSET_H


/* If the target program uses a different character set than the host,
   GDB has some support for translating between the two; GDB converts
   characters and strings to the host character set before displaying
   them, and converts characters and strings appearing in expressions
   entered by the user to the target character set.

   At the moment, GDB only supports single-byte, stateless character
   sets.  This includes the ISO-8859 family (ASCII extended with
   accented characters, and (I think) Cyrillic, for European
   languages), and the EBCDIC family (used on IBM's mainframes).
   Unfortunately, it excludes many Asian scripts, the fixed- and
   variable-width Unicode encodings, and other desirable things.
   Patches are welcome!  (For example, it would be nice if the Java
   string support could simply get absorbed into some more general
   multi-byte encoding support.)

   Furthermore, GDB's code pretty much assumes that the host character
   set is some superset of ASCII; there are plenty of ('0' + n)
   expressions and the like.

   When the `iconv' library routine supports a character set meeting
   the requirements above, it's easy to plug an entry into GDB's table
   that uses iconv to handle the details.  */

/* Return the name of the current host/target character set.  The
   result is owned by the charset module; the caller should not free
   it.  */
const char *host_charset (void);
const char *target_charset (void);

/* In general, the set of C backslash escapes (\n, \f) is specific to
   the character set.  Not all character sets will have form feed
   characters, for example.

   The following functions allow GDB to parse and print control
   characters in a character-set-independent way.  They are both
   language-specific (to C and C++) and character-set-specific.
   Putting them here is a compromise.  */


/* If the target character TARGET_CHAR has a backslash escape in the
   C language (i.e., a character like 'n' or 't'), return the host
   character string that should follow the backslash.  Otherwise,
   return zero.

   When this function returns non-zero, the string it returns is
   statically allocated; the caller is not responsible for freeing it.  */
const char *c_target_char_has_backslash_escape (int target_char);


/* If the host character HOST_CHAR is a valid backslash escape in the
   C language for the target character set, return non-zero, and set
   *TARGET_CHAR to the target character the backslash escape represents.
   Otherwise, return zero.  */
int c_parse_backslash (int host_char, int *target_char);


/* Return non-zero if the host character HOST_CHAR can be printed
   literally --- that is, if it can be readably printed as itself in a
   character or string constant.  Return zero if it should be printed
   using some kind of numeric escape, like '\031' in C, '^(25)' in
   Chill, or #25 in Pascal.  */
int host_char_print_literally (int host_char);


/* If the host character HOST_CHAR has an equivalent in the target
   character set, set *TARGET_CHAR to that equivalent, and return
   non-zero.  Otherwise, return zero.  */
int host_char_to_target (int host_char, int *target_char);


/* If the target character TARGET_CHAR has an equivalent in the host
   character set, set *HOST_CHAR to that equivalent, and return
   non-zero.  Otherwise, return zero.  */
int target_char_to_host (int target_char, int *host_char);


/* If the target character TARGET_CHAR has a corresponding control
   character (also in the target character set), set *TARGET_CTRL_CHAR
   to the control character, and return non-zero.  Otherwise, return
   zero.  */
int target_char_to_control_char (int target_char, int *target_ctrl_char);


#endif /* CHARSET_H */