1/* src.c -- Implementation File
2   Copyright (C) 1995 Free Software Foundation, Inc.
3   Contributed by James Craig Burley.
4
5This file is part of GNU Fortran.
6
7GNU Fortran is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GNU Fortran is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU Fortran; see the file COPYING.  If not, write to
19the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
2002111-1307, USA.
21
22   Related Modules:
23
24   Description:
25      Source-file functions to handle various combinations of case sensitivity
26      and insensitivity at run time.
27
28   Modifications:
29*/
30
31#include "proj.h"
32#include "src.h"
33#include "top.h"
34
35/* This array does a toupper (), but any valid char type is valid as an
36   index and returns identity if not a lower-case character.  */
37
38char ffesrc_toupper_[256];
39
40/* This array does a tolower (), but any valid char type is valid as an
41   index and returns identity if not an upper-case character.  */
42
43char ffesrc_tolower_[256];
44
45/* This array is set up so that, given a source-mapped character, the result
46   of indexing into this array will match an upper-cased character depending
47   on the source-mapped character's case and the established ffe_case_match()
48   setting.  So the uppercase cells contain identies (e.g. ['A'] == 'A')
49   as long as uppercase matching is permitted (!FFE_caseLOWER) and the
50   lowercase cells contain uppercased identities (e.g. ['a'] == 'A') as long
51   as lowercase matching is permitted (!FFE_caseUPPER).	 Else the case
52   cells contain -1.  _init_ is for the first character of a keyword,
53   and _noninit_ is for other characters.  */
54
55char ffesrc_char_match_init_[256];
56char ffesrc_char_match_noninit_[256];
57
58/* This array is used to map input source according to the established
59   ffe_case_source() setting: for FFE_caseNONE, the array is all
60   identities; for FFE_caseUPPER, the lowercase cells contain
61   uppercased identities; and vice versa for FFE_caseLOWER.  */
62
63char ffesrc_char_source_[256];
64
65/* This array is used to map an internally generated character so that it
66   will be accepted as an initial character in a keyword.  The assumption
67   is that the incoming character is uppercase.  */
68
69char ffesrc_char_internal_init_[256];
70
71/* This array is used to determine if a particular character is valid in
72   a symbol name according to the established ffe_case_symbol() setting:
73   for FFE_caseNONE, the array is all FFEBAD; for FFE_caseUPPER, the
74   lowercase cells contain a non-FFEBAD error code (FFEBAD_SYMBOL_UPPER_CASE);
75   and vice versa for FFE_caseLOWER.  _init_ and _noninit_ distinguish
76   between initial and subsequent characters for the caseINITCAP case,
77   and their error codes are different for appropriate messages --
78   specifically, _noninit_ contains a non-FFEBAD error code for all
79   except lowercase characters for the caseINITCAP case.
80
81   See ffesrc_check_symbol_, it must be TRUE if this array is not all
82   FFEBAD.  */
83
84ffebad ffesrc_bad_symbol_init_[256];
85ffebad ffesrc_bad_symbol_noninit_[256];
86
87/* Set TRUE if any element in ffesrc_bad_symbol (with an index representing
88   a character that can also be in the text of a token passed to
89   ffename_find, strictly speaking) is not FFEBAD.  I.e., TRUE if it is
90   necessary to check token characters against the ffesrc_bad_symbol_
91   array.  */
92
93bool ffesrc_check_symbol_;
94
95/* These are set TRUE if the kind of character (upper/lower) is ok as a match
96   in the context (initial/noninitial character of keyword).  */
97
98bool ffesrc_ok_match_init_upper_;
99bool ffesrc_ok_match_init_lower_;
100bool ffesrc_ok_match_noninit_upper_;
101bool ffesrc_ok_match_noninit_lower_;
102
103/* Initialize table of alphabetic matches. */
104
105void
106ffesrc_init_1 ()
107{
108  int i;
109
110  for (i = 0; i < 256; ++i)
111    {
112      ffesrc_char_match_init_[i] = i;
113      ffesrc_char_match_noninit_[i] = i;
114      ffesrc_char_source_[i] = i;
115      ffesrc_char_internal_init_[i] = i;
116      ffesrc_toupper_[i] = i;
117      ffesrc_tolower_[i] = i;
118      ffesrc_bad_symbol_init_[i] = FFEBAD;
119      ffesrc_bad_symbol_noninit_[i] = FFEBAD;
120    }
121
122  for (i = 'A'; i <= 'Z'; ++i)
123    ffesrc_tolower_[i] = tolower (i);
124
125  for (i = 'a'; i <= 'z'; ++i)
126    ffesrc_toupper_[i] = toupper (i);
127
128  ffesrc_check_symbol_ = (ffe_case_symbol () != FFE_caseNONE);
129
130  ffesrc_ok_match_init_upper_ = (ffe_case_match () != FFE_caseLOWER);
131  ffesrc_ok_match_init_lower_ = (ffe_case_match () != FFE_caseUPPER)
132    && (ffe_case_match () != FFE_caseINITCAP);
133  ffesrc_ok_match_noninit_upper_ = (ffe_case_match () != FFE_caseLOWER)
134    && (ffe_case_match () != FFE_caseINITCAP);
135  ffesrc_ok_match_noninit_lower_ = (ffe_case_match () != FFE_caseUPPER);
136
137  /* Note that '-' is used to flag an invalid match character.	'-' is
138     somewhat arbitrary, actually.  -1 was used, but that's not wise on a
139     system with unsigned chars as default -- it'd turn into 255 or some such
140     large positive number, which would sort higher than the alphabetics and
141     thus possibly cause problems.  So '-' is picked just because it's never
142     likely to be a symbol character in Fortran and because it's "less than"
143     any alphabetic character.	EBCDIC might see things differently, I don't
144     remember it well enough, but that's just tough -- lots of other things
145     might have to change to support EBCDIC -- anyway, some other character
146     could easily be picked.  */
147
148#define FFESRC_INVALID_SYMBOL_CHAR_ '-'
149
150  if (!ffesrc_ok_match_init_upper_)
151    for (i = 'A'; i <= 'Z'; ++i)
152      ffesrc_char_match_init_[i] = FFESRC_INVALID_SYMBOL_CHAR_;
153
154  if (ffesrc_ok_match_init_lower_)
155    for (i = 'a'; i <= 'z'; ++i)
156      ffesrc_char_match_init_[i] = toupper (i);
157  else
158    for (i = 'a'; i <= 'z'; ++i)
159      ffesrc_char_match_init_[i] = FFESRC_INVALID_SYMBOL_CHAR_;
160
161  if (!ffesrc_ok_match_noninit_upper_)
162    for (i = 'A'; i <= 'Z'; ++i)
163      ffesrc_char_match_noninit_[i] = FFESRC_INVALID_SYMBOL_CHAR_;
164
165  if (ffesrc_ok_match_noninit_lower_)
166    for (i = 'a'; i <= 'z'; ++i)
167      ffesrc_char_match_noninit_[i] = toupper (i);
168  else
169    for (i = 'a'; i <= 'z'; ++i)
170      ffesrc_char_match_noninit_[i] = FFESRC_INVALID_SYMBOL_CHAR_;
171
172  if (ffe_case_source () == FFE_caseLOWER)
173    for (i = 'A'; i <= 'Z'; ++i)
174      ffesrc_char_source_[i] = tolower (i);
175  else if (ffe_case_source () == FFE_caseUPPER)
176    for (i = 'a'; i <= 'z'; ++i)
177      ffesrc_char_source_[i] = toupper (i);
178
179  if (ffe_case_match () == FFE_caseLOWER)
180    for (i = 'A'; i <= 'Z'; ++i)
181      ffesrc_char_internal_init_[i] = tolower (i);
182
183  switch (ffe_case_symbol ())
184    {
185    case FFE_caseLOWER:
186      for (i = 'A'; i <= 'Z'; ++i)
187	{
188	  ffesrc_bad_symbol_init_[i] = FFEBAD_SYMBOL_UPPER_CASE;
189	  ffesrc_bad_symbol_noninit_[i] = FFEBAD_SYMBOL_UPPER_CASE;
190	}
191      break;
192
193    case FFE_caseUPPER:
194      for (i = 'a'; i <= 'z'; ++i)
195	{
196	  ffesrc_bad_symbol_init_[i] = FFEBAD_SYMBOL_LOWER_CASE;
197	  ffesrc_bad_symbol_noninit_[i] = FFEBAD_SYMBOL_LOWER_CASE;
198	}
199      break;
200
201    case FFE_caseINITCAP:
202      for (i = 0; i < 256; ++i)
203	ffesrc_bad_symbol_noninit_[i] = FFEBAD_SYMBOL_NOLOWER_INITCAP;
204      for (i = 'a'; i <= 'z'; ++i)
205	{
206	  ffesrc_bad_symbol_init_[i] = FFEBAD_SYMBOL_LOWER_INITCAP;
207	  ffesrc_bad_symbol_noninit_[i] = FFEBAD;
208	}
209      break;
210
211    default:
212      break;
213    }
214}
215
216/* Compare two strings a la strcmp, the first being a source string with its
217   length passed, and the second being a constant string passed
218   in InitialCaps form.	 Also, the return value is always -1, 0, or 1. */
219
220int
221ffesrc_strcmp_1ns2i (ffeCase mcase, const char *var, int len,
222		     const char *str_ic)
223{
224  char c;
225  char d;
226
227  switch (mcase)
228    {
229    case FFE_caseNONE:
230      for (; len > 0; --len, ++var, ++str_ic)
231	{
232	  c = ffesrc_char_source (*var);	/* Transform source. */
233	  c = ffesrc_toupper (c);	/* Upcase source. */
234	  d = ffesrc_toupper (*str_ic);	/* Upcase InitialCaps char. */
235	  if (c != d)
236	    {
237	      if ((d != '\0') && (c < d))
238		return -1;
239	      else
240		return 1;
241	    }
242	}
243      break;
244
245    case FFE_caseUPPER:
246      for (; len > 0; --len, ++var, ++str_ic)
247	{
248	  c = ffesrc_char_source (*var);	/* Transform source. */
249	  d = ffesrc_toupper (*str_ic);	/* Transform InitialCaps char. */
250	  if (c != d)
251	    {
252	      if ((d != '\0') && (c < d))
253		return -1;
254	      else
255		return 1;
256	    }
257	}
258      break;
259
260    case FFE_caseLOWER:
261      for (; len > 0; --len, ++var, ++str_ic)
262	{
263	  c = ffesrc_char_source (*var);	/* Transform source. */
264	  d = ffesrc_tolower (*str_ic);	/* Transform InitialCaps char. */
265	  if (c != d)
266	    {
267	      if ((d != '\0') && (c < d))
268		return -1;
269	      else
270		return 1;
271	    }
272	}
273      break;
274
275    case FFE_caseINITCAP:
276      for (; len > 0; --len, ++var, ++str_ic)
277	{
278	  c = ffesrc_char_source (*var);	/* Transform source. */
279	  d = *str_ic;		/* No transform of InitialCaps char. */
280	  if (c != d)
281	    {
282	      c = ffesrc_toupper (c);
283	      d = ffesrc_toupper (d);
284	      while ((len > 0) && (c == d))
285		{		/* Skip past equivalent (case-ins) chars. */
286		  --len, ++var, ++str_ic;
287		  if (len > 0)
288		    c = ffesrc_toupper (*var);
289		  d = ffesrc_toupper (*str_ic);
290		}
291	      if ((d != '\0') && (c < d))
292		return -1;
293	      else
294		return 1;
295	    }
296	}
297      break;
298
299    default:
300      assert ("bad case value" == NULL);
301      return -1;
302    }
303
304  if (*str_ic == '\0')
305    return 0;
306  return -1;
307}
308
309/* Compare two strings a la strcmp, the second being a constant string passed
310   in both uppercase and lowercase form.  If not equal, the uppercase string
311   is used to determine the sign of the return value.  Also, the return
312   value is always -1, 0, or 1. */
313
314int
315ffesrc_strcmp_2c (ffeCase mcase, const char *var, const char *str_uc,
316		  const char *str_lc, const char *str_ic)
317{
318  int i;
319  char c;
320
321  switch (mcase)
322    {
323    case FFE_caseNONE:
324      for (; *var != '\0'; ++var, ++str_uc)
325	{
326	  c = ffesrc_toupper (*var);	/* Upcase source. */
327	  if (c != *str_uc)
328	    {
329	      if ((*str_uc != '\0') && (c < *str_uc))
330		return -1;
331	      else
332		return 1;
333	    }
334	}
335      if (*str_uc == '\0')
336	return 0;
337      return -1;
338
339    case FFE_caseUPPER:
340      i = strcmp (var, str_uc);
341      break;
342
343    case FFE_caseLOWER:
344      i = strcmp (var, str_lc);
345      break;
346
347    case FFE_caseINITCAP:
348      for (; *var != '\0'; ++var, ++str_ic, ++str_uc)
349	{
350	  if (*var != *str_ic)
351	    {
352	      c = ffesrc_toupper (*var);
353	      while ((c != '\0') && (c == *str_uc))
354		{		/* Skip past equivalent (case-ins) chars. */
355		  ++var, ++str_uc;
356		  c = ffesrc_toupper (*var);
357		}
358	      if ((*str_uc != '\0') && (c < *str_uc))
359		return -1;
360	      else
361		return 1;
362	    }
363	}
364      if (*str_ic == '\0')
365	return 0;
366      return -1;
367
368    default:
369      assert ("bad case value" == NULL);
370      return -1;
371    }
372
373  if (i == 0)
374    return 0;
375  else if (i < 0)
376    return -1;
377  return 1;
378}
379
380/* Compare two strings a la strncmp, the second being a constant string passed
381   in uppercase, lowercase, and InitialCaps form.  If not equal, the
382   uppercase string is used to determine the sign of the return value.	*/
383
384int
385ffesrc_strncmp_2c (ffeCase mcase, const char *var, const char *str_uc,
386		   const char *str_lc, const char *str_ic, int len)
387{
388  int i;
389  char c;
390
391  switch (mcase)
392    {
393    case FFE_caseNONE:
394      for (; len > 0; ++var, ++str_uc, --len)
395	{
396	  c = ffesrc_toupper (*var);	/* Upcase source. */
397	  if (c != *str_uc)
398	    {
399	      if (c < *str_uc)
400		return -1;
401	      else
402		return 1;
403	    }
404	}
405      return 0;
406
407    case FFE_caseUPPER:
408      i = strncmp (var, str_uc, len);
409      break;
410
411    case FFE_caseLOWER:
412      i = strncmp (var, str_lc, len);
413      break;
414
415    case FFE_caseINITCAP:
416      for (; len > 0; ++var, ++str_ic, ++str_uc, --len)
417	{
418	  if (*var != *str_ic)
419	    {
420	      c = ffesrc_toupper (*var);
421	      while ((len > 0) && (c == *str_uc))
422		{		/* Skip past equivalent (case-ins) chars. */
423		  --len, ++var, ++str_uc;
424		  if (len > 0)
425		    c = ffesrc_toupper (*var);
426		}
427	      if ((len > 0) && (c < *str_uc))
428		return -1;
429	      else
430		return 1;
431	    }
432	}
433      return 0;
434
435    default:
436      assert ("bad case value" == NULL);
437      return -1;
438    }
439
440  if (i == 0)
441    return 0;
442  else if (i < 0)
443    return -1;
444  return 1;
445}
446