names.h revision 133359
1/*
2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice immediately at the beginning of the file, without modification,
11 *    this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 *    must display the following acknowledgement:
17 *    This product includes software developed by Ian F. Darwin and others.
18 * 4. The name of the author may not be used to endorse or promote products
19 *    derived from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
25 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33/*
34 * Names.h - names and types used by ascmagic in file(1).
35 * These tokens are here because they can appear anywhere in
36 * the first HOWMANY bytes, while tokens in MAGIC must
37 * appear at fixed offsets into the file. Don't make HOWMANY
38 * too high unless you have a very fast CPU.
39 *
40 * $Id: names.h,v 1.24 2004/03/22 19:09:12 christos Exp $
41 */
42
43/*
44	modified by Chris Lowth - 9 April 2000
45	to add mime type strings to the types table.
46*/
47
/*
 * Indices into the 'types' table below.  Each L_* value is the row number
 * of the matching entry, so the two lists must stay in the same order.
 */
#define	L_C	0		/* first and foremost on UNIX */
#define	L_CC	1		/* Bjarne's postincrement */
#define	L_FORT	2		/* the oldest one */
#define	L_MAKE	3		/* Makefiles */
#define	L_PLI	4		/* PL/1 */
#define	L_MACH	5		/* some kinda assembler */
#define	L_ENG	6		/* English */
#define	L_PAS	7		/* Pascal */
#define	L_MAIL	8		/* Electronic mail */
#define	L_NEWS	9		/* Usenet Netnews */
#define	L_JAVA	10		/* Java code */
#define	L_HTML	11		/* HTML */
#define	L_BCPL	12		/* BCPL */
#define	L_M4	13		/* M4 */
#define	L_PO	14		/* PO */

/*
 * Description table, indexed by the L_* constants above.
 *	human	description string for the text type
 *	mime	MIME type string for the same text type
 * The rows for the L_* indices are followed by a "cannot happen" row and
 * a { 0, 0 } terminator.
 */
static const struct {
	const char *human;
	const char *mime;
} types[] = {
	{ "C program",				"text/x-c" },		/* L_C */
	{ "C++ program",			"text/x-c++" },		/* L_CC */
	{ "FORTRAN program",			"text/x-fortran" },	/* L_FORT */
	{ "make commands",			"text/x-makefile" },	/* L_MAKE */
	{ "PL/1 program",			"text/x-pl1" },		/* L_PLI */
	{ "assembler program",			"text/x-asm" },		/* L_MACH */
	{ "English",				"text/plain" },		/* L_ENG */
	{ "Pascal program",			"text/x-pascal" },	/* L_PAS */
	{ "mail",				"text/x-mail" },	/* L_MAIL */
	{ "news",				"text/x-news" },	/* L_NEWS */
	{ "Java program",			"text/x-java" },	/* L_JAVA */
	{ "HTML document",			"text/html" },		/* L_HTML */
	{ "BCPL program",			"text/x-bcpl" },	/* L_BCPL */
	{ "M4 macro language pre-processor",	"text/x-m4" },		/* L_M4 */
	{ "PO (gettext message catalogue)",	"text/x-po" },		/* L_PO */
	{ "cannot happen error on names.h/types", "error/x-error" },
	{ 0, 0 }
};

/*
 * Compile-time guard for the "keep in sync" rule: the table must hold one
 * row per L_* index (0 .. L_PO), plus the "cannot happen" row and the
 * terminator.  If the counts drift apart the array size below becomes
 * negative and the build fails.  When a type is added after L_PO, name the
 * new highest constant here.
 */
typedef char names_h_types_in_sync[
    (sizeof(types) / sizeof(types[0]) == L_PO + 3) ? 1 : -1];
87
88/*
89 * XXX - how should we distinguish Java from C++?
90 * The trick used in a Debian snapshot, of having "extends" or "implements"
91 * as tags for Java, doesn't work very well, given that those keywords
92 * are often preceded by "class", which flags it as C++.
93 *
94 * Perhaps we need to be able to say
95 *
96 *	If "class" then
97 *
98 *		if "extends" or "implements" then
99 *			Java
100 *		else
101 *			C++
102 *	endif
103 *
104 * Or should we use other keywords, such as "package" or "import"?
105 * Unfortunately, Ada95 uses "package", and Modula-3 uses "import",
106 * although I infer from the language spec at
107 *
108 *	http://www.research.digital.com/SRC/m3defn/html/m3.html
109 *
110 * that Modula-3 uses "IMPORT" rather than "import", i.e. it must be
111 * in all caps.
112 *
113 * So, for now, we go with "import".  We must put it before the C++
114 * stuff, so that we don't misidentify Java as C++.  Not using "package"
115 * means we won't identify stuff that defines a package but imports
116 * nothing; hopefully, very little Java code imports nothing (one of the
117 * reasons for doing OO programming is to import as much as possible
118 * and write only what you need to, right?).
119 *
120 * Unfortunately, "import" may cause us to misidentify English text
121 * as Java, as it comes after "the" and "The".  Perhaps we need a fancier
122 * heuristic to identify Java?
123 */
124static struct names {
125	const char *name;
126	short type;
127} names[] = {
128	/* These must be sorted by eye for optimal hit rate */
129	/* Add to this list only after substantial meditation */
130	{"msgid",	L_PO},
131	{"dnl",		L_M4},
132	{"import",	L_JAVA},
133	{"\"libhdr\"",	L_BCPL},
134	{"\"LIBHDR\"",	L_BCPL},
135	{"//",		L_CC},
136	{"template",	L_CC},
137	{"virtual",	L_CC},
138	{"class",	L_CC},
139	{"public:",	L_CC},
140	{"private:",	L_CC},
141	{"/*",		L_C},	/* must precede "The", "the", etc. */
142	{"#include",	L_C},
143	{"char",	L_C},
144	{"The",		L_ENG},
145	{"the",		L_ENG},
146	{"double",	L_C},
147	{"extern",	L_C},
148	{"float",	L_C},
149	{"struct",	L_C},
150	{"union",	L_C},
151	{"CFLAGS",	L_MAKE},
152	{"LDFLAGS",	L_MAKE},
153	{"all:",	L_MAKE},
154	{".PRECIOUS",	L_MAKE},
155/* Too many files of text have these words in them.  Find another way
156 * to recognize Fortrash.
157 */
158#ifdef	NOTDEF
159	{"subroutine",	L_FORT},
160	{"function",	L_FORT},
161	{"block",	L_FORT},
162	{"common",	L_FORT},
163	{"dimension",	L_FORT},
164	{"integer",	L_FORT},
165	{"data",	L_FORT},
166#endif	/*NOTDEF*/
167	{".ascii",	L_MACH},
168	{".asciiz",	L_MACH},
169	{".byte",	L_MACH},
170	{".even",	L_MACH},
171	{".globl",	L_MACH},
172	{".text",	L_MACH},
173	{"clr",		L_MACH},
174	{"(input,",	L_PAS},
175	{"dcl",		L_PLI},
176	{"Received:",	L_MAIL},
177	{">From",	L_MAIL},
178	{"Return-Path:",L_MAIL},
179	{"Cc:",		L_MAIL},
180	{"Newsgroups:",	L_NEWS},
181	{"Path:",	L_NEWS},
182	{"Organization:",L_NEWS},
183	{"href=",	L_HTML},
184	{"HREF=",	L_HTML},
185	{"<body",	L_HTML},
186	{"<BODY",	L_HTML},
187	{"<html",	L_HTML},
188	{"<HTML",	L_HTML},
189	{NULL,		0}
190};
191#define NNAMES ((sizeof(names)/sizeof(struct names)) - 1)
192