2 * @(#) $Id: iconvcap.c,v 1.10 2005/12/01 10:08:53 yeti Exp $
3 * iconv capability checker by David Necas (Yeti).
4 * This program is in the public domain.
6 * iconvcap has two modes of operation:
8 * 1. No command line arguments are given.
9 * Iconvcap tries to find what charsets of interest iconv knows and under
10 * what names. It prints #defines directly includable to C source for
11 * any successfully detected charset (and #defines to NULL for the others).
12 * It also prints some info to stderr, which then goes to config.log. It
13 * returns success (0) iff following conditions are satisfied
14 * -- iconv is able to convert ISO-8859-1 to some tested variant of Unicode
15 * (see below)---so it's usable at all, and
16 * -- iconv is able to convert at least two of the other charsets of interest to
17 * the same variant of Unicode (we then hope conversion in the opposite
18 * direction will work too)
19 * Otherwise, failure (1) is returned and the output should be ignored, if
22 * 2. A file name is given on command line.
23 * Iconvcap reads given file (should contain the just generated #define list)
24 * and checks if conversion from any to any encoding is possible. If it is
25 * OK it returns success (0), otherwise, it fails returning 1.
31 /* string.h or strings.h? That's a question!
32 * Don't use const, the compiler may not like it. */
33 int strncmp(char *s1, char *s2, size_t n);
34 char* strchr(char *s, int c);
35 char* strrchr(char *s, int c);
36 char* strncpy(char *dest, char *src, size_t n);
38 #define DPREFIX "ICONV_NAME_"
/*
 * Probe iconv for charset x and print the matching #define to stdout.
 *
 * x is the bare charset tag (e.g. ASCII); the macro looks the aliases up in
 * VARIANT_<x> and tries each of them against the caller's `unicode' list.
 * On success the alias iconv accepted (global FROM, set by iconv_check())
 * is printed and the caller's `ok' counter is incremented; otherwise the
 * name is #defined to NULL.
 *
 * Wrapped in do { } while (0) so that `TEST_ENC_TO_UNICODE(X);' behaves as
 * a single statement, even inside an unbraced if/else.
 */
#define TEST_ENC_TO_UNICODE(x) \
  do { \
    fprintf(stderr, "iconvcap: checking for %s -> Unicode... ", #x); \
    if (iconv_check(VARIANT_##x, unicode) == 0) { \
      printf("#define "DPREFIX"%s \"%s\"\n", #x, FROM); \
      ok++; \
    } \
    else \
      printf("#define "DPREFIX"%s NULL\n", #x); \
  } while (0)
47 /* ANY variant of unicode is good enough here
48 particular surfaces are defined below */
49 char* VARIANT_UNICODE[] = {
50 "UCS2", "UCS-2", "ISO10646/UCS2", "ISO-10646/UCS2", "ISO_10646/UCS2",
51 "UNICODE", "ISO-10646", "ISO_10646", "ISO10646",
52 "UCS4", "UCS-4", "ISO10646/UCS4", "ISO-10646/UCS4", "ISO_10646/UCS4",
53 "UTF8", "UTF-8", "ISO10646/UTF8", "ISO-10646/UTF8", "ISO_10646/UTF8",
54 "CSUCS2", "CSUCS4", NULL
/* Aliases under which an iconv implementation may know US-ASCII.
   NULL-terminated; each alias is listed once so it is probed once. */
char* VARIANT_ASCII[] = {
  "ASCII", "CSASCII", "US-ASCII", "ISO646-US", "ISO_646.IRV:1991", "CP367",
  "IBM367", "CSPC367", NULL
};
62 char* VARIANT_ISO88591[] = {
63 "ISO-8859-1", "ISO8859-1", "8859_1", "ISO_8859-1", "LATIN1", "ISOLATIN1",
64 "CSLATIN1", "CSISOLATIN1", NULL
67 char* VARIANT_ISO88592[] = {
68 "ISO-8859-2", "ISO8859-2", "8859_2", "ISO_8859-2", "LATIN2", "ISOLATIN2",
69 "CSLATIN2", "CSISOLATIN2", "ISO-IR-101", NULL
72 char* VARIANT_ISO88594[] = {
73 "ISO-8859-4", "ISO8859-4", "8859_4", "ISO_8859-4", "LATIN4", "ISOLATIN4",
74 "CSLATIN4", "CSISOLATIN4", "ISO-IR-110", NULL
77 char* VARIANT_ISO88595[] = {
78 "ISO-8859-5", "ISO8859-5", "8859_5", "ISO_8859-5", "ISOCYRILLIC",
79 "CSISOCYRILLIC", "ISO-IR-144", NULL
82 char* VARIANT_ISO885913[] = {
83 "ISO-8859-13", "ISO8859-13", "8859_13", "ISO_8859-13", "LATIN7", "ISOLATIN7",
84 "CSLATIN7", "CSISOLATIN7", "ISO-IR-179A", "ISOBALTIC", "CSISOBALTIC",
85 "CSISOLATINBALTIC", NULL
88 char* VARIANT_ISO885916[] = {
89 "ISO-8859-16", "ISO8859-16", "8859_16", "ISO_8859-16", "ISO-IR-226",
90 "LATIN10", "ISOLATIN10", "CSLATIN10", "CSISOLATIN10", NULL
93 char* VARIANT_BALTIC[] = {
94 "BALTIC", "CSBALTIC", "ISO-IR-179", NULL
97 char* VARIANT_IBM852[] = {
98 "IBM852", "CP852", "CP-852", "CP_852", "852", "IBM-852", "IBM_852",
99 "PC852", "CSPC852", "CSPCP852", NULL
102 char* VARIANT_IBM855[] = {
103 "IBM855", "CP855", "CP-855", "CP_855", "855", "IBM-855", "IBM_855",
104 "PC855", "CSPC855", "CSPCP855", NULL
107 char* VARIANT_IBM775[] = {
108 "IBM775", "CP775", "CP-775", "CP_775", "775", "IBM-775", "IBM_775",
109 "PC775", "CSPC775", "CSPCP775", NULL
112 char* VARIANT_IBM866[] = {
113 "IBM866", "CP866", "CP-866", "CP_866", "866", "IBM-866", "IBM_866",
114 "PC866", "CSPC866", "CSPCP866", NULL
117 char* VARIANT_CP1125[] = {
118 "CP1125", "1125", "CP-1125", "CP_1125", "MS1125", "MS-1125",
122 char* VARIANT_CP1250[] = {
123 "CP1250", "1250", "CP-1250", "CP_1250", "MS-EE", "MS1250", "MS-1250",
127 char* VARIANT_CP1251[] = {
128 "CP1251", "1251", "CP-1251", "CP_1251", "MS-CYRL", "MS1251", "MS-1251",
132 char* VARIANT_CP1257[] = {
133 "CP1257", "1257", "CP-1257", "CP_1257", "MS-BALT", "MS1257", "MS-1257",
134 "WINDOWS-1257", "WinBaltRim", NULL
137 char* VARIANT_MACCE[] = {
138 "MACCE", "MAC-CE", "MAC_CE", "MACINTOSH-CE", "MACEE", "MAC-EE", "MAC_EE",
142 char* VARIANT_MACCYR[] = {
143 "MACCYR", "MAC-CYR", "MAC_CYR", "MACINTOSH-CYR", "MACCYRILLIC",
144 "MAC-CYRILLIC", "MACINTOSH-CYRILLIC", NULL
147 char* VARIANT_KOI8CS2[] = {
148 "KOI8-CS2", "KOI8CS2", "KOI8_CS2", "KOI-8_CS2", "KOI8CS", "KOI8_CS",
149 "KOI8-CS", "KOI-8-CS", "KOI_8-CS", "CSKOI8CS2", NULL
/* Aliases under which an iconv implementation may know KOI8-R.
   NULL-terminated; each alias is listed once so it is probed once. */
char* VARIANT_KOI8R[] = {
  "KOI8-R", "KOI8_R", "KOI-8_R", "KOI8R", "CSKOI8R", NULL
};
/* Aliases under which an iconv implementation may know KOI8-U.
   NULL-terminated; each alias is listed once so it is probed once. */
char* VARIANT_KOI8U[] = {
  "KOI8-U", "KOI8_U", "KOI-8_U", "KOI8U", "CSKOI8U", NULL
};
160 char* VARIANT_KOI8UNI[] = {
161 "KOI8-UNI", "KOI8_UNI", "KOI-8_UNI", "KOI8UNI", "KOI8_UNI", "CSKOI8UNI",
165 char* VARIANT_ECMA113[] = {
166 "ECMA-113", "ECMA-cyrillic", "ECMA-113:1986", "ISO-IR-111", NULL
169 char* VARIANT_KEYBCS2[] = {
170 "KEYBCS2", "KEYBCS-2", "KAM", "KAMENICKY", "CP895", "895", "PC895",
174 char* VARIANT_LATEX[] = {
175 "TEX", "LATEX", "LTEX", NULL
178 char* VARIANT_UCS2[] = {
179 "UCS-2", "UCS-2BE", "UCS2", "ISO10646/UCS2", "ISO-10646/UCS2",
180 "ISO_10646/UCS2", "CSUCS2", NULL
183 char* VARIANT_UCS4[] = {
184 "UCS-4", "UCS-4BE", "UCS4", "ISO10646/UCS4", "ISO-10646/UCS4",
185 "ISO_10646/UCS4", "CSUCS4", NULL
188 char* VARIANT_UTF7[] = {
189 "UTF-7", "UTF7", "ISO10646/UTF7", "ISO-10646/UTF7", "ISO_10646/UTF7",
190 "UNICODE/UTF7", "CSUTF7", NULL
193 char* VARIANT_UTF8[] = {
194 "UTF-8", "UTF8", "ISO10646/UTF8", "ISO-10646/UTF8", "ISO_10646/UTF8",
195 "UNICODE/UTF8", "CSUTF8", NULL
198 char* VARIANT_CORK[] = {
202 char* VARIANT_GBK[] = {
203 "GBK", "GB2312", "CP936", NULL
206 char* VARIANT_BIG5[] = {
207 "BIG5", "CP950", NULL
210 char* VARIANT_HZ[] = {
211 "HZ", "HZ-GB-2312", NULL
214 typedef struct S_EncList {
216 struct S_EncList *next;
217 } T_EncList, *P_EncList;
219 /* for the case we would be linked with braindead librecode */
220 char *program_name = "iconvcap";
224 /* Local prototypes. */
225 static int iconv_check (char **fromlist,
227 static int iconv_check_one (char *from,
229 static int check_transitivity (char *fname);
233 main(int argc, char *argv[])
236 char *unicode[] = { NULL, NULL };
238 /* when we are called with some argument, run transitivity test and exit */
239 if (argc > 1) return check_transitivity(argv[1]);
241 /* check for conversion ISO-8859-1 -> Unicode */
242 fprintf(stderr, "iconvcap: checking for ISO8859-1 -> Unicode... ");
243 if ((ok = iconv_check(VARIANT_ISO88591, VARIANT_UNICODE)) == 0) {
245 printf("#define "DPREFIX"UNICODE \"%s\"\n", unicode[0]);
247 fprintf(stderr, "iconvcap: iconv seems to be broken. aborting.\n");
251 /* create table of charset names how iconv uses them */
253 TEST_ENC_TO_UNICODE(ASCII);
254 TEST_ENC_TO_UNICODE(BALTIC);
255 TEST_ENC_TO_UNICODE(CP1125);
256 TEST_ENC_TO_UNICODE(CP1250);
257 TEST_ENC_TO_UNICODE(CP1251);
258 TEST_ENC_TO_UNICODE(CP1257);
259 TEST_ENC_TO_UNICODE(ECMA113);
260 TEST_ENC_TO_UNICODE(IBM852);
261 TEST_ENC_TO_UNICODE(IBM855);
262 TEST_ENC_TO_UNICODE(IBM775);
263 TEST_ENC_TO_UNICODE(IBM866);
264 TEST_ENC_TO_UNICODE(ISO88592);
265 TEST_ENC_TO_UNICODE(ISO88594);
266 TEST_ENC_TO_UNICODE(ISO88595);
267 TEST_ENC_TO_UNICODE(ISO885913);
268 TEST_ENC_TO_UNICODE(ISO885916);
269 TEST_ENC_TO_UNICODE(KEYBCS2);
270 TEST_ENC_TO_UNICODE(KOI8CS2);
271 TEST_ENC_TO_UNICODE(KOI8R);
272 TEST_ENC_TO_UNICODE(KOI8U);
273 TEST_ENC_TO_UNICODE(KOI8UNI);
274 TEST_ENC_TO_UNICODE(MACCE);
275 TEST_ENC_TO_UNICODE(MACCYR);
276 TEST_ENC_TO_UNICODE(LATEX);
277 TEST_ENC_TO_UNICODE(UCS2);
278 TEST_ENC_TO_UNICODE(UCS4);
279 TEST_ENC_TO_UNICODE(UTF7);
280 TEST_ENC_TO_UNICODE(UTF8);
281 TEST_ENC_TO_UNICODE(CORK);
282 TEST_ENC_TO_UNICODE(GBK);
283 TEST_ENC_TO_UNICODE(BIG5);
284 TEST_ENC_TO_UNICODE(HZ);
286 if (ok >= 2) exit(0);
290 /* return 0 if conversion from any charset from fromlist to any charset from
291 tolist is possible and set FROM and TO (globals) to appropriate names
292 (it's assumed fromlist and tolist are lists of charset aliases) */
294 iconv_check(char **fromlist, char **tolist)
298 for (from = fromlist; *from != NULL; from++) {
299 for (to = tolist; *to != NULL; to++) {
300 if (iconv_check_one(*from, *to) == 0) {
301 fprintf(stderr, "found %s -> %s\n", *from, *to);
308 fprintf(stderr, "failed.\n");
314 /* check if conversion from any encoding not defined as NULL in file fname
315 to any other defined there is possible, in other words check transitivity
316 condition for all defined encodings (we then hope this condition holds
317 also for encodings we don't know anything about)
318 returns 0 on success 1 on failure */
320 check_transitivity(char *fname)
324 P_EncList enclist = NULL;
328 s = (char*)malloc(1024);
329 if ((f = fopen(fname, "r")) == NULL) {
330 fprintf(stderr, "iconvcap: cannot open %s\n", fname);
335 while (fgets(s, 1024, f) != NULL) {
336 p_e = (P_EncList)malloc(sizeof(T_EncList));
337 if (strncmp(s, "#define", 7) != 0) {
338 fprintf(stderr, "iconvcap: malformed input line: %s", s);
343 if ((sb = strchr(s, '"')) != NULL) {
344 if ((se = strrchr(s, '"')) == sb) {
345 fprintf(stderr, "iconvcap: malformed input line: %s", s);
351 p_e->enc = strncpy((char*)malloc(se-sb), sb+1, se-sb-1);
352 p_e->enc[se-sb-1] = '\0';
359 if (enclist == NULL) {
360 fprintf(stderr, "no valid encodings\n");
365 while (enclist != NULL) {
366 for (p_e = enclist->next; p_e != NULL; p_e = p_e->next) {
367 if (iconv_check_one(enclist->enc, p_e->enc) != 0) {
368 fprintf(stderr, "iconvap: iconv_open(%s, %s) failed\n",
369 enclist->enc, p_e->enc);
373 if (iconv_check_one(p_e->enc, enclist->enc) != 0) {
374 fprintf(stderr, "iconvcap: iconv_open(%s, %s) failed\n",
375 p_e->enc, enclist->enc);
380 enclist = enclist->next;
383 fprintf(stderr, "iconvcap: transitivity OK\n");
/*
 * Check whether conversion from `from' to `to' is possible.
 *
 * from: name of the source charset
 * to:   name of the target charset
 *
 * Returns 0 when iconv_open() accepts the pair, 1 otherwise.
 */
static int
iconv_check_one(char *from, char *to)
{
  iconv_t id;

  /* iconv_open() takes the target charset first, the source second */
  id = iconv_open(to, from);
  if (id == (iconv_t)(-1)) return 1;
  /* only the ability to open matters; release the descriptor right away */
  iconv_close(id);
  return 0;
}