code.vuplus.com Git - vuplus_xbmc/blob - lib/enca/iconvcap.c

   1 /*
   2  * @(#) $Id: iconvcap.c,v 1.10 2005/12/01 10:08:53 yeti Exp $
   3  * iconv capability checker by David Necas (Yeti).
   4  * This program is in the public domain.
   5  *
   6  * iconvcap has two modes of operation:
   7  *
   8  * 1. No command line arguments are given.
   9  * Iconvcap tries to find what charsets of interest iconv knows and under
  10  * what names.  It prints #defines directly includable to C source for
  11  * any successfully detected charset (and #defines to NULL for the others).
  12  * It also prints some info to stderr, which then goes to config.log.  It
  13  * returns success (0) iff the following conditions are satisfied
  14  * -- iconv is able to convert ISO-8859-1 to some tested variant of Unicode
  15  *    (see below)---so it's usable at all, and
  16  * -- iconv is able to convert at least two of the other charsets of interest to
  17  *    the same variant of Unicode (we then hope conversion in the opposite
  18  *    direction will work too)
  19  * Otherwise, failure (1) is returned and the output should be ignored, if
  20  * any.
  21  *
  22  * 2. A file name is given on command line.
  23  * Iconvcap reads given file (should contain the just generated #define list)
  24  * and checks if conversion from any to any encoding is possible.  If it is
  25  * OK it returns success (0), otherwise, it fails returning 1.
  26  *
  27  */
  28 #include <stdlib.h>
  29 #include <stdio.h>
  30 #include <iconv.h>
  31 /* string.h or strings.h?  That's a question!
  32  * Don't use const, the compiler may not like it. */
  33 int strncmp(char *s1, char *s2, size_t n);
  34 char* strchr(char *s, int c);
  35 char* strrchr(char *s, int c);
  36 char* strncpy(char *dest, char *src, size_t n);
  37
  38 #define DPREFIX "ICONV_NAME_"
  39
  40 #define TEST_ENC_TO_UNICODE(x) \
  41   fprintf(stderr, "iconvcap: checking for %s -> Unicode... ", #x); \
  42   if (iconv_check(VARIANT_##x, unicode) == 0) { \
  43     printf("#define "DPREFIX"%s \"%s\"\n", #x, FROM); \
  44     ok++; \
  45   } else printf("#define "DPREFIX"%s NULL\n", #x);
  46
  47 /* ANY variant of unicode is good enough here
  48    particular surfaces are defined below */
  49 char* VARIANT_UNICODE[] = {
  50   "UCS2", "UCS-2", "ISO10646/UCS2", "ISO-10646/UCS2", "ISO_10646/UCS2",
  51   "UNICODE", "ISO-10646", "ISO_10646", "ISO10646",
  52   "UCS4", "UCS-4", "ISO10646/UCS4", "ISO-10646/UCS4", "ISO_10646/UCS4",
  53   "UTF8", "UTF-8", "ISO10646/UTF8", "ISO-10646/UTF8", "ISO_10646/UTF8",
  54   "CSUCS2", "CSUCS4", NULL
  55 };
  56
  57 char* VARIANT_ASCII[] = {
  58   "ASCII", "CSASCII", "US-ASCII", "ISO646-US", "ISO_646.IRV:1991", "CP367",
  59   "IBM367", "CP367", "CSPC367", NULL
  60 };
  61
  62 char* VARIANT_ISO88591[] = {
  63   "ISO-8859-1", "ISO8859-1", "8859_1", "ISO_8859-1", "LATIN1", "ISOLATIN1",
  64   "CSLATIN1", "CSISOLATIN1", NULL
  65 };
  66
  67 char* VARIANT_ISO88592[] = {
  68   "ISO-8859-2", "ISO8859-2", "8859_2", "ISO_8859-2", "LATIN2", "ISOLATIN2",
  69   "CSLATIN2", "CSISOLATIN2", "ISO-IR-101", NULL
  70 };
  71
  72 char* VARIANT_ISO88594[] = {
  73   "ISO-8859-4", "ISO8859-4", "8859_4", "ISO_8859-4", "LATIN4", "ISOLATIN4",
  74   "CSLATIN4", "CSISOLATIN4", "ISO-IR-110", NULL
  75 };
  76
  77 char* VARIANT_ISO88595[] = {
  78   "ISO-8859-5", "ISO8859-5", "8859_5", "ISO_8859-5", "ISOCYRILLIC",
  79   "CSISOCYRILLIC", "ISO-IR-144", NULL
  80 };
  81
  82 char* VARIANT_ISO885913[] = {
  83   "ISO-8859-13", "ISO8859-13", "8859_13", "ISO_8859-13", "LATIN7", "ISOLATIN7",
  84   "CSLATIN7", "CSISOLATIN7", "ISO-IR-179A", "ISOBALTIC", "CSISOBALTIC",
  85   "CSISOLATINBALTIC", NULL
  86 };
  87
  88 char* VARIANT_ISO885916[] = {
  89   "ISO-8859-16", "ISO8859-16", "8859_16", "ISO_8859-16", "ISO-IR-226",
  90   "LATIN10", "ISOLATIN10", "CSLATIN10", "CSISOLATIN10", NULL
  91 };
  92
  93 char* VARIANT_BALTIC[] = {
  94   "BALTIC", "CSBALTIC", "ISO-IR-179", NULL
  95 };
  96
  97 char* VARIANT_IBM852[] = {
  98   "IBM852", "CP852", "CP-852", "CP_852", "852", "IBM-852", "IBM_852",
  99   "PC852", "CSPC852", "CSPCP852", NULL
 100 };
 101
 102 char* VARIANT_IBM855[] = {
 103   "IBM855", "CP855", "CP-855", "CP_855", "855", "IBM-855", "IBM_855",
 104   "PC855", "CSPC855", "CSPCP855", NULL
 105 };
 106
 107 char* VARIANT_IBM775[] = {
 108   "IBM775", "CP775", "CP-775", "CP_775", "775", "IBM-775", "IBM_775",
 109   "PC775", "CSPC775", "CSPCP775", NULL
 110 };
 111
 112 char* VARIANT_IBM866[] = {
 113   "IBM866", "CP866", "CP-866", "CP_866", "866", "IBM-866", "IBM_866",
 114   "PC866", "CSPC866", "CSPCP866", NULL
 115 };
 116
 117 char* VARIANT_CP1125[] = {
 118   "CP1125", "1125", "CP-1125", "CP_1125", "MS1125", "MS-1125",
 119   "WINDOWS-1125", NULL
 120 };
 121
 122 char* VARIANT_CP1250[] = {
 123   "CP1250", "1250", "CP-1250", "CP_1250", "MS-EE", "MS1250", "MS-1250",
 124   "WINDOWS-1250", NULL
 125 };
 126
 127 char* VARIANT_CP1251[] = {
 128   "CP1251", "1251", "CP-1251", "CP_1251", "MS-CYRL", "MS1251", "MS-1251",
 129   "WINDOWS-1251", NULL
 130 };
 131
 132 char* VARIANT_CP1257[] = {
 133   "CP1257", "1257", "CP-1257", "CP_1257", "MS-BALT", "MS1257", "MS-1257",
 134   "WINDOWS-1257", "WinBaltRim", NULL
 135 };
 136
 137 char* VARIANT_MACCE[] = {
 138   "MACCE", "MAC-CE", "MAC_CE", "MACINTOSH-CE", "MACEE", "MAC-EE", "MAC_EE",
 139   "MACINTOSH-EE", NULL
 140 };
 141
 142 char* VARIANT_MACCYR[] = {
 143   "MACCYR", "MAC-CYR", "MAC_CYR", "MACINTOSH-CYR", "MACCYRILLIC",
 144   "MAC-CYRILLIC", "MACINTOSH-CYRILLIC", NULL
 145 };
 146
 147 char* VARIANT_KOI8CS2[] = {
 148   "KOI8-CS2", "KOI8CS2", "KOI8_CS2", "KOI-8_CS2", "KOI8CS", "KOI8_CS",
 149   "KOI8-CS", "KOI-8-CS", "KOI_8-CS", "CSKOI8CS2", NULL
 150 };
 151
 152 char* VARIANT_KOI8R[] = {
 153   "KOI8-R", "KOI8_R", "KOI-8_R", "KOI8R", "KOI8_R", "CSKOI8R", NULL
 154 };
 155
 156 char* VARIANT_KOI8U[] = {
 157   "KOI8-U", "KOI8_U", "KOI-8_U", "KOI8U", "KOI8_U", "CSKOI8U", NULL
 158 };
 159
 160 char* VARIANT_KOI8UNI[] = {
 161   "KOI8-UNI", "KOI8_UNI", "KOI-8_UNI", "KOI8UNI", "KOI8_UNI", "CSKOI8UNI",
 162   NULL
 163 };
 164
 165 char* VARIANT_ECMA113[] = {
 166   "ECMA-113", "ECMA-cyrillic", "ECMA-113:1986", "ISO-IR-111", NULL
 167 };
 168
 169 char* VARIANT_KEYBCS2[] = {
 170   "KEYBCS2", "KEYBCS-2", "KAM", "KAMENICKY", "CP895", "895", "PC895",
 171   "csPC895", NULL
 172 };
 173
 174 char* VARIANT_LATEX[] = {
 175   "TEX", "LATEX", "LTEX", NULL
 176 };
 177
 178 char* VARIANT_UCS2[] = {
 179   "UCS-2", "UCS-2BE", "UCS2", "ISO10646/UCS2", "ISO-10646/UCS2",
 180   "ISO_10646/UCS2", "CSUCS2", NULL
 181 };
 182
 183 char* VARIANT_UCS4[] = {
 184   "UCS-4", "UCS-4BE", "UCS4", "ISO10646/UCS4", "ISO-10646/UCS4",
 185   "ISO_10646/UCS4", "CSUCS4", NULL
 186 };
 187
 188 char* VARIANT_UTF7[] = {
 189   "UTF-7", "UTF7", "ISO10646/UTF7", "ISO-10646/UTF7", "ISO_10646/UTF7",
 190   "UNICODE/UTF7", "CSUTF7", NULL
 191 };
 192
 193 char* VARIANT_UTF8[] = {
 194   "UTF-8", "UTF8", "ISO10646/UTF8", "ISO-10646/UTF8", "ISO_10646/UTF8",
 195   "UNICODE/UTF8", "CSUTF8", NULL
 196 };
 197
 198 char* VARIANT_CORK[] = {
 199   "CORK", "T1", NULL
 200 };
 201
 202 char* VARIANT_GBK[] = {
 203         "GBK", "GB2312", "CP936", NULL
 204 };
 205
 206 char* VARIANT_BIG5[] = {
 207         "BIG5", "CP950", NULL
 208 };
 209
 210 char* VARIANT_HZ[] = {
 211   "HZ", "HZ-GB-2312", NULL
 212 };
 213
 214 typedef struct S_EncList {
 215   char *enc;
 216   struct S_EncList *next;
 217 } T_EncList, *P_EncList;
 218
 219 /* for the case we would be linked with braindead librecode */
 220 char *program_name = "iconvcap";
 221
 222 char *FROM, *TO;
 223
 224 /* Local protoypes. */
 225 static int iconv_check        (char **fromlist,
 226                                char **tolist);
 227 static int iconv_check_one    (char *from,
 228                                char *to);
 229 static int check_transitivity (char *fname);
 230
 231 /* main() */
 232 int
 233 main(int argc, char *argv[])
 234 {
 235   int ok;
 236   char *unicode[] = { NULL, NULL };
 237
 238   /* when we are called with some argument, run transitivity test and exit */
 239   if (argc > 1) return check_transitivity(argv[1]);
 240
 241   /* check for conversion ISO-8859-1 -> Unicode */
 242   fprintf(stderr, "iconvcap: checking for ISO8859-1 -> Unicode... ");
 243   if ((ok = iconv_check(VARIANT_ISO88591, VARIANT_UNICODE)) == 0) {
 244     unicode[0] = TO;
 245     printf("#define "DPREFIX"UNICODE \"%s\"\n", unicode[0]);
 246   } else {
 247     fprintf(stderr, "iconvcap: iconv seems to be broken. aborting.\n");
 248     exit(1);
 249   }
 250
 251   /* create table of charset names how iconv uses them */
 252   ok = 0;
 253   TEST_ENC_TO_UNICODE(ASCII);
 254   TEST_ENC_TO_UNICODE(BALTIC);
 255   TEST_ENC_TO_UNICODE(CP1125);
 256   TEST_ENC_TO_UNICODE(CP1250);
 257   TEST_ENC_TO_UNICODE(CP1251);
 258   TEST_ENC_TO_UNICODE(CP1257);
 259   TEST_ENC_TO_UNICODE(ECMA113);
 260   TEST_ENC_TO_UNICODE(IBM852);
 261   TEST_ENC_TO_UNICODE(IBM855);
 262   TEST_ENC_TO_UNICODE(IBM775);
 263   TEST_ENC_TO_UNICODE(IBM866);
 264   TEST_ENC_TO_UNICODE(ISO88592);
 265   TEST_ENC_TO_UNICODE(ISO88594);
 266   TEST_ENC_TO_UNICODE(ISO88595);
 267   TEST_ENC_TO_UNICODE(ISO885913);
 268   TEST_ENC_TO_UNICODE(ISO885916);
 269   TEST_ENC_TO_UNICODE(KEYBCS2);
 270   TEST_ENC_TO_UNICODE(KOI8CS2);
 271   TEST_ENC_TO_UNICODE(KOI8R);
 272   TEST_ENC_TO_UNICODE(KOI8U);
 273   TEST_ENC_TO_UNICODE(KOI8UNI);
 274   TEST_ENC_TO_UNICODE(MACCE);
 275   TEST_ENC_TO_UNICODE(MACCYR);
 276   TEST_ENC_TO_UNICODE(LATEX);
 277   TEST_ENC_TO_UNICODE(UCS2);
 278   TEST_ENC_TO_UNICODE(UCS4);
 279   TEST_ENC_TO_UNICODE(UTF7);
 280   TEST_ENC_TO_UNICODE(UTF8);
 281   TEST_ENC_TO_UNICODE(CORK);
 282   TEST_ENC_TO_UNICODE(GBK);
 283   TEST_ENC_TO_UNICODE(BIG5);
 284   TEST_ENC_TO_UNICODE(HZ);
 285
 286   if (ok >= 2) exit(0);
 287   else exit(1);
 288 }
 289
 290 /* return 0 if conversion from any charset from fromlist to any charset from
 291    tolist is possible and set FROM and TO (globals) to appropriate names
 292    (it's assumed fromlist and tolist are lists of charset aliases) */
 293 static int
 294 iconv_check(char **fromlist, char **tolist)
 295 {
 296   char **from, **to;
 297
 298   for (from = fromlist; *from != NULL; from++) {
 299     for (to = tolist; *to != NULL; to++) {
 300       if (iconv_check_one(*from, *to) == 0) {
 301         fprintf(stderr, "found %s -> %s\n", *from, *to);
 302         FROM = *from;
 303         TO = *to;
 304         return 0;
 305       }
 306     }
 307   }
 308   fprintf(stderr, "failed.\n");
 309   FROM = NULL;
 310   TO = NULL;
 311   return 1;
 312 }
 313
 314 /* check if conversion from any encoding not defined as NULL in file fname
 315    to any other defined there is possible, in other words check transitivity
 316    condition for all defined encodings (we then hope this condition holds
 317    also for encodings we don't know anything about)
 318    returns 0 on success 1 on failure */
 319 static int
 320 check_transitivity(char *fname)
 321 {
 322   char *s, *sb, *se;
 323   FILE *f;
 324   P_EncList enclist = NULL;
 325   P_EncList p_e;
 326
 327
 328   s = (char*)malloc(1024);
 329   if ((f = fopen(fname, "r")) == NULL) {
 330     fprintf(stderr, "iconvcap: cannot open %s\n", fname);
 331     free(s);
 332     return 1;
 333   }
 334
 335   while (fgets(s, 1024, f) != NULL) {
 336     p_e = (P_EncList)malloc(sizeof(T_EncList));
 337     if (strncmp(s, "#define", 7) != 0) {
 338       fprintf(stderr, "iconvcap: malformed input line: %s", s);
 339       fclose(f);
 340       free(s);
 341       return 1;
 342     }
 343     if ((sb = strchr(s, '"')) != NULL) {
 344       if ((se = strrchr(s, '"')) == sb) {
 345         fprintf(stderr, "iconvcap: malformed input line: %s", s);
 346         fclose(f);
 347         free(s);
 348         return 1;
 349       }
 350
 351       p_e->enc = strncpy((char*)malloc(se-sb), sb+1, se-sb-1);
 352       p_e->enc[se-sb-1] = '\0';
 353       p_e->next = enclist;
 354       enclist = p_e;
 355     }
 356   }
 357   fclose(f);
 358
 359   if (enclist == NULL) {
 360     fprintf(stderr, "no valid encodings\n");
 361     free(s);
 362     return 1;
 363   }
 364
 365   while (enclist != NULL) {
 366     for (p_e = enclist->next; p_e != NULL; p_e = p_e->next) {
 367       if (iconv_check_one(enclist->enc, p_e->enc) != 0) {
 368         fprintf(stderr, "iconvap: iconv_open(%s, %s) failed\n",
 369                 enclist->enc, p_e->enc);
 370         free(s);
 371         return 1;
 372       }
 373       if (iconv_check_one(p_e->enc, enclist->enc) != 0) {
 374         fprintf(stderr, "iconvcap: iconv_open(%s, %s) failed\n",
 375                 p_e->enc, enclist->enc);
 376         free(s);
 377         return 1;
 378       }
 379     }
 380     enclist = enclist->next;
 381   }
 382
 383   fprintf(stderr, "iconvcap: transitivity OK\n");
 384   free(s);
 385   return 0;
 386 }
 387
 388 /* check whether conversion from `from' to `to' is possible */
 389 static int
 390 iconv_check_one(char *from, char *to)
 391 {
 392   iconv_t id;
 393
 394   id = iconv_open(from, to);
 395   if (id == (iconv_t)(-1)) return 1;
 396   iconv_close(id);
 397   return 0;
 398 }
 399