3 # Copyright (C) 2003, 2004, 2005, 2006 Apple Computer, Inc. All rights reserved.
5 # Redistribution and use in source and binary forms, with or without
6 # modification, are permitted provided that the following conditions
9 # 1. Redistributions of source code must retain the above copyright
10 # notice, this list of conditions and the following disclaimer.
11 # 2. Redistributions in binary form must reproduce the above copyright
12 # notice, this list of conditions and the following disclaimer in the
13 # documentation and/or other materials provided with the distribution.
14 # 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
15 # its contributors may be used to endorse or promote products derived
16 # from this software without specific prior written permission.
18 # THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
19 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 # DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
22 # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Alias table built from the charsets file: process_iana_charset stores, under
# every name of a charset, a reference to the sorted list of all names of that
# charset; process_platform_encodings reads it to expand each listed name.
31 my %aliasesFromCharsetsFile;
# Writes all arguments to STDERR, followed by a newline.
# NOTE(review): the enclosing sub header is not visible in this view; presumably
# this is the body of the `error` helper called throughout the script -- confirm.
40 print STDERR @_, "\n";
# NOTE(review): the enclosing sub header is not visible in this view; presumably
# this is the body of emit_line, which is called with these four arguments.
# Arguments: the name to emit, a prefix prepended to the encoding identifier,
# the encoding identifier itself, and flags ($flags is not used in the lines
# visible here).
46 my ($name, $prefix, $encoding, $flags) = @_;
# A name may be written only once; a repeat is reported through error().
48 error "$name shows up twice in output" if $namesWritten{$name};
49 $namesWritten{$name} = 1;
# Append one `{ "name", <prefix><encoding> },` row to the accumulated $output text.
51 $output .= " { \"$name\", $prefix$encoding },\n";
# Reads the platform encodings file and emits one table row (via emit_line) for
# each name and alias of every platform encoding it lists.
#   $filename       - path of the platform encodings file
#   $PlatformPrefix - prefix passed through to emit_line for every row
# Two line shapes are recognized in the visible code:
#   "PlatformName[, flags]: name1, name2, ..."  -> validated, then rows emitted
#   "PlatformName[, flags]"                     -> only checked for duplicates
# NOTE(review): this view omits some lines of the sub (braces, else branches, and
# the declarations of $firstName, $prevName, %aliases, %seenIANANames); the
# comments below describe only what is visible.
54 sub process_platform_encodings
56 my ($filename, $PlatformPrefix) = @_;
# Strip the directory part; the base name is used only in error messages.
57 my $baseFilename = $filename;
58 $baseFilename =~ s|.*/||;
60 my %seenPlatformNames;
# NOTE(review): two-argument open on a bareword handle, and a bare `die` that
# reports neither the file name nor $!; consider three-argument open with a message.
63 open PLATFORM_ENCODINGS, $filename or die;
65 while (<PLATFORM_ENCODINGS>) {
# Captures: platform name, optional flags (after ", "), and the list of names after ": ".
69 if (my ($PlatformName, undef, $flags, $IANANames) = /^(.+?)(, (.+))?: (.+)$/) {
# The duplicate check below is keyed on the platform name with its flags appended.
# NOTE(review): the conditions guarding the next two assignments are not visible;
# presumably flags are appended when present and default to "NoEncodingFlags" otherwise.
72 my $PlatformNameWithFlags = $PlatformName;
74 $PlatformNameWithFlags .= ", " . $flags;
76 $flags = "NoEncodingFlags";
78 error "Platform encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformNameWithFlags};
79 $seenPlatformNames{$PlatformNameWithFlags} = 1;
81 # Build the aliases list.
82 # Also check that no two names are part of the same entry in the charsets file.
83 my @IANANames = split ", ", $IANANames;
85 my $canonicalFirstName = "";
87 for my $name (@IANANames) {
# While no first name has been recorded, the name may use letters of either
# case, digits, '-' and '_'.
88 if ($firstName eq "") {
89 if ($name !~ /^[-A-Za-z0-9_]+$/) {
90 error "$name, in $baseFilename, has illegal characters in it";
# Presumably the else branch (not visible): later names must be lowercase
# alphanumeric and strictly greater than the previous name in string order.
95 if ($name !~ /^[a-z0-9]+$/) {
96 error "$name, in $baseFilename, has illegal characters in it (must be all lowercase alphanumeric)";
99 if ($name le $prevName) {
100 error "$name comes after $prevName in $baseFilename, but everything must be in alphabetical order";
# Canonical form: lowercased, with '-' and '_' deleted.
105 my $canonicalName = lc $name;
106 $canonicalName =~ tr/-_//d;
108 $canonicalFirstName = $canonicalName if $canonicalFirstName eq "";
110 error "$name is mentioned twice in $baseFilename" if $seenIANANames{$canonicalName};
111 $seenIANANames{$canonicalName} = 1;
# The alias set holds the canonical name plus every name the charsets file
# lists for it; names unknown to the charsets file contribute only themselves.
113 $aliases{$canonicalName} = 1;
114 next if !$aliasesFromCharsetsFile{$canonicalName};
115 for my $alias (@{$aliasesFromCharsetsFile{$canonicalName}}) {
116 $aliases{$alias} = 1;
# `eq` on the two stored array references is true only when both names map to
# the same list, i.e. the same charsets-file entry. The `le` test presumably
# keeps each offending pair from being reported twice.
118 for my $otherName (@IANANames) {
119 next if $canonicalName eq $otherName;
120 if ($aliasesFromCharsetsFile{$otherName}
121 && $aliasesFromCharsetsFile{$canonicalName} eq $aliasesFromCharsetsFile{$otherName}
122 && $canonicalName le $otherName) {
123 error "$baseFilename lists both $name and $otherName under $PlatformName, but that aliasing is already specified in character-sets.txt";
# Emit the first name as written, then every alias in sorted order, skipping
# the canonical form of the first name.
129 emit_line($firstName, $PlatformPrefix, $PlatformName, $flags);
130 for my $alias (sort keys %aliases) {
131 emit_line($alias, $PlatformPrefix, $PlatformName, $flags) if $alias ne $canonicalFirstName;
# A line holding only a platform name (optionally with flags) emits nothing in
# the visible code; it is only checked for duplicates.
133 } elsif (/^([a-zA-Z0-9_]+)(, (.+))?$/) {
134 my $PlatformName = $1;
136 error "Platform encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformName};
137 $seenPlatformNames{$PlatformName} = 1;
# Presumably the final else branch: report the input line number ($.).
139 error "syntax error in $baseFilename, line $.";
143 close PLATFORM_ENCODINGS;
# Records one charset from the charsets file in %aliasesFromCharsetsFile.
#   $canonical_name - the charset's primary name
#   @aliases        - its other names
# Every name (primary and aliases) is mapped to a reference to the same sorted
# list of all the names. Does nothing when $canonical_name is false/empty.
146 sub process_iana_charset
148 my ($canonical_name, @aliases) = @_;
150 return if !$canonical_name;
152 my @names = sort $canonical_name, @aliases;
154 for my $name (@names) {
155 $aliasesFromCharsetsFile{$name} = \@names;
# Parses the charsets file, collecting each "Name:" entry together with its
# "Alias:" entries and handing every completed charset to process_iana_charset.
# Names and aliases are normalized to lowercase with everything except a-z and
# 0-9 removed.
# NOTE(review): this view omits some lines of the sub (argument unpacking, the
# read loop, and the declarations of %seen, $canonical_name and @aliases).
159 sub process_iana_charsets
# NOTE(review): two-argument open on a bareword handle, and a bare `die` that
# reports neither the file name nor $!; consider three-argument open with a message.
163 open CHARSETS, $filename or die;
# Aliases exempted from the "saw ... twice" check below.
170 my %exceptions = ( isoir91 => 1, isoir92 => 1 );
174 if ((my $new_canonical_name) = /Name: ([^ \t]*).*/) {
175 $new_canonical_name = lc $new_canonical_name;
# tr with /c and /d deletes every character that is NOT in a-z0-9.
176 $new_canonical_name =~ tr/a-z0-9//cd;
178 error "saw $new_canonical_name twice in character-sets.txt", if $seen{$new_canonical_name};
179 $seen{$new_canonical_name} = $new_canonical_name;
# A new "Name:" line ends the previous charset: record it before starting the next.
181 process_iana_charset $canonical_name, @aliases;
183 $canonical_name = $new_canonical_name;
185 } elsif ((my $new_alias) = /Alias: ([^ \t]*).*/) {
186 $new_alias = lc $new_alias;
187 $new_alias =~ tr/a-z0-9//cd;
189 # do this after normalizing the alias, sometimes character-sets.txt
190 # has weird escape characters, e.g. \b after None
191 next if $new_alias eq "none";
# %seen maps each name to the canonical name it was seen under; an alias seen
# under a different canonical name is an error unless listed in %exceptions.
193 error "saw $new_alias twice in character-sets.txt $seen{$new_alias}, $canonical_name", if $seen{$new_alias} && $seen{$new_alias} ne $canonical_name && !$exceptions{$new_alias};
194 push @aliases, $new_alias if !$seen{$new_alias};
195 $seen{$new_alias} = $canonical_name;
# Presumably after the read loop: record the last charset collected.
199 process_iana_charset $canonical_name, @aliases;
# Command-line arguments: [0] is handed to the charsets-file parser, [1] is the
# platform encodings file, [2] is the prefix for emitted encoding identifiers.
# The charsets file is processed first so that %aliasesFromCharsetsFile is
# filled before the platform encodings file consults it.
206 process_iana_charsets($ARGV[0]);
207 process_platform_encodings($ARGV[1], $ARGV[2]);
212 // File generated by make-charset-table.pl. Do not edit!
215 #include "CharsetData.h"
219 const CharsetEntry CharsetTable[] = {