2 * Copyright (C) 2011 Google Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
14 * * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 #include "ContentTypeParser.h"
34 #include <wtf/text/CString.h>
35 #include <wtf/text/StringBuilder.h>
39 static void skipSpaces(const String& input, size_t& startIndex)
41 while (startIndex < input.length() && input[startIndex] == ' ')
// Returns true if |c| may appear in an RFC 2045 "token": any US-ASCII
// character except SPACE, CTLs and tspecials.
static bool isTokenCharacter(char c)
{
    // Printable US-ASCII only. The lower bound rejects SPACE and the control
    // characters 0x00-0x1F; the upper bound rejects DEL (0x7F), which is also
    // a CTL, and every non-ASCII value. The test holds whether plain char is
    // signed (non-ASCII is negative) or unsigned (non-ASCII is >= 0x80).
    if (c <= ' ' || c >= 0x7F)
        return false;

    // tspecials: '"' '(' ')' ',' '/', the run ':' ';' '<' '=' '>' '?' '@',
    // and the run '[' '\' ']'.
    return c != '"' && c != '(' && c != ')' && c != ',' && c != '/' && (c < ':' || c > '@') && (c < '[' || c > ']');
}
50 static String parseToken(const String& input, size_t& startIndex)
52 if (startIndex >= input.length())
55 StringBuilder stringBuilder;
56 while (startIndex < input.length()) {
57 char currentCharacter = input[startIndex];
58 if (!isTokenCharacter(currentCharacter))
59 return stringBuilder.toString();
60 stringBuilder.append(currentCharacter);
63 return stringBuilder.toString();
66 static String parseQuotedString(const String& input, size_t& startIndex)
68 if (startIndex >= input.length())
71 if (input[startIndex++] != '"' || startIndex >= input.length())
74 StringBuilder stringBuilder;
75 bool lastCharacterWasBackslash = false;
76 char currentCharacter;
77 while ((currentCharacter = input[startIndex++]) != '"' || lastCharacterWasBackslash) {
78 if (startIndex >= input.length())
80 if (currentCharacter == '\\' && !lastCharacterWasBackslash) {
81 lastCharacterWasBackslash = true;
84 if (lastCharacterWasBackslash)
85 lastCharacterWasBackslash = false;
86 stringBuilder.append(currentCharacter);
88 return stringBuilder.toString();
91 ContentTypeParser::ContentTypeParser(const String& contentType)
92 : m_contentType(contentType.stripWhiteSpace())
97 String ContentTypeParser::charset() const
99 return parameterValueForName("charset");
102 String ContentTypeParser::parameterValueForName(const String& name) const
104 return m_parameters.get(name);
107 size_t ContentTypeParser::parameterCount() const
109 return m_parameters.size();
// From http://tools.ietf.org/html/rfc2045#section-5.1:
//
// content := "Content-Type" ":" type "/" subtype
//            *(";" parameter)
//            ; Matching of media type and subtype
//            ; is ALWAYS case-insensitive.
//
// type := discrete-type / composite-type
//
// discrete-type := "text" / "image" / "audio" / "video" /
//                  "application" / extension-token
//
// composite-type := "message" / "multipart" / extension-token
//
// extension-token := ietf-token / x-token
//
// ietf-token := <An extension token defined by a
//                standards-track RFC and registered
//                with IANA.>
//
// x-token := <The two characters "X-" or "x-" followed, with
//             no intervening white space, by any token>
//
// subtype := extension-token / iana-token
//
// iana-token := <A publicly-defined extension token. Tokens
//                of this form must be registered with IANA
//                as specified in RFC 2048.>
//
// parameter := attribute "=" value
//
// attribute := token
//              ; Matching of attributes
//              ; is ALWAYS case-insensitive.
//
// value := token / quoted-string
//
// token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
//             or tspecials>
//
// tspecials :=  "(" / ")" / "<" / ">" / "@" /
//               "," / ";" / ":" / "\" / <">
//               "/" / "[" / "]" / "?" / "="
//               ; Must be in quoted-string,
//               ; to use within parameter values
158 void ContentTypeParser::parse()
160 DEFINE_STATIC_LOCAL(const String, contentTypeParameterName, ("Content-Type"));
162 if (!m_contentType.startsWith(contentTypeParameterName)) {
163 LOG_ERROR("Invalid Content-Type string '%s'", m_contentType.ascii().data());
166 size_t contentTypeLength = m_contentType.length();
167 size_t index = contentTypeParameterName.length();
168 skipSpaces(m_contentType, index);
169 if (index >= contentTypeLength || m_contentType[index] != ':' || ++index >= contentTypeLength) {
170 LOG_ERROR("Invalid Content-Type string '%s'", m_contentType.ascii().data());
174 // There should not be any quoted strings until we reach the parameters.
175 size_t semiColonIndex = m_contentType.find(';', index);
176 if (semiColonIndex == notFound) {
177 m_mimeType = m_contentType.substring(index).stripWhiteSpace();
181 m_mimeType = m_contentType.substring(index, semiColonIndex - index).stripWhiteSpace();
182 index = semiColonIndex + 1;
184 skipSpaces(m_contentType, index);
185 String key = parseToken(m_contentType, index);
186 if (key.isEmpty() || index >= contentTypeLength) {
187 LOG_ERROR("Invalid Content-Type parameter name.");
190 // Should we tolerate spaces here?
191 if (m_contentType[index++] != '=' || index >= contentTypeLength) {
192 LOG_ERROR("Invalid Content-Type malformed parameter.");
196 // Should we tolerate spaces here?
198 if (m_contentType[index] == '"')
199 value = parseQuotedString(m_contentType, index);
201 value = parseToken(m_contentType, index);
203 if (value.isNull()) {
204 LOG_ERROR("Invalid Content-Type, invalid parameter value.");
208 // Should we tolerate spaces here?
209 if (index < contentTypeLength && m_contentType[index++] != ';') {
210 LOG_ERROR("Invalid Content-Type, invalid character at the end of key/value parameter.");
214 m_parameters.set(key, value);
216 if (index >= contentTypeLength)