3 # iExploder - Generates bad HTML files to perform QA for web browsers.
5 # Copyright 2010 Thomas Stromberg - All Rights Reserved.
7 # Licensed under the Apache License, Version 2.0 (the "License");
8 # you may not use this file except in compliance with the License.
9 # You may obtain a copy of the License at
11 # http://www.apache.org/licenses/LICENSE-2.0
13 # Unless required by applicable law or agreed to in writing, software
14 # distributed under the License is distributed on an "AS IS" BASIS,
15 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 # See the License for the specific language governing permissions and
17 # limitations under the License.
22 require './scanner.rb'
23 require './version.rb'
25 # Used to speed up subtest generation
28 # Media extensions to proper mime type map (not that we always listen)
32 'jpg' => 'image/jpeg',
34 'svg' => 'image/svg+xml',
35 'tiff' => 'image/tiff',
37 'ico' => 'image/x-icon',
38 'jng' => 'image/x-jng',
39 'xpm' => 'image/x-portable-pixmap',
41 'snd' => 'audio/basic',
45 # These tags get src properties more often than others
# (mangleTag tests membership in this list, with a 75% chance, before adding
# a src-style attribute).
46 $SRC_TAGS = ['img', 'audio', 'video', 'embed']
# Public accessors. The command-line driver at the bottom of this file assigns
# test_num and subtest_data; config holds the parsed YAML configuration.
49 attr_accessor :test_num, :subtest_data, :lookup_mode, :random_mode, :cgi_url, :browser, :claimed_browser
50 attr_accessor :offset, :lines, :stop_num, :config
# Creates an instance from the YAML configuration file at config_path.
# The parsed configuration is stored in @config and read by the generators.
52 def initialize(config_path)
# NOTE(review): File.open is used without a block and nothing here closes the
# handle; YAML::load on older Psych versions can instantiate arbitrary
# objects -- confirm the config file is always trusted.
53 @config = YAML::load(File.open(config_path))
57 @cgi_url = '/iexploder.cgi'
59 @claimed_browser = nil
# Every word list below is read from a subdirectory of the configured
# 'mangle_data_path'.
74 # These if statements are so that mod_ruby doesn't have to reload the files
76 data_path = @config['mangle_data_path']
77 @cssTags = readTagsDir("#{data_path}/css-properties")
78 @cssPseudoTags = readTagsDir("#{data_path}/css-pseudo")
79 @cssAtRules = readTagsDir("#{data_path}/css-atrules")
80 @htmlTags = readTagsDir("#{data_path}/html-tags")
81 @htmlAttr = readTagsDir("#{data_path}/html-attrs")
82 @htmlValues = readTagsDir("#{data_path}/html-values")
83 @cssValues = readTagsDir("#{data_path}/css-values")
84 @headerValues = readTagsDir("#{data_path}/headers")
85 @protocolValues = readTagsDir("#{data_path}/protocols")
86 @mimeTypes = readTagsDir("#{data_path}/mime-types")
# Media samples go through readMediaDir, which keys file contents by MIME type.
87 @media = readMediaDir("#{data_path}/media")
# Walks the entries of `directory` and, for each regular file, adds the result
# of readTagFile to `values`. Entries that are not regular files are skipped.
90 def readTagsDir(directory)
92 Dir.foreach(directory) { |filename|
93 if File.file?(directory + "/" + filename)
94 values = values + readTagFile(directory + "/" + filename)
# Reads every regular file in `directory` into `data`, keyed by the MIME type
# that $MIME_MAP gives for the file's extension.
100 def readMediaDir(directory)
102 Dir.foreach(directory) { |filename|
103 if File.file?(directory + "/" + filename)
# NOTE(review): split('.') yields one element per dot, so for a name such as
# "a.b.png" `extension` is "b"; a name with no dot leaves it nil.
104 (base, extension) = filename.split('.')
# NOTE(review): presumably $MIME_MAP is a plain Hash, in which case an unknown
# extension yields nil and the file is stored under a nil key -- confirm.
105 mime_type = $MIME_MAP[extension]
106 data[mime_type] = File.read(directory + "/" + filename)
# Iterates over the lines of `filename`, applying the comment/empty-line
# filter below to each one.
112 def readTagFile(filename)
# NOTE(review): File.new is never paired with a close here; the handle is left
# for the garbage collector.
114 File.new(filename).readlines.each { |line|
117 # Don't include comments.
# Only lines starting with "# " (hash followed by a space) count as comments.
118 if (line !~ /^# /) && (line.length > 0)
# Returns a fuzzed value for the HTML attribute named `tag`.
# The attribute name and `choice` together select which generator is used.
126 def generateHtmlValue(tag)
# sub (not gsub): only the first 'EXCLUDED_' marker is removed.
128 tag = tag.sub('EXCLUDED_', '')
# Attribute names beginning with "on" usually get a call-like value: a value
# generated for an empty attribute name followed by "()".
129 if tag =~ /^on/ and choice < 90
130 return generateHtmlValue('') + "()"
# `and`/`or` share one precedence level and associate left, so this reads as
# (src or data or profile) and choice < 90.
131 elsif tag == 'src' or tag == 'data' or tag == 'profile' and choice < 90
132 return generateGarbageUrl(tag)
# Remaining alternatives: a known HTML value, a garbage number, a garbage
# value, a comma-separated number pair, a garbage URL, or overflow data.
137 return @htmlValues[rand(@htmlValues.length)]
139 return generateGarbageNumber()
141 return generateGarbageValue()
143 return generateGarbageNumber() + ',' + generateGarbageNumber()
145 return generateGarbageUrl(tag)
147 return generateOverflow()
# Picks one of the loaded media MIME types at random and returns a test URL
# for the current test number that carries that MIME type.
# `tag` is not referenced in the body.
151 def generateMediaUrl(tag)
152 mime_type = @media.keys[rand(@media.keys.length)]
153 return generateTestUrl(@test_num, nil, nil, mime_type)
# Returns a malformed URL-like string for the attribute or property `tag`.
# Each return below is one alternative: a media test URL, a known protocol
# prefix followed by garbage ('%', '//../' or '//' separators), overflow and
# garbage joined by ':', or plain overflow data.
156 def generateGarbageUrl(tag)
160 return generateMediaUrl(tag)
162 return @protocolValues[rand(@protocolValues.length)] + '%' + generateGarbageValue()
164 return @protocolValues[rand(@protocolValues.length)] + '//../' + generateGarbageValue()
166 return @protocolValues[rand(@protocolValues.length)] + '//' + generateGarbageValue()
168 return generateOverflow() + ":" + generateGarbageValue()
170 return generateGarbageValue() + ":" + generateOverflow()
172 return generateOverflow()
# Returns a fuzzed value for the CSS `property`.
# The property name (minus a leading 'EXCLUDED_' marker) is first matched
# against patterns for the kind of value it most likely takes; the later
# returns are generic alternatives.
176 def generateCssValue(property)
# Unit suffixes appended to size-like values; '' means no unit.
177 size_types = ['', 'em', 'px', '%', 'pt', 'pc', 'ex', 'in', 'cm', 'mm']
182 # return the most likely scenario
183 case property.sub('EXCLUDED_', '')
184 when /-image|content/
185 return 'url(' + generateGarbageUrl(property) + ')'
186 when /-width|-radius|-spacing|margin|padding|height/
187 return generateGarbageValue() + size_types[rand(size_types.length)]
189 return generateGarbageColor()
190 when /-delay|-duration/
191 return generateGarbageValue() + 'ms'
193 return @cssValues[rand(@cssValues.length)]
# NOTE(review): 85 is covered by both 76..85 and 85..98. `when` takes the first
# match, so 85 resolves to the url(...) branch; harmless but probably unintended.
195 when 51..75 then return generateGarbageNumber()
196 when 76..85 then return 'url(' + generateGarbageUrl(property) + ')'
197 when 85..98 then return generateGarbageValue()
199 return generateOverflow()
# Returns a malformed CSS colour string: '#' followed by garbage, an rgb()
# triple of garbage numbers (plain or with '%'), or overflow data.
203 def generateGarbageColor()
205 when 0..50 then return '#' + generateGarbageValue()
206 when 51..70 then return 'rgb(' + generateGarbageNumber() + ',' + generateGarbageNumber() + ',' + generateGarbageNumber() + ')'
207 when 71..98 then return 'rgb(' + generateGarbageNumber() + '%,' + generateGarbageNumber() + '%,' + generateGarbageNumber() + '%)'
209 return generateOverflow()
# Returns a string aimed at numeric parsing: '0', runs of 9s, very long
# decimals, their negative forms, garbage text, or overflow data.
# rand(100) can be 0, so the '9' * rand(100) forms may be empty (or just '-').
213 def generateGarbageNumber()
216 when 0 then return '0'
217 when 1..40 then return '9' * rand(100)
218 when 41..60 then return '999999.' + rand(999999999999999999999).to_s
219 when 61..80 then return '-' + ('9' * rand(100))
220 when 81..90 then return '-999999.' + rand(999999999999999999999).to_s
221 when 91..98 then return generateGarbageText()
223 return generateOverflow()
# Returns a garbage string. Lengths are bounded by the configuration keys
# 'buffer_overflow_length' and 'max_garbage_text_size'.
227 def generateGarbageValue()
# One character with code 0..254, repeated a random number of times.
229 when 0..30 then return rand(255).chr * rand(@config['buffer_overflow_length'])
# Repeated "%n" -- presumably intended to probe printf-style format handling.
230 when 31..50 then return "%n" * 50
# A numeric character reference with a random code point, repeated.
231 when 51..65 then return ("&#" + rand(999999).to_s + ";") * rand(@config['max_garbage_text_size'])
# 0.upto(n) is inclusive, so each loop below runs rand(20) + 2 times.
234 0.upto(rand(20)+1) do
# Appends a literal backslash-x followed by up to four hex digits.
235 junk << "\\x" + rand(65535).to_s(16)
237 return junk.join('') * rand(@config['max_garbage_text_size'])
# Characters that tend to be significant in URLs, escapes and shells.
240 chars = '%?!$#^0123456789ABCDEF%#./\&|;'
241 0.upto(rand(20)+1) do
242 junk << chars[rand(chars.length)].chr
244 return junk.join('') * rand(@config['max_garbage_text_size'])
# Returns one random character (code 0..254) repeated at least
# @config['buffer_overflow_length'] times, plus up to 499 extra repetitions.
248 def generateOverflow()
249 return rand(255).chr * (@config['buffer_overflow_length'] + rand(500))
# Returns garbage text: most often 129 'X' characters, otherwise repeated
# "%n", repeated numeric character references, a garbage value, a repeated
# random character, or overflow data.
252 def generateGarbageText
254 when 0..70 then return 'X' * 129
255 when 71..75 then return "%n" * 15
256 when 76..85 then return ("&#" + rand(9999999999999).to_s + ";") * rand(@config['max_garbage_text_size'])
257 when 86..90 then return generateGarbageValue()
258 when 91..98 then return rand(255).chr * rand(@config['max_garbage_text_size'])
260 return generateOverflow()
# Checks `properties` against the 'exclude' section of the configuration.
# Two keys are looked up: the elements joined with '.', and the same path with
# its first element replaced by '*'. An entry applies when its configured
# value, used as a regular expression, matches @browser.
264 def isPropertyInBlacklist(properties)
265 # Format: [img, src] or [img, style, property]
266 blacklist_entries = []
# Skip everything when the config has no 'exclude' key or its value is nil/false.
267 if @config.has_key?('exclude') and @config['exclude']
268 blacklist_entries << properties.join('.')
# dup so that the caller's array is not modified by the wildcard substitution.
269 wildcard_property = properties.dup
270 wildcard_property[0] = '*'
271 blacklist_entries << wildcard_property.join('.')
272 blacklist_entries.each do |entry|
# The configured value is interpolated into a regex and matched against the
# browser string.
273 if @config['exclude'].has_key?(entry) and @browser =~ /#{@config['exclude'][entry]}/
# Builds inline CSS for `tag` into `out`: one or more randomly chosen
# properties, each given one or more generated values joined by spaces.
281 def generateCssStyling(tag)
# 0.upto(rand(n)) always runs at least once.
283 0.upto(rand(@config['properties_per_style_max'])) {
284 property = @cssTags[rand(@cssTags.length)]
# Blacklisted properties are kept but renamed with an EXCLUDED_ prefix.
285 if isPropertyInBlacklist([tag, 'style', property])
286 property = "EXCLUDED_#{property}"
290 # very small chance we let the tag run on.
296 0.upto(rand(@config['attributes_per_style_property_max'])) {
297 values << generateCssValue(property)
299 out << values.join(' ')
300 # we almost always put the ; there.
# Produces mangled markup for the HTML `tag`, with fuzzed attributes and
# inline styling appended to `out`.
# Unless no_close_chance is truthy, 15% of calls return only a closing tag.
309 def mangleTag(tag, no_close_chance=false)
310 if not no_close_chance and rand(100) < 15
311 return "</" + tag + ">"
317 out << generateOverflow()
# At least one attribute is always requested.
320 attrNum = rand(@config['attributes_per_html_tag_max']) + 1
322 # The HTML head tag does not have many useful attributes, but is always included in tests.
323 if tag == 'head' and rand(100) < 75
325 when 0 then attrs << 'lang'
326 when 1 then attrs << 'dir'
327 when 2 then attrs << 'profile'
330 # 75% of the time, these tags get a src attribute
331 if $SRC_TAGS.include?(tag) and rand(100) < 75
# NOTE(review): this looks up "<tag>.src" directly instead of calling
# isPropertyInBlacklist, so the '*' wildcard entry and the browser regex are
# not applied to the src exclusion -- confirm this is intended.
332 if @config.has_key?('exclude') and @config['exclude'] and @config['exclude'].has_key?("#{tag}.src")
333 attrs << 'EXCLUDED_src'
# Fill the remaining slots with random attributes; blacklisted ones are kept
# but renamed with an EXCLUDED_ prefix.
339 while attrs.length < attrNum
340 attribute = @htmlAttr[rand(@htmlAttr.length)]
341 if isPropertyInBlacklist([tag, attribute])
342 attribute = "EXCLUDED_#{attribute}"
347 # Add a few HTML attributes
359 out << generateHtmlValue(attr)
371 out << generateCssStyling(tag)
381 return rand(99999999999)
# Builds a CSS selector string in `pattern`. The base is a tag name, '*', an
# at-rule or the empty string; the appends further down add combinators, a
# pseudo-class, an attribute matcher, a class or an id.
387 def generateCssPattern()
388 # Generate a CSS selector pattern.
# .dup because `pattern` is extended in place with << below, which would
# otherwise modify the shared entries of @htmlTags / @cssAtRules.
392 when 0..84 then pattern = @htmlTags[rand(@htmlTags.length)].dup
393 when 85..89 then pattern = "*"
394 when 90..94 then pattern = @cssAtRules[rand(@cssAtRules.length)].dup
395 when 95..100 then pattern = ''
# Combinators: descendant (space), child (>), adjacent sibling (+).
399 pattern << " " + @htmlTags[rand(@htmlTags.length)]
403 pattern << " > " + @htmlTags[rand(@htmlTags.length)]
407 pattern << " + " + @htmlTags[rand(@htmlTags.length)]
416 pseudo = @cssPseudoTags[rand(@cssPseudoTags.length)].dup
417 # These tags typically have a parenthesis
418 if (pseudo =~ /^lang|^nth|^not/ and rand(100) < 75 and pseudo !~ /\(/) or rand(100) < 20
424 pseudo << generateGarbageValue()
430 pattern << ":" + pseudo
# Attribute selector: [attr], [attr="..."], or one of the operator forms.
434 html_attr = @htmlAttr[rand(@htmlAttr.length)]
435 match = '[' + html_attr
437 garbage = generateGarbageValue()
439 when 0..25 then match << ']'
440 when 26..50 then match << "=\"#{garbage}\"]"
# NOTE(review): CSS spells the "includes" operator ~=, not =~; this may be
# deliberately malformed -- confirm.
441 when 51..75 then match << "=~\"#{garbage}\"]"
442 when 76..99 then match << "|=\"#{garbage}\"]"
# Class and id selectors built from garbage values.
449 pattern << '.' + generateGarbageValue()
456 pattern << '#' + generateGarbageValue()
460 pattern << ' #' + generateGarbageValue()
468 0.upto(rand(@config['properties_per_style_max'])) {
469 out << generateCssPattern()
474 0.upto(rand(@config['properties_per_style_max'])) {
475 property = @cssTags[rand(@cssTags.length)].dup
476 if isPropertyInBlacklist(['style', 'style', property])
477 property = " EXCLUDED_#{property}"
479 out << " #{property}: "
482 0.upto(rand(@config['attributes_per_style_property_max'])) {
483 values << generateCssValue(property)
485 out << values.join(' ')
499 # Build any malicious javascript here. Fairly naive at the moment.
501 target = @htmlTags[rand(@htmlTags.length)]
502 css_property = @cssTags[rand(@cssTags.length)]
503 css_property2 = @cssTags[rand(@cssTags.length)]
504 html_attr = @htmlAttr[rand(@htmlAttr.length)]
505 css_value = generateCssValue(css_property)
506 html_value = generateHtmlValue(html_attr)
507 html_value2 = generateGarbageNumber()
508 mangled = mangleTag(@htmlTags[rand(@htmlTags.length)]);
509 mangled2 = mangleTag(@htmlTags[rand(@htmlTags.length)]);
512 js << "window.onload=function(){"
513 js << " var ietarget = document.createElement('#{target}');"
514 js << " ietarget.style.#{css_property} = '#{css_value}';"
515 js << " ietarget.#{html_attr} = '#{html_value}';"
516 js << " document.body.appendChild(ietarget);"
517 js << " ietarget.style.#{css_property2} = #{html_value2};"
519 js << " document.write('#{mangled}');"
520 js << " document.write('#{mangled2}');"
# Produces corrupted media data for `mime_type`. Starts from a copy of the
# loaded sample for that type (or garbage text when none is loaded, after
# printing a notice) and then splices and appends garbage.
525 def buildMediaFile(mime_type)
526 if @media.has_key?(mime_type)
# dup so that the cached sample in @media is not modified by the corruption.
527 data = @media[mime_type].dup
529 puts "No media found for #{mime_type}"
530 data = generateGarbageText()
533 # corrupt it in a subtle way
536 garbage = generateGarbageValue()
538 garbage = rand(255).chr * rand(8)
# Feature test for String#encoding: both strings are treated as raw bytes so
# that the splice below does not mix encodings.
541 if "1.9".respond_to?(:encoding)
542 garbage.force_encoding('ASCII-8BIT')
543 data.force_encoding('ASCII-8BIT')
546 garbage_start = rand(data.length)
547 garbage_end = garbage_start + garbage.length
# NOTE(review): a..b is inclusive, so up to garbage.length + 1 bytes are
# replaced by garbage.length bytes and the data usually shrinks by one byte.
# That may be the intended "subtle" corruption -- confirm.
548 data[garbage_start..garbage_end] = garbage
550 data << generateGarbageValue()
555 # Parse the subtest data passed in as part of the URL
# Input format: "<width>_<offset>,<offset>,...". Returns [width, offsets]
# with width as an Integer and offsets as an array of Integers.
556 def parseSubTestData(subtest_data)
557 # Initialize with one line at 0
# NOTE(review): to_i == 0 is also true for any string that does not start with
# a non-zero number (e.g. "0_1,2" or "abc"), so those fall back to the default.
558 if not subtest_data or subtest_data.to_i == 0
559 return [@config['initial_subtest_width'], [0]]
# NOTE(review): when the string has no '_', offsets_string is nil and the
# split on the next line raises NoMethodError.
561 (lines_at_time, offsets_string) = subtest_data.split('_')
562 offsets = offsets_string.split(',').map! {|x| x.to_i }
563 return [lines_at_time.to_i, offsets]
# Builds the query string of a test URL in `url`.
# Parameters written below: t (test number), s ("<width>_<offsets>"),
# l (lookup mode), z, x (stop number), m (MIME type), b (browser).
566 def generateTestUrl(test_num, subtest_width=nil, subtest_offsets=nil, mime_type=nil)
# With more offsets than 'subtest_combinations_max' allows, the URL switches to
# the test_redirect lookup mode instead of carrying the subtest data.
569 if subtest_offsets.length > @config['subtest_combinations_max']
570 url << "t=" << test_num.to_s << "&l=test_redirect&z=THE_END"
572 url << "t=" << test_num.to_s << "&s=" << subtest_width.to_s << "_" << subtest_offsets.join(',')
575 url << "t=" << test_num.to_s
581 url << "&x=" << @stop_num.to_s
# MIME type and browser string are URL-escaped; the other values are written
# as-is.
585 url << '&m=' + CGI::escape(mime_type)
588 url << "&b=" << CGI::escape(@browser)
# Builds the mangled body of a test page: picks random tags into tagList,
# biases the list toward the configured favourite tags, mangles each tag and
# wraps the result in START/END marker comments appended to mangled_tags.
592 def buildBodyTags(tag_count)
594 # subtract the <body> tag from tag_count.
595 1.upto(tag_count-1) { tagList << @htmlTags[rand(@htmlTags.length)] }
597 # Lean ourselves toward lots of img and src tests
# 'favor_html_tags' is iterated as (tag, percent) pairs; percent is the chance
# out of 100 that the tag overwrites one random slot.
598 for tag, percent in @config['favor_html_tags']
599 if rand(100) < percent.to_f
600 # Don't overwrite the body tag.
# rand(length - 1) + 1 is never 0, so index 0 is preserved.
601 tagList[rand(tagList.length-1)+1] = tag
605 # Now we have our hitlist of tags, let's mangle them.
607 tagList.each do |tag|
608 tag_data = mangleTag(tag)
# Script and style content comes from the dedicated builders; for other tags
# the content is usually garbage text.
611 tag_data = "<script>"
613 tag_data << buildJavaScript() + "\n" + "</script>\n"
618 tag_data << buildStyleTag() + "\n" + "</style>\n"
619 elsif rand(100) <= 90
620 tag_data << generateGarbageText() << "\n"
626 tag_data << "</#{tag}>\n"
628 mangled_tags << "\n<!-- START #{tag} -->\n" + tag_data + "\n<!-- END #{tag} -->\n"
# Builds the mangled header tags of a test page. The tag list is 'html',
# 'head', then tag_count - 1 random picks from the valid head tags, then one
# random tag from the full HTML tag list; each is mangled in turn.
633 def buildHeaderTags(tag_count)
634 valid_head_tags = ['title', 'base', 'link', 'meta']
635 header_tags = ['html', 'head']
636 1.upto(tag_count-1) { header_tags << valid_head_tags[rand(valid_head_tags.length)] }
637 header_tags << @htmlTags[rand(@htmlTags.length)]
639 header_tags.each do |tag|
# `no_close_chance=true` is a local assignment whose value (true) is passed
# positionally; it is not a keyword argument. The effect is that header tags
# are never replaced by a bare closing tag.
640 mangled_tags << mangleTag(tag, no_close_chance=true)
# Builds the static page shown when the browser survived both redirects.
# NOTE(review): page_type is not referenced in the lines below, and the
# <head> element is not closed before <body> -- confirm whether intended.
645 def buildSurvivedPage(page_type)
646 page = "<html><head>"
647 page << "<body>Bummer. You survived both redirects. Let me go sulk in the corner.</body>"
# Builds the markup that sends the browser on to the next URL, as both a
# META refresh and a JavaScript timer.
# Lookup modes chain as test_redirect -> test_another_redirect ->
# survived_redirect; any other mode is passed through unchanged.
652 def buildRedirect(test_num, subtest_data, lookup_mode, stop_num=nil)
654 if lookup_mode == '1' or stop_num == test_num
# NOTE(review): this reads @subtest_data rather than the subtest_data
# parameter -- confirm the two are always the same.
659 width, offsets = parseSubTestData(@subtest_data)
664 # We still need a redirect, but don't bother generating new data.
666 redirect_url = generateTestUrl(test_num, width, offsets)
667 if lookup_mode == 'test_redirect'
668 redirect_url << "&l=test_another_redirect"
669 elsif lookup_mode == 'test_another_redirect'
670 redirect_url << "&l=survived_redirect"
672 redirect_url << "&l=#{lookup_mode}"
675 # This is a normal redirect going on to the next page. If we have subtest, get the next one.
# combine_combo_creator is not defined in this file (presumably it comes from
# scanner.rb); only its first two return values are used here.
677 width, offsets = combine_combo_creator(@config['html_tags_per_page'], width, offsets)[0..1]
679 redirect_url = generateTestUrl(nextTestNum(), width, offsets)
# Immediate META refresh.
682 redirect_code = "\t<META HTTP-EQUIV=\"Refresh\" content=\"0;URL=#{redirect_url}\">\n"
683 # use both techniques, because you never know how you might be corrupting yourself.
# JavaScript fallback fires after 1000 ms.
684 redirect_code << "\t<script language=\"javascript\">setTimeout('window.location=\"#{redirect_url}\"', 1000);</script>\n"
689 if @lookup_mode == 'survived_redirect'
690 return self.buildSurvivedPage(@lookup_mode)
692 tag_count = @config['html_tags_per_page']
694 if $TEST_CACHE.has_key?(@test_num)
695 (header_tags, body_tags) = $TEST_CACHE[@test_num]
697 header_tags = buildHeaderTags(3)
698 body_tags = buildBodyTags(tag_count - header_tags.length)
703 header_tags.length => 'body'
706 if @subtest_data and @subtest_data.length > 0
707 if not $TEST_CACHE.has_key?(@test_num)
708 $TEST_CACHE[@test_num] = [header_tags, body_tags]
710 (width, offsets) = parseSubTestData(@subtest_data)
711 lines = combine_combo_creator(tag_count, width, offsets)[2]
712 all_tags = header_tags + body_tags
713 body_start = header_tags.length
716 # <html> and <body> are required, regardless of their existence in the subtest data.
717 0.upto(tag_count) do |line_number|
719 if lines.include?(line_number)
720 tag_data = all_tags[line_number]
721 elsif required_tags.key?(line_number)
722 tag_data = "<" + required_tags[line_number] + ">"
725 if line_number < body_start
726 header_tags << tag_data
728 body_tags << tag_data
732 header_tags << "<!-- subtest mode: #{offsets.length} combinations, width: #{width} -->"
735 htmlText = header_tags[0..1].join("\n\t")
736 htmlText << buildRedirect(@test_num, @subtest_data, @lookup_mode, @stop_num)
737 htmlText << "<title>[#{@test_num}:#{@subtest_data}] iExploder #{$VERSION} - #{generateGarbageText()}</title>\n"
738 if @claimed_browser and @claimed_browser.length > 1
739 show_browser = @claimed_browser
741 show_browser = @browser
743 htmlText << "\n<!-- iExploder #{$VERSION} | test #{@test_num}:#{@subtest_data} at #{Time.now} -->\n"
744 htmlText << "<!-- browser: #{show_browser} -->\n"
745 htmlText << header_tags[2..-1].join("\n\t")
746 htmlText << "\n</head>\n\n"
747 htmlText << body_tags.join("\n")
748 htmlText << "</body>\n</html>"
# Builds the HTTP response headers as a Hash. 'Content-Type' is always
# `mime_type`; one or more random header names are then picked (skipping
# banned ones), de-duplicated, and given garbage values.
752 def buildHeaders(mime_type)
755 response = {'Content-Type' => mime_type}
# 0.upto(rand(n)) always runs at least once.
756 0.upto(rand(@config['headers_per_page_max'])) do
757 try_header = @headerValues[rand(@headerValues.length)]
# The banned-header comparison is done on the lower-cased name.
758 if ! banned_headers.include?(try_header.downcase)
759 use_headers << try_header
762 for header in use_headers.uniq
# The value is either a garbage number or a garbage URL.
764 response[header] = generateGarbageNumber()
766 response[header] = generateGarbageUrl(header)
# Command-line driver. Arguments: test number, optional subtest data,
# optional MIME type. Prints either an HTML page or the response headers
# followed by a corrupted media file.
776 ie = IExploder.new('config.yaml')
# NOTE(review): nil.to_i is 0 and 0 is truthy in Ruby, so the `|| 1` fallback
# is never taken; with no argument test_num becomes 0. `(ARGV[0] || 1).to_i`
# would give the apparent intent.
777 ie.test_num = ARGV[0].to_i || 1
# `|| nil` is redundant: a missing argument is already nil.
778 ie.subtest_data = ARGV[1] || nil
779 mime_type = ARGV[2] || nil
782 html_output = ie.buildPage()
785 headers = ie.buildHeaders(mime_type)
786 for (key, value) in headers
787 puts "#{key}: #{value}"
789 puts "Mime-Type: #{mime_type}"
790 puts ie.buildMediaFile(mime_type)