Tools/Scripts/webkitpy/layout_tests/controllers/manager.py
1 #!/usr/bin/env python
2 # Copyright (C) 2010 Google Inc. All rights reserved.
3 # Copyright (C) 2010 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of Szeged
4 #
5 # Redistribution and use in source and binary forms, with or without
6 # modification, are permitted provided that the following conditions are
7 # met:
8 #
9 #     * Redistributions of source code must retain the above copyright
10 # notice, this list of conditions and the following disclaimer.
11 #     * Redistributions in binary form must reproduce the above
12 # copyright notice, this list of conditions and the following disclaimer
13 # in the documentation and/or other materials provided with the
14 # distribution.
15 #     * Neither the name of Google Inc. nor the names of its
16 # contributors may be used to endorse or promote products derived from
17 # this software without specific prior written permission.
18 #
19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 """
32 The Manager runs a series of tests (TestType interface) against a set
33 of test files.  If a test file fails a TestType, it returns a list of TestFailure
34 objects to the Manager. The Manager then aggregates the TestFailures to
35 create a final report.
36 """
37
38 from __future__ import with_statement
39
40 import errno
41 import logging
42 import math
43 import Queue
44 import random
45 import re
46 import sys
47 import time
48
49 from webkitpy.common.checkout.scm import default_scm
50 from webkitpy.layout_tests.controllers import manager_worker_broker
51 from webkitpy.layout_tests.controllers import worker
52 from webkitpy.layout_tests.layout_package import json_layout_results_generator
53 from webkitpy.layout_tests.layout_package import json_results_generator
54 from webkitpy.layout_tests.models import test_expectations
55 from webkitpy.layout_tests.models import test_failures
56 from webkitpy.layout_tests.models import test_results
57 from webkitpy.layout_tests.models.test_input import TestInput
58 from webkitpy.layout_tests.models.result_summary import ResultSummary
59 from webkitpy.layout_tests.views import printing
60
61 from webkitpy.tool import grammar
62
63 _log = logging.getLogger(__name__)
64
65 # Builder base URL where we have the archived test results.
66 BUILDER_BASE_URL = "http://build.chromium.org/buildbot/layout_test_results/"
67
68 TestExpectations = test_expectations.TestExpectations
69
70
71 # FIXME: This should be on the Manager class (since that's the only caller)
72 # or split off from Manager onto another helper class, but should not be a free function.
73 # Most likely this should be made into its own class, and this super-long function
74 # split into many helper functions.
75 def summarize_results(port_obj, expectations, result_summary, retry_summary, test_timings, only_unexpected, interrupted):
76     """Summarize failing results as a dict.
77
78     FIXME: split this data structure into a separate class?
79
80     Args:
81         port_obj: interface to port-specific hooks
82         expectations: test_expectations.TestExpectations object
83         result_summary: summary object from initial test runs
84         retry_summary: summary object from final test run of retried tests
85         test_timings: a list of TestResult objects which contain test runtimes in seconds
86         only_unexpected: whether to return a summary only for the unexpected results
        interrupted: whether the test run was interrupted before it completed
87     Returns:
88         A dictionary containing a summary of the unexpected results from the
89         run, with the following fields:
90         'version': a version indicator
91         'fixable': # of fixable tests (NOW - PASS)
92         'skipped': # of skipped tests (NOW & SKIPPED)
93         'num_regressions': # of non-flaky failures
94         'num_flaky': # of flaky failures
95         'num_passes': # of unexpected passes
96         'tests': a dict of tests -> {'expected': '...', 'actual': '...'}
97     """
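    # Illustrative sketch (the values here are made up for this comment): for a
    # run with a single flaky text failure, the dict built below might look
    # roughly like:
    #
    #   {
    #       'version': 3,
    #       'fixable': 12,
    #       'skipped': 4,
    #       'num_passes': 1,
    #       'num_flaky': 1,
    #       'num_regressions': 0,
    #       'uses_expectations_file': True,
    #       'interrupted': False,
    #       'layout_tests_dir': '/checkout/LayoutTests',
    #       'has_wdiff': False,
    #       'has_pretty_patch': False,
    #       'revision': '95864',
    #       'tests': {
    #           'fast': {
    #               'css': {
    #                   'flaky.html': {'expected': 'PASS', 'actual': 'TEXT PASS'},
    #               },
    #           },
    #       },
    #   }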
98     results = {}
99     results['version'] = 3
100
101     tbe = result_summary.tests_by_expectation
102     tbt = result_summary.tests_by_timeline
103     results['fixable'] = len(tbt[test_expectations.NOW] - tbe[test_expectations.PASS])
104     results['skipped'] = len(tbt[test_expectations.NOW] & tbe[test_expectations.SKIP])
105
106     num_passes = 0
107     num_flaky = 0
108     num_regressions = 0
109     keywords = {}
110     for expectation_string, expectation_enum in TestExpectations.EXPECTATIONS.iteritems():
111         keywords[expectation_enum] = expectation_string.upper()
112
113     for modifier_string, modifier_enum in TestExpectations.MODIFIERS.iteritems():
114         keywords[modifier_enum] = modifier_string.upper()
115
116     tests = {}
117     original_results = result_summary.unexpected_results if only_unexpected else result_summary.results
118
119     for test_name, result in original_results.iteritems():
120         # Note that if a test crashed in the original run, we ignore
121         # whether or not it crashed when we retried it (if we retried it),
122         # and always consider the result not flaky.
123         expected = expectations.get_expectations_string(test_name)
124         result_type = result.type
125         actual = [keywords[result_type]]
126
127         if result_type == test_expectations.SKIP:
128             continue
129
130         test_dict = {}
131         if result.has_stderr:
132             test_dict['has_stderr'] = True
133
134         if result_type == test_expectations.PASS:
135             num_passes += 1
136             # FIXME: include passing tests that have stderr output.
137             if expected == 'PASS':
138                 continue
139         elif result_type == test_expectations.CRASH:
140             num_regressions += 1
141         elif test_name in result_summary.unexpected_results:
142             if test_name not in retry_summary.unexpected_results:
143                 actual.extend(expectations.get_expectations_string(test_name).split(" "))
144                 num_flaky += 1
145             else:
146                 retry_result_type = retry_summary.unexpected_results[test_name].type
147                 if result_type != retry_result_type:
148                     actual.append(keywords[retry_result_type])
149                     num_flaky += 1
150                 else:
151                     num_regressions += 1
152
153         test_dict['expected'] = expected
154         test_dict['actual'] = " ".join(actual)
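        # For example, a test expected to PASS that failed its text diff on the
        # first run but passed on the retry ends up with expected='PASS' and
        # actual='TEXT PASS', which is what marks it as flaky above.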
155         # FIXME: Set this correctly once https://webkit.org/b/37739 is fixed
156         # and only set it if there actually is stderr data.
157
158         failure_types = [type(f) for f in result.failures]
159         # FIXME: get rid of all these is_* values once there is a 1:1 map between
160         # TestFailure type and test_expectations.EXPECTATION.
161         if test_failures.FailureMissingAudio in failure_types:
162             test_dict['is_missing_audio'] = True
163
164         if test_failures.FailureReftestMismatch in failure_types:
165             test_dict['is_reftest'] = True
166
167         for f in result.failures:
168             if isinstance(f, test_failures.FailureReftestMismatch):
169                 test_dict['is_reftest'] = True
170
171         if test_failures.FailureReftestMismatchDidNotOccur in failure_types:
172             test_dict['is_mismatch_reftest'] = True
173
174         if test_failures.FailureMissingResult in failure_types:
175             test_dict['is_missing_text'] = True
176
177         if test_failures.FailureMissingImage in failure_types or test_failures.FailureMissingImageHash in failure_types:
178             test_dict['is_missing_image'] = True
179
180         # Store test hierarchically by directory. e.g.
181         # foo/bar/baz.html: test_dict
182         # foo/bar/baz1.html: test_dict
183         #
184         # becomes
185         # foo: {
186         #     bar: {
187         #         baz.html: test_dict,
188         #         baz1.html: test_dict
189         #     }
190         # }
191         parts = test_name.split('/')
192         current_map = tests
193         for i, part in enumerate(parts):
194             if i == (len(parts) - 1):
195                 current_map[part] = test_dict
196                 break
197             if part not in current_map:
198                 current_map[part] = {}
199             current_map = current_map[part]
200
201     results['tests'] = tests
202     results['num_passes'] = num_passes
203     results['num_flaky'] = num_flaky
204     results['num_regressions'] = num_regressions
205     results['uses_expectations_file'] = port_obj.uses_test_expectations_file()
206     results['interrupted'] = interrupted  # Does results.html have enough information to compute this itself? (by checking total number of results vs. total number of tests?)
207     results['layout_tests_dir'] = port_obj.layout_tests_dir()
208     results['has_wdiff'] = port_obj.wdiff_available()
209     results['has_pretty_patch'] = port_obj.pretty_patch_available()
210     try:
211         results['revision'] = default_scm().head_svn_revision()
212     except Exception, e:
213         # FIXME: We would like to warn here, but that would cause all passing_run integration tests
214         # to fail, since they assert that we have no logging output.
215         # The revision lookup always fails when running the tests since it tries to read from
216         # "/mock-checkout" using the real file system (since there is no way to mock out detect_scm_system at current).
217         # Once we fix detect_scm_system to use the mock file system we can add this log back.
218         #_log.warn("Failed to determine svn revision for checkout (cwd: %s, webkit_base: %s), leaving 'revision' key blank in full_results.json.\n%s" % (port_obj._filesystem.getcwd(), port_obj.path_from_webkit_base(), e))
219         # Handle cases where we're running outside of version control.
220         import traceback
221         _log.debug('Failed to learn head svn revision:')
222         _log.debug(traceback.format_exc())
223         results['revision'] = ""
224
225     return results
226
227
228 class TestRunInterruptedException(Exception):
229     """Raised when a test run should be stopped immediately."""
230     def __init__(self, reason):
231         Exception.__init__(self)
232         self.reason = reason
233         self.msg = reason
234
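    # Defining __reduce__ keeps the exception picklable with its reason intact,
    # e.g. when it has to cross a process boundary between workers and the manager.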
235     def __reduce__(self):
236         return self.__class__, (self.reason,)
237
238
239 class WorkerException(Exception):
240     """Raised when we receive an unexpected/unknown exception from a worker."""
241     pass
242
243
244 class TestShard(object):
245     """A test shard is a named list of TestInputs."""
246
247     # FIXME: Make this class visible, used by workers as well.
248     def __init__(self, name, test_inputs):
249         self.name = name
250         self.test_inputs = test_inputs
251
252     def __repr__(self):
253         return "TestShard(name='%s', test_inputs=%s)" % (self.name, self.test_inputs)
254
255     def __eq__(self, other):
256         return self.name == other.name and self.test_inputs == other.test_inputs
257
258
259 class Manager(object):
260     """A class for managing the running of a series of tests against a set
261     of layout test files."""
262
263
264     # The per-test timeout in milliseconds, if no --time-out-ms option was
265     # given to run_webkit_tests. This should correspond to the default timeout
266     # in DumpRenderTree.
267     DEFAULT_TEST_TIMEOUT_MS = 6 * 1000
268
269     def __init__(self, port, options, printer):
270         """Initialize test runner data structures.
271
272         Args:
273           port: an object implementing port-specific functionality
274           options: a dictionary of command line options
275           printer: a Printer object to record updates to.
276         """
277         self._port = port
278         self._fs = port.filesystem
279         self._options = options
280         self._printer = printer
281         self._message_broker = None
282         self._expectations = None
283
284         self.HTTP_SUBDIR = 'http' + port.TEST_PATH_SEPARATOR
285         self.WEBSOCKET_SUBDIR = 'websocket' + port.TEST_PATH_SEPARATOR
286         self.LAYOUT_TESTS_DIRECTORY = 'LayoutTests'
287         self._has_http_lock = False
288
289         self._remaining_locked_shards = []
290
291         # disable wss server. need to install pyOpenSSL on buildbots.
292         # self._websocket_secure_server = websocket_server.PyWebSocket(
293         #        options.results_directory, use_tls=True, port=9323)
294
295         # a set of test files, and the same tests as a list
296
297         # FIXME: Rename to test_names.
298         self._test_files = set()
299         self._test_files_list = None
300         self._result_queue = Queue.Queue()
301         self._retrying = False
302         self._results_directory = self._port.results_directory()
303
304         self._all_results = []
305         self._group_stats = {}
306         self._current_result_summary = None
307
308         # This maps worker names to the state we are tracking for each of them.
309         self._worker_states = {}
310
311     def collect_tests(self, args):
312         """Find all the files to test.
313
314         Args:
315           args: list of test arguments from the command line
316
317         """
318         paths = self._strip_test_dir_prefixes(args)
319         if self._options.test_list:
320             paths += self._strip_test_dir_prefixes(read_test_files(self._fs, self._options.test_list, self._port.TEST_PATH_SEPARATOR))
321         self._test_files = self._port.tests(paths)
322
323     def _strip_test_dir_prefixes(self, paths):
324         return [self._strip_test_dir_prefix(path) for path in paths if path]
325
326     def _strip_test_dir_prefix(self, path):
327         # Handle both "LayoutTests/foo/bar.html" and "LayoutTests\foo\bar.html" if
328         # the filesystem uses '\\' as a directory separator.
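        # e.g. 'LayoutTests/fast/js/foo.html' -> 'fast/js/foo.html'.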
329         if path.startswith(self.LAYOUT_TESTS_DIRECTORY + self._port.TEST_PATH_SEPARATOR):
330             return path[len(self.LAYOUT_TESTS_DIRECTORY + self._port.TEST_PATH_SEPARATOR):]
331         if path.startswith(self.LAYOUT_TESTS_DIRECTORY + self._fs.sep):
332             return path[len(self.LAYOUT_TESTS_DIRECTORY + self._fs.sep):]
333         return path
334
335     def lint(self):
336         lint_failed = False
337         for test_configuration in self._port.all_test_configurations():
338             try:
339                 self.lint_expectations(test_configuration)
340             except test_expectations.ParseError:
341                 lint_failed = True
342                 self._printer.write("")
343
344         if lint_failed:
345             _log.error("Lint failed.")
346             return -1
347
348         _log.info("Lint succeeded.")
349         return 0
350
351     def lint_expectations(self, config):
352         port = self._port
353         test_expectations.TestExpectations(
354             port,
355             None,
356             port.test_expectations(),
357             config,
358             self._options.lint_test_files,
359             port.test_expectations_overrides())
360
361     def _is_http_test(self, test):
362         return self.HTTP_SUBDIR in test or self.WEBSOCKET_SUBDIR in test
363
364     def _http_tests(self):
365         return set(test for test in self._test_files if self._is_http_test(test))
366
367     def parse_expectations(self):
368         """Parse the expectations from the test_list files and return a data
369         structure holding them. Throws an error if the test_list files have
370         invalid syntax."""
371         port = self._port
372         self._expectations = test_expectations.TestExpectations(
373             port,
374             self._test_files,
375             port.test_expectations(),
376             port.test_configuration(),
377             self._options.lint_test_files,
378             port.test_expectations_overrides())
379
380     # FIXME: This method is way too long and needs to be broken into pieces.
381     def prepare_lists_and_print_output(self):
382         """Creates appropriate subsets of the test lists and returns a
383         ResultSummary object. Also prints expected test counts.
384         """
385
386         # Remove skipped - both fixable and ignored - files from the
387         # top-level list of files to test.
388         num_all_test_files = len(self._test_files)
389         self._printer.print_expected("Found:  %d tests" %
390                                      (len(self._test_files)))
391         if not num_all_test_files:
392             _log.critical('No tests to run.')
393             return None
394
395         skipped = set()
396
397         if not self._options.http:
398             skipped = skipped.union(self._http_tests())
399
400         if num_all_test_files > 1 and not self._options.force:
401             skipped = skipped.union(self._expectations.get_tests_with_result_type(test_expectations.SKIP))
402             if self._options.skip_failing_tests:
403                 failing = self._expectations.get_tests_with_result_type(test_expectations.FAIL)
404                 self._test_files -= failing
405
406         self._test_files -= skipped
407
408         # Create a sorted list of test files so the subset chunk,
409         # if used, contains alphabetically consecutive tests.
410         self._test_files_list = list(self._test_files)
411         if self._options.randomize_order:
412             random.shuffle(self._test_files_list)
413         else:
414             self._test_files_list.sort(key=lambda test: test_key(self._port, test))
415
416         # If the user specifies they just want to run a subset of the tests,
417         # just grab a subset of the non-skipped tests.
418         if self._options.run_chunk or self._options.run_part:
419             chunk_value = self._options.run_chunk or self._options.run_part
420             test_files = self._test_files_list
421             try:
422                 (chunk_num, chunk_len) = chunk_value.split(":")
423                 chunk_num = int(chunk_num)
424                 assert(chunk_num >= 0)
425                 test_size = int(chunk_len)
426                 assert(test_size > 0)
427             except AssertionError:
428                 _log.critical("invalid chunk '%s'" % chunk_value)
429                 return None
430
431             # Get the number of tests
432             num_tests = len(test_files)
433
434             # Get the start offset of the slice.
435             if self._options.run_chunk:
436                 chunk_len = test_size
437                 # In this case chunk_num can be really large. We wrap the
438                 # slice around so that it fits within the current number of tests.
439                 slice_start = (chunk_num * chunk_len) % num_tests
440             else:
441                 # Validate the data.
442                 assert(test_size <= num_tests)
443                 assert(chunk_num <= test_size)
444
445                 # To compute chunk_len without skipping any tests, we round
446                 # num_tests up to the next value that divides evenly into
447                 # all the parts.
448                 rounded_tests = num_tests
449                 if rounded_tests % test_size != 0:
450                     rounded_tests = (num_tests + test_size -
451                                      (num_tests % test_size))
452
453                 chunk_len = rounded_tests / test_size
454                 slice_start = chunk_len * (chunk_num - 1)
455                 # It does not matter if the slice runs past the end of the list; slice_end is clamped below.
456
457             # Get the end offset of the slice.
458             slice_end = min(num_tests, slice_start + chunk_len)
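            # Worked example (numbers purely illustrative): with 25 tests and a
            # run_chunk value of "2:10", chunk_len is 10 and slice_start is
            # (2 * 10) % 25 == 20, so the slice is [20:25] and is topped up from
            # the start of the list below. With a run_part value of "2:4",
            # num_tests is rounded up to 28, chunk_len is 7, slice_start is 7,
            # and the slice is [7:14].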
459
460             files = test_files[slice_start:slice_end]
461
462             tests_run_msg = 'Running: %d tests (chunk slice [%d:%d] of %d)' % (
463                 (slice_end - slice_start), slice_start, slice_end, num_tests)
464             self._printer.print_expected(tests_run_msg)
465
466             # If we reached the end and we don't have enough tests, we run some
467             # from the beginning.
468             if slice_end - slice_start < chunk_len:
469                 extra = chunk_len - (slice_end - slice_start)
470                 extra_msg = ('   last chunk is partial, appending [0:%d]' %
471                             extra)
472                 self._printer.print_expected(extra_msg)
473                 tests_run_msg += "\n" + extra_msg
474                 files.extend(test_files[0:extra])
475             tests_run_filename = self._fs.join(self._results_directory, "tests_run.txt")
476             self._fs.write_text_file(tests_run_filename, tests_run_msg)
477
478             len_skip_chunk = int(len(files) * len(skipped) /
479                                  float(len(self._test_files)))
480             skip_chunk_list = list(skipped)[0:len_skip_chunk]
481             skip_chunk = set(skip_chunk_list)
482
483             # Update expectations so that the stats are calculated correctly.
484             # We need to pass a list that includes the right # of skipped files
485         # to parse_expectations() so that ResultSummary() will get the correct
486             # stats. So, we add in the subset of skipped files, and then
487             # subtract them back out.
488             self._test_files_list = files + skip_chunk_list
489             self._test_files = set(self._test_files_list)
490
491             self.parse_expectations()
492
493             self._test_files = set(files)
494             self._test_files_list = files
495         else:
496             skip_chunk = skipped
497
498         result_summary = ResultSummary(self._expectations, self._test_files | skip_chunk)
499         self._print_expected_results_of_type(result_summary, test_expectations.PASS, "passes")
500         self._print_expected_results_of_type(result_summary, test_expectations.FAIL, "failures")
501         self._print_expected_results_of_type(result_summary, test_expectations.FLAKY, "flaky")
502         self._print_expected_results_of_type(result_summary, test_expectations.SKIP, "skipped")
503
504         if self._options.force:
505             self._printer.print_expected('Running all tests, including '
506                                          'skips (--force)')
507         else:
508             # Note that we don't actually run the skipped tests (they were
509             # subtracted out of self._test_files, above), but we stub out the
510             # results here so the statistics can remain accurate.
511             for test in skip_chunk:
512                 result = test_results.TestResult(test)
513                 result.type = test_expectations.SKIP
514                 result_summary.add(result, expected=True)
515         self._printer.print_expected('')
516
517         # Check to make sure we didn't filter out all of the tests.
518         if not len(self._test_files):
519             _log.info("All tests are being skipped")
520             return None
521
522         return result_summary
523
524     def _get_dir_for_test_file(self, test_file):
525         """Returns the highest-level directory by which to shard the given
526         test file."""
527         directory, test_file = self._port.split_test(test_file)
528
529         # The http tests are very stable on mac/linux.
530         # TODO(ojan): Make the http server on Windows be apache so we can
531         # shard the http tests there as well. Switching to apache is
532         # what made them stable on linux/mac.
533         return directory
534
535     def _get_test_input_for_file(self, test_file):
536         """Returns the appropriate TestInput object for the file. Mostly this
537         is used for looking up the timeout value (in ms) to use for the given
538         test."""
539         if self._test_is_slow(test_file):
540             return TestInput(test_file, self._options.slow_time_out_ms)
541         return TestInput(test_file, self._options.time_out_ms)
542
543     def _test_requires_lock(self, test_file):
544         """Return True if the test needs the server lock when multiple
545         copies of NRWT are running concurrently."""
546         return self._is_http_test(test_file)
547
548     def _test_is_slow(self, test_file):
549         return self._expectations.has_modifier(test_file, test_expectations.SLOW)
550
551     def _shard_tests(self, test_files, num_workers, fully_parallel):
552         """Groups tests into batches.
553         This helps ensure that tests that depend on each other (aka bad tests!)
554         continue to run together as most cross-test dependencies tend to
555         occur within the same directory.
556         Return:
557             Two lists of TestShards. The first contains tests that must only be
558             run under the server lock, the second can be run whenever.
559         """
560
561         # FIXME: Move all of the sharding logic out of manager into its
562         # own class or module. Consider grouping it with the chunking logic
563         # in prepare_lists as well.
564         if num_workers == 1:
565             return self._shard_in_two(test_files)
566         elif fully_parallel:
567             return self._shard_every_file(test_files)
568         return self._shard_by_directory(test_files, num_workers)
569
570     def _shard_in_two(self, test_files):
571         """Returns two lists of shards, one with all the tests requiring a lock and one with the rest.
572
573         This is used when there's only one worker, to minimize the per-shard overhead."""
574         locked_inputs = []
575         unlocked_inputs = []
576         for test_file in test_files:
577             test_input = self._get_test_input_for_file(test_file)
578             if self._test_requires_lock(test_file):
579                 locked_inputs.append(test_input)
580             else:
581                 unlocked_inputs.append(test_input)
582
583         locked_shards = []
584         unlocked_shards = []
585         if locked_inputs:
586             locked_shards = [TestShard('locked_tests', locked_inputs)]
587         if unlocked_inputs:
588             unlocked_shards = [TestShard('unlocked_tests', unlocked_inputs)]
589
590         return locked_shards, unlocked_shards
591
592     def _shard_every_file(self, test_files):
593         """Returns two lists of shards, each shard containing a single test file.
594
595         This mode gets maximal parallelism at the cost of much higher flakiness."""
596         locked_shards = []
597         unlocked_shards = []
598         for test_file in test_files:
599             test_input = self._get_test_input_for_file(test_file)
600
601             # Note that we use a '.' for the shard name; the name doesn't really
602             # matter, and the only other meaningful value would be the filename,
603             # which would be really redundant.
604             if self._test_requires_lock(test_file):
605                 locked_shards.append(TestShard('.', [test_input]))
606             else:
607                 unlocked_shards.append(TestShard('.', [test_input]))
608
609         return locked_shards, unlocked_shards
610
611     def _shard_by_directory(self, test_files, num_workers):
612         """Returns two lists of shards, each shard containing all the files in a directory.
613
614         This is the default mode, and gets as much parallelism as we can while
615         minimizing flakiness caused by inter-test dependencies."""
616         locked_shards = []
617         unlocked_shards = []
618         tests_by_dir = {}
619         # FIXME: Given that the tests are already sorted by directory,
620         # we can probably rewrite this to be clearer and faster.
621         for test_file in test_files:
622             directory = self._get_dir_for_test_file(test_file)
623             test_input = self._get_test_input_for_file(test_file)
624             tests_by_dir.setdefault(directory, [])
625             tests_by_dir[directory].append(test_input)
626
627         for directory, test_inputs in tests_by_dir.iteritems():
628             shard = TestShard(directory, test_inputs)
629             if self._test_requires_lock(directory):
630                 locked_shards.append(shard)
631             else:
632                 unlocked_shards.append(shard)
633
634         # Sort the shards by directory name.
635         locked_shards.sort(key=lambda shard: shard.name)
636         unlocked_shards.sort(key=lambda shard: shard.name)
637
638         return (self._resize_shards(locked_shards, self._max_locked_shards(num_workers),
639                                     'locked_shard'),
640                 unlocked_shards)
641
642     def _max_locked_shards(self, num_workers):
643         # Put a ceiling on the number of locked shards, so that we
644         # don't hammer the servers too badly.
645
646         # FIXME: For now, limit to one shard. After testing to make sure we
647         # can handle multiple shards, we should probably do something like
648         # limit this to no more than a quarter of all workers, e.g.:
649         # return max(math.ceil(num_workers / 4.0), 1)
650         return 1
651
652     def _resize_shards(self, old_shards, max_new_shards, shard_name_prefix):
653         """Takes a list of shards and redistributes the tests into no more
654         than |max_new_shards| new shards."""
655
656         # This implementation assumes that each input shard only contains tests from a
657         # single directory, and that tests in each shard must remain together; as a
658         # result, a given input shard is never split between output shards.
659         #
660         # Each output shard contains the tests from one or more input shards and
661         # hence may contain tests from multiple directories.
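        # For example, with the current _max_locked_shards() limit of 1, every
        # locked per-directory shard is flattened into a single shard named
        # 'locked_shard_1'. With a limit of 3 and 7 input shards, each new shard
        # would absorb ceil(7 / 3) == 3 old shards (the last one gets the remainder).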
662
663         def divide_and_round_up(numerator, divisor):
664             return int(math.ceil(float(numerator) / divisor))
665
666         def extract_and_flatten(shards):
667             test_inputs = []
668             for shard in shards:
669                 test_inputs.extend(shard.test_inputs)
670             return test_inputs
671
672         def split_at(seq, index):
673             return (seq[:index], seq[index:])
674
675         num_old_per_new = divide_and_round_up(len(old_shards), max_new_shards)
676         new_shards = []
677         remaining_shards = old_shards
678         while remaining_shards:
679             some_shards, remaining_shards = split_at(remaining_shards, num_old_per_new)
680             new_shards.append(TestShard('%s_%d' % (shard_name_prefix, len(new_shards) + 1),
681                                         extract_and_flatten(some_shards)))
682         return new_shards
683
684     def _log_num_workers(self, num_workers, num_shards, num_locked_shards):
685         driver_name = self._port.driver_name()
686         if num_workers == 1:
687             self._printer.print_config("Running 1 %s over %s" %
688                 (driver_name, grammar.pluralize('shard', num_shards)))
689         else:
690             self._printer.print_config("Running %d %ss in parallel over %d shards (%d locked)" %
691                 (num_workers, driver_name, num_shards, num_locked_shards))
692
693     def _run_tests(self, file_list, result_summary):
694         """Runs the tests in the file_list.
695
696         Return: A tuple (interrupted, keyboard_interrupted, thread_timings,
697             test_timings, individual_test_timings)
698             interrupted is whether the run was interrupted
699             keyboard_interrupted is whether the interruption was because someone
700               typed Ctrl^C
701             thread_timings is a list of dicts with the total runtime
702               of each thread with 'name', 'num_tests', 'total_time' properties
703             test_timings is a list of timings for each sharded subdirectory
704               of the form [time, directory_name, num_tests]
705             individual_test_timings is a list of run times for each test
706               in the form {filename:filename, test_run_time:test_run_time}
707             result_summary: summary object to populate with the results
708         """
709         self._current_result_summary = result_summary
710         self._all_results = []
711         self._group_stats = {}
712         self._worker_states = {}
713
714         keyboard_interrupted = False
715         interrupted = False
716         thread_timings = []
717
718         self._printer.print_update('Sharding tests ...')
719         locked_shards, unlocked_shards = self._shard_tests(file_list, int(self._options.child_processes), self._options.experimental_fully_parallel)
720
721         # FIXME: We don't have a good way to coordinate the workers so that
722         # they don't try to run the shards that need a lock if we don't actually
723         # have the lock. The easiest solution at the moment is to grab the
724         # lock at the beginning of the run, and then run all of the locked
725         # shards first. This minimizes the time spent holding the lock, but
726         # means that we won't be running tests while we're waiting for the lock.
727         # If this becomes a problem in practice we'll need to change this.
728
729         all_shards = locked_shards + unlocked_shards
730         self._remaining_locked_shards = locked_shards
731         if locked_shards:
732             self.start_servers_with_lock()
733
734         num_workers = min(int(self._options.child_processes), len(all_shards))
735         self._log_num_workers(num_workers, len(all_shards), len(locked_shards))
736
737         manager_connection = manager_worker_broker.get(self._port, self._options, self, worker.Worker)
738
739         if self._options.dry_run:
740             return (interrupted, keyboard_interrupted, thread_timings, self._group_stats, self._all_results)
741
742         self._printer.print_update('Starting %s ...' % grammar.pluralize('worker', num_workers))
743         for worker_number in xrange(num_workers):
744             worker_connection = manager_connection.start_worker(worker_number)
745             worker_state = _WorkerState(worker_number, worker_connection)
746             self._worker_states[worker_connection.name] = worker_state
747
748             # FIXME: If we start workers up too quickly, DumpRenderTree appears
749             # to thrash on something and time out its first few tests. Until
750             # we can figure out what's going on, sleep a bit in between
751             # workers. This needs a bug filed.
752             time.sleep(0.1)
753
754         self._printer.print_update("Starting testing ...")
755         for shard in all_shards:
756             # FIXME: Change 'test_list' to 'shard', make sharding public.
757             manager_connection.post_message('test_list', shard.name, shard.test_inputs)
758
759         # We post one 'stop' message for each worker. Because the stop messages
760         # are sent after all of the tests, and because each worker will stop
761         # reading messages after receiving a stop, we can be sure each
762         # worker will get a stop message and hence they will all shut down.
763         for _ in xrange(num_workers):
764             manager_connection.post_message('stop')
765
766         try:
767             while not self.is_done():
768                 manager_connection.run_message_loop(delay_secs=1.0)
769
770             # Make sure all of the workers have shut down (if possible).
771             for worker_state in self._worker_states.values():
772                 if worker_state.worker_connection.is_alive():
773                     _log.debug('Waiting for worker %d to exit' % worker_state.number)
774                     worker_state.worker_connection.join(5.0)
775                     if worker_state.worker_connection.is_alive():
776                         _log.error('Worker %d did not exit in time.' % worker_state.number)
777
778         except KeyboardInterrupt:
779             self._printer.print_update('Interrupted, exiting ...')
780             self.cancel_workers()
781             keyboard_interrupted = True
782         except TestRunInterruptedException, e:
783             _log.warning(e.reason)
784             self.cancel_workers()
785             interrupted = True
786         except WorkerException:
787             self.cancel_workers()
788             raise
789         except:
790             # Unexpected exception; don't try to clean up workers.
791             _log.error("Exception raised, exiting")
792             self.cancel_workers()
793             raise
794         finally:
795             self.stop_servers_with_lock()
796
797         thread_timings = [worker_state.stats for worker_state in self._worker_states.values()]
798
799         # FIXME: should this be a class instead of a tuple?
800         return (interrupted, keyboard_interrupted, thread_timings, self._group_stats, self._all_results)
801
802     def update(self):
803         self.update_summary(self._current_result_summary)
804
805     def _collect_timing_info(self, threads):
806         test_timings = {}
807         individual_test_timings = []
808         thread_timings = []
809
810         for thread in threads:
811             thread_timings.append({'name': thread.getName(),
812                                    'num_tests': thread.get_num_tests(),
813                                    'total_time': thread.get_total_time()})
814             test_timings.update(thread.get_test_group_timing_stats())
815             individual_test_timings.extend(thread.get_test_results())
816
817         return (thread_timings, test_timings, individual_test_timings)
818
819     def needs_servers(self):
820         return any(self._test_requires_lock(test_name) for test_name in self._test_files) and self._options.http
821
822     def set_up_run(self):
823         """Configures the system to be ready to run tests.
824
825         Returns a ResultSummary object if we should continue to run tests,
826         or None if we should abort.
827
828         """
829         # This must be started before we check the system dependencies,
830         # since the helper may do things to make the setup correct.
831         self._printer.print_update("Starting helper ...")
832         self._port.start_helper()
833
834         # Check that the system dependencies (themes, fonts, ...) are correct.
835         if not self._options.nocheck_sys_deps:
836             self._printer.print_update("Checking system dependencies ...")
837             if not self._port.check_sys_deps(self.needs_servers()):
838                 self._port.stop_helper()
839                 return None
840
841         if self._options.clobber_old_results:
842             self._clobber_old_results()
843
844         # Create the output directory if it doesn't already exist.
845         self._port.maybe_make_directory(self._results_directory)
846
847         self._port.setup_test_run()
848
849         self._printer.print_update("Preparing tests ...")
850         result_summary = self.prepare_lists_and_print_output()
851         if not result_summary:
852             return None
853
854         return result_summary
855
856     def run(self, result_summary):
857         """Run all our tests on all our test files.
858
859         For each test file, we run each test type. If there are any failures,
860         we collect them for reporting.
861
862         Args:
863           result_summary: a summary object tracking the test results.
864
865         Return:
866           The number of unexpected results (0 == success)
867         """
868         # collect_tests() must have been called first to initialize us.
869         # If we didn't find any files to test, we've errored out already in
870         # prepare_lists_and_print_output().
871         assert(len(self._test_files))
872
873         start_time = time.time()
874
875         interrupted, keyboard_interrupted, thread_timings, test_timings, individual_test_timings = self._run_tests(self._test_files_list, result_summary)
876
877         # We exclude the crashes from the list of results to retry, because
878         # we want to treat even a potentially flaky crash as an error.
879         failures = self._get_failures(result_summary, include_crashes=False, include_missing=False)
880         retry_summary = result_summary
881         while (len(failures) and self._options.retry_failures and not self._retrying and not interrupted and not keyboard_interrupted):
882             _log.info('')
883             _log.info("Retrying %d unexpected failure(s) ..." % len(failures))
884             _log.info('')
885             self._retrying = True
886             retry_summary = ResultSummary(self._expectations, failures.keys())
887             # Note that we intentionally ignore the return value here.
888             self._run_tests(failures.keys(), retry_summary)
889             failures = self._get_failures(retry_summary, include_crashes=True, include_missing=True)
890
891         end_time = time.time()
892
893         self._print_timing_statistics(end_time - start_time, thread_timings, test_timings, individual_test_timings, result_summary)
894         self._print_result_summary(result_summary)
895
896         sys.stdout.flush()
897         sys.stderr.flush()
898
899         self._printer.print_one_line_summary(result_summary.total, result_summary.expected, result_summary.unexpected)
900
901         unexpected_results = summarize_results(self._port, self._expectations, result_summary, retry_summary, individual_test_timings, only_unexpected=True, interrupted=interrupted)
902         self._printer.print_unexpected_results(unexpected_results)
903
904         # Re-raise a KeyboardInterrupt if necessary so the caller can handle it.
905         if keyboard_interrupted:
906             raise KeyboardInterrupt
907
908         # FIXME: remove record_results. It's just used for testing. There's no need
909         # for it to be a commandline argument.
910         if (self._options.record_results and not self._options.dry_run and not keyboard_interrupted):
911             self._port.print_leaks_summary()
912             # Write the same data to log files and upload generated JSON files to appengine server.
913             summarized_results = summarize_results(self._port, self._expectations, result_summary, retry_summary, individual_test_timings, only_unexpected=False, interrupted=interrupted)
914             self._upload_json_files(summarized_results, result_summary, individual_test_timings)
915
916         # Write the summary to disk (results.html) and display it if requested.
917         if not self._options.dry_run:
918             self._copy_results_html_file()
919             if self._options.show_results:
920                 self._show_results_html_file(result_summary)
921
922         # Ignore flaky failures and unexpected passes so we don't turn the
923         # bot red for those.
924         return unexpected_results['num_regressions']
925
926     def start_servers_with_lock(self):
927         assert(self._options.http)
928         self._printer.print_update('Acquiring http lock ...')
929         self._port.acquire_http_lock()
930         self._printer.print_update('Starting HTTP server ...')
931         self._port.start_http_server()
932         self._printer.print_update('Starting WebSocket server ...')
933         self._port.start_websocket_server()
934         self._has_http_lock = True
935
936     def stop_servers_with_lock(self):
937         if self._has_http_lock:
938             self._printer.print_update('Stopping HTTP server ...')
939             self._port.stop_http_server()
940             self._printer.print_update('Stopping WebSocket server ...')
941             self._port.stop_websocket_server()
942             self._printer.print_update('Releasing server lock ...')
943             self._port.release_http_lock()
944             self._has_http_lock = False
945
946     def clean_up_run(self):
947         """Restores the system after we're done running tests."""
948
949         _log.debug("flushing stdout")
950         sys.stdout.flush()
951         _log.debug("flushing stderr")
952         sys.stderr.flush()
953         _log.debug("stopping helper")
954         self._port.stop_helper()
955
956     def update_summary(self, result_summary):
957         """Update the summary and print results with any completed tests."""
958         while True:
959             try:
960                 result = test_results.TestResult.loads(self._result_queue.get_nowait())
961             except Queue.Empty:
962                 self._printer.print_progress(result_summary, self._retrying, self._test_files_list)
963                 return
964
965             self._update_summary_with_result(result_summary, result)
966
967     def _interrupt_if_at_failure_limits(self, result_summary):
968         # Note: The messages in this method are constructed to match old-run-webkit-tests
969         # so that existing buildbot grep rules work.
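        # For example, with exit_after_n_failures set to 20 this can produce a
        # message like "Exiting early after 20 failures. 312 tests run." (the
        # test count here is just illustrative).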
970         def interrupt_if_at_failure_limit(limit, failure_count, result_summary, message):
971             if limit and failure_count >= limit:
972                 message += " %d tests run." % (result_summary.expected + result_summary.unexpected)
973                 raise TestRunInterruptedException(message)
974
975         interrupt_if_at_failure_limit(
976             self._options.exit_after_n_failures,
977             result_summary.unexpected_failures,
978             result_summary,
979             "Exiting early after %d failures." % result_summary.unexpected_failures)
980         interrupt_if_at_failure_limit(
981             self._options.exit_after_n_crashes_or_timeouts,
982             result_summary.unexpected_crashes + result_summary.unexpected_timeouts,
983             result_summary,
984             # This differs from ORWT because it does not include WebProcess crashes.
985             "Exiting early after %d crashes and %d timeouts." % (result_summary.unexpected_crashes, result_summary.unexpected_timeouts))
986
987     def _update_summary_with_result(self, result_summary, result):
988         if result.type == test_expectations.SKIP:
989             result_summary.add(result, expected=True)
990         else:
991             expected = self._expectations.matches_an_expected_result(result.test_name, result.type, self._options.pixel_tests)
992             result_summary.add(result, expected)
993             exp_str = self._expectations.get_expectations_string(result.test_name)
994             got_str = self._expectations.expectation_to_string(result.type)
995             self._printer.print_test_result(result, expected, exp_str, got_str)
996         self._printer.print_progress(result_summary, self._retrying, self._test_files_list)
997         self._interrupt_if_at_failure_limits(result_summary)
998
999     def _clobber_old_results(self):
1000         # Just clobber the actual test results directories since the other
1001         # files in the results directory are explicitly used for cross-run
1002         # tracking.
1003         self._printer.print_update("Clobbering old results in %s" %
1004                                    self._results_directory)
1005         layout_tests_dir = self._port.layout_tests_dir()
1006         possible_dirs = self._port.test_dirs()
1007         for dirname in possible_dirs:
1008             if self._fs.isdir(self._fs.join(layout_tests_dir, dirname)):
1009                 self._fs.rmtree(self._fs.join(self._results_directory, dirname))
1010
1011     def _get_failures(self, result_summary, include_crashes, include_missing):
1012         """Filters a dict of results and returns only the failures.
1013
1014         Args:
1015           result_summary: the results of the test run
1016           include_crashes: whether crashes are included in the output.
1017             We use False when finding the list of failures to retry
1018             to see if the results were flaky. Although the crashes may also be
1019             flaky, we treat them as if they aren't so that they're not ignored.
          include_missing: whether tests with missing results are included in
            the output.
1020         Returns:
1021           a dict of files -> results
1022         """
1023         failed_results = {}
1024         for test, result in result_summary.unexpected_results.iteritems():
1025             if (result.type == test_expectations.PASS or
1026                 (result.type == test_expectations.CRASH and not include_crashes) or
1027                 (result.type == test_expectations.MISSING and not include_missing)):
1028                 continue
1029             failed_results[test] = result.type
1030
1031         return failed_results
1032
1033     def _char_for_result(self, result):
1034         result = result.lower()
1035         if result in TestExpectations.EXPECTATIONS:
1036             result_enum_value = TestExpectations.EXPECTATIONS[result]
1037         else:
1038             result_enum_value = TestExpectations.MODIFIERS[result]
1039         return json_layout_results_generator.JSONLayoutResultsGenerator.FAILURE_TO_CHAR[result_enum_value]
1040
1041     def _upload_json_files(self, summarized_results, result_summary, individual_test_timings):
1042         """Writes the results of the test run as JSON files into the results
1043         dir and uploads the files to the appengine server.
1044
1045         Args:
1047           summarized_results: dict of results
1048           result_summary: full summary object
1049           individual_test_timings: list of test times (used by the flakiness
1050             dashboard).
1051         """
1052         _log.debug("Writing JSON files in %s." % self._results_directory)
1053
1054         times_trie = json_results_generator.test_timings_trie(self._port, individual_test_timings)
1055         times_json_path = self._fs.join(self._results_directory, "times_ms.json")
1056         json_results_generator.write_json(self._fs, times_trie, times_json_path)
1057
1058         full_results_path = self._fs.join(self._results_directory, "full_results.json")
1059         json_results_generator.write_json(self._fs, summarized_results, full_results_path)
1060
1061         generator = json_layout_results_generator.JSONLayoutResultsGenerator(
1062             self._port, self._options.builder_name, self._options.build_name,
1063             self._options.build_number, self._results_directory,
1064             BUILDER_BASE_URL, individual_test_timings,
1065             self._expectations, result_summary, self._test_files_list,
1066             self._options.test_results_server,
1067             "layout-tests",
1068             self._options.master_name)
1069
1070         _log.debug("Finished writing JSON files.")
1071
1072         json_files = ["incremental_results.json", "full_results.json", "times_ms.json"]
1073
1074         generator.upload_json_files(json_files)
1075
1076         incremental_results_path = self._fs.join(self._results_directory, "incremental_results.json")
1077
1078         # Remove these files from the results directory so they don't take up too much space on the buildbot.
1079         # The tools use the version we uploaded to the results server anyway.
1080         self._fs.remove(times_json_path)
1081         self._fs.remove(incremental_results_path)
1082
1083     def print_config(self):
1084         """Prints the configuration for the test run."""
1085         p = self._printer
1086         p.print_config("Using port '%s'" % self._port.name())
1087         p.print_config("Test configuration: %s" % self._port.test_configuration())
1088         p.print_config("Placing test results in %s" % self._results_directory)
1089         if self._options.new_baseline:
1090             p.print_config("Placing new baselines in %s" %
1091                            self._port.baseline_path())
1092
1093         fallback_path = [self._fs.split(x)[1] for x in self._port.baseline_search_path()]
1094         p.print_config("Baseline search path: %s -> generic" % " -> ".join(fallback_path))
1095
1096         p.print_config("Using %s build" % self._options.configuration)
1097         if self._options.pixel_tests:
1098             p.print_config("Pixel tests enabled")
1099         else:
1100             p.print_config("Pixel tests disabled")
1101
1102         p.print_config("Regular timeout: %s, slow test timeout: %s" %
1103                        (self._options.time_out_ms,
1104                         self._options.slow_time_out_ms))
1105
1106         p.print_config('Command line: ' +
1107                        ' '.join(self._port.driver_cmd_line()))
1108         p.print_config("Worker model: %s" % self._options.worker_model)
1109         p.print_config("")
1110
1111     def _print_expected_results_of_type(self, result_summary,
1112                                         result_type, result_type_str):
1113         """Print the number of the tests in a given result class.
1114
1115         Args:
1116           result_summary - the object containing all the results to report on
1117           result_type - the particular result type to report in the summary.
1118           result_type_str - a string description of the result_type.
1119         """
1120         tests = self._expectations.get_tests_with_result_type(result_type)
1121         now = result_summary.tests_by_timeline[test_expectations.NOW]
1122         wontfix = result_summary.tests_by_timeline[test_expectations.WONTFIX]
1123
1124         # We use a fancy format string in order to print the data out in a
1125         # nicely-aligned table.
1126         fmtstr = ("Expect: %%5d %%-8s (%%%dd now, %%%dd wontfix)"
1127                   % (self._num_digits(now), self._num_digits(wontfix)))
1128         self._printer.print_expected(fmtstr %
1129             (len(tests), result_type_str, len(tests & now), len(tests & wontfix)))
1130
1131     def _num_digits(self, num):
1132         """Returns the number of digits needed to represent the length of a
1133         sequence."""
1134         ndigits = 1
1135         if len(num):
1136             ndigits = int(math.log10(len(num))) + 1
1137         return ndigits
1138
1139     def _print_timing_statistics(self, total_time, thread_timings,
1140                                directory_test_timings, individual_test_timings,
1141                                result_summary):
1142         """Record timing-specific information for the test run.
1143
1144         Args:
1145           total_time: total elapsed time (in seconds) for the test run
1146           thread_timings: wall clock time each thread ran for
1147           directory_test_timings: timing by directory
1148           individual_test_timings: timing by file
1149           result_summary: summary object for the test run
1150         """
1151         self._printer.print_timing("Test timing:")
1152         self._printer.print_timing("  %6.2f total testing time" % total_time)
1153         self._printer.print_timing("")
1154         self._printer.print_timing("Thread timing:")
1155         cuml_time = 0
1156         for t in thread_timings:
1157             self._printer.print_timing("    %10s: %5d tests, %6.2f secs" %
1158                   (t['name'], t['num_tests'], t['total_time']))
1159             cuml_time += t['total_time']
1160         self._printer.print_timing("   %6.2f cumulative, %6.2f optimal" %
1161               (cuml_time, cuml_time / int(self._options.child_processes)))
1162         self._printer.print_timing("")
1163
1164         self._print_aggregate_test_statistics(individual_test_timings)
1165         self._print_individual_test_times(individual_test_timings,
1166                                           result_summary)
1167         self._print_directory_timings(directory_test_timings)
1168
1169     def _print_aggregate_test_statistics(self, individual_test_timings):
1170         """Prints aggregate statistics (median, mean, etc.) for all tests.
1171         Args:
1172           individual_test_timings: List of TestResults for all tests.
1173         """
1174         times_for_dump_render_tree = [test_stats.test_run_time for test_stats in individual_test_timings]
1175         self._print_statistics_for_test_timings("PER TEST TIME IN TESTSHELL (seconds):",
1176                                                 times_for_dump_render_tree)
1177
1178     def _print_individual_test_times(self, individual_test_timings,
1179                                   result_summary):
1180         """Prints the run times for slow, timeout and crash tests.
1181         Args:
1182           individual_test_timings: List of TestResults for all tests.
1183           result_summary: summary object for test run
1184         """
1185         # Reverse-sort by the time spent in DumpRenderTree.
1186         individual_test_timings.sort(key=lambda test_result: test_result.test_run_time,
1187                                      reverse=True)
1188
1189         num_printed = 0
1190         slow_tests = []
1191         timeout_or_crash_tests = []
1192         unexpected_slow_tests = []
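        # The loop below walks the timings (already sorted slowest-first) and
        # buckets them: tests marked SLOW, tests that timed out or crashed, and,
        # for anything not in those groups, up to NUM_SLOW_TESTS_TO_LOG of the
        # slowest remaining tests, reported as unexpectedly slow.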
1193         for test_tuple in individual_test_timings:
1194             test_name = test_tuple.test_name
1195             is_timeout_crash_or_slow = False
1196             if self._test_is_slow(test_name):
1197                 is_timeout_crash_or_slow = True
1198                 slow_tests.append(test_tuple)
1199
1200             if test_name in result_summary.failures:
1201                 result = result_summary.results[test_name].type
1202                 if (result == test_expectations.TIMEOUT or
1203                     result == test_expectations.CRASH):
1204                     is_timeout_crash_or_slow = True
1205                     timeout_or_crash_tests.append(test_tuple)
1206
1207             if (not is_timeout_crash_or_slow and
1208                 num_printed < printing.NUM_SLOW_TESTS_TO_LOG):
1209                 num_printed = num_printed + 1
1210                 unexpected_slow_tests.append(test_tuple)
1211
1212         self._printer.print_timing("")
1213         self._print_test_list_timing("%s slowest tests that are not "
1214             "marked as SLOW and did not timeout/crash:" %
1215             printing.NUM_SLOW_TESTS_TO_LOG, unexpected_slow_tests)
1216         self._printer.print_timing("")
1217         self._print_test_list_timing("Tests marked as SLOW:", slow_tests)
1218         self._printer.print_timing("")
1219         self._print_test_list_timing("Tests that timed out or crashed:",
1220                                      timeout_or_crash_tests)
1221         self._printer.print_timing("")
1222
1223     def _print_test_list_timing(self, title, test_list):
1224         """Print timing info for each test.
1225
1226         Args:
1227           title: section heading
1228           test_list: tests that fall in this section
1229         """
1230         if self._printer.disabled('slowest'):
1231             return
1232
1233         self._printer.print_timing(title)
1234         for test_tuple in test_list:
1235             test_run_time = round(test_tuple.test_run_time, 1)
1236             self._printer.print_timing("  %s took %s seconds" % (test_tuple.test_name, test_run_time))
1237
1238     def _print_directory_timings(self, directory_test_timings):
1239         """Print timing info by directory for any directories that
1240         take > 10 seconds to run.
1241
1242         Args:
1243           directory_test_timings: time info for each directory
1244         """
1245         timings = []
1246         for directory in directory_test_timings:
1247             num_tests, time_for_directory = directory_test_timings[directory]
1248             timings.append((round(time_for_directory, 1), directory,
1249                             num_tests))
1250         timings.sort()
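        # Each entry is (rounded seconds, directory, test count), so sorting the
        # tuples orders directories from fastest to slowest before the >10s
        # entries are printed.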
1251
1252         self._printer.print_timing("Time to process slowest subdirectories:")
1253         min_seconds_to_print = 10
1254         for timing in timings:
1255             if timing[0] > min_seconds_to_print:
1256                 self._printer.print_timing(
1257                     "  %s took %s seconds to run %s tests." % (timing[1],
1258                     timing[0], timing[2]))
1259         self._printer.print_timing("")
1260
1261     def _print_statistics_for_test_timings(self, title, timings):
1262         """Prints the median, mean and standard deviation of the values in
1263         timings.
1264
1265         Args:
1266           title: Title for these timings.
1267           timings: A list of floats representing times.
1268         """
1269         self._printer.print_timing(title)
1270         timings.sort()
1271
1272         num_tests = len(timings)
1273         if not num_tests:
1274             return
1275         percentile90 = timings[int(.9 * num_tests)]
1276         percentile99 = timings[int(.99 * num_tests)]
1277
1278         if num_tests % 2 == 1:
1279             median = timings[(num_tests - 1) / 2]
1280         else:
1281             lower = timings[num_tests / 2 - 1]
1282             upper = timings[num_tests / 2]
1283             median = (float(lower + upper)) / 2
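        # Worked example (illustrative): for timings [1.0, 2.0, 4.0, 8.0] the
        # median is (2.0 + 4.0) / 2 = 3.0; for [1.0, 2.0, 4.0] it is 2.0.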
1284
1285         mean = sum(timings) / num_tests
1286         sum_of_deviations = 0
1287         for timing in timings:
1288             sum_of_deviations += math.pow(timing - mean, 2)
1289
1290         std_deviation = math.sqrt(sum_of_deviations / num_tests)
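        # Worked example (illustrative): timings [1.0, 2.0, 3.0] have mean 2.0,
        # summed squared deviations 1 + 0 + 1 = 2, and std dev sqrt(2 / 3) ~= 0.816.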
1291         self._printer.print_timing("  Median:          %6.3f" % median)
1292         self._printer.print_timing("  Mean:            %6.3f" % mean)
1293         self._printer.print_timing("  90th percentile: %6.3f" % percentile90)
1294         self._printer.print_timing("  99th percentile: %6.3f" % percentile99)
1295         self._printer.print_timing("  Standard dev:    %6.3f" % std_deviation)
1296         self._printer.print_timing("")
1297
1298     def _print_result_summary(self, result_summary):
1299         """Print a short summary about how many tests passed.
1300
1301         Args:
1302           result_summary: information to log
1303         """
1304         failed = len(result_summary.failures)
1305         skipped = len(
1306             result_summary.tests_by_expectation[test_expectations.SKIP])
1307         total = result_summary.total
1308         passed = total - failed - skipped
1309         pct_passed = 0.0
1310         if total > 0:
1311             pct_passed = float(passed) * 100 / total
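        # Arithmetic sketch (assumed numbers): 100 total tests with 5 failures and
        # 10 skips leave 85 passing, reported as "=> Results: 85/100 tests passed (85.0%)".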
1312
1313         self._printer.print_actual("")
1314         self._printer.print_actual("=> Results: %d/%d tests passed (%.1f%%)" %
1315                      (passed, total, pct_passed))
1316         self._printer.print_actual("")
1317         self._print_result_summary_entry(result_summary,
1318             test_expectations.NOW, "Tests to be fixed")
1319
1320         self._printer.print_actual("")
1321         self._print_result_summary_entry(result_summary,
1322             test_expectations.WONTFIX,
1323             "Tests that will only be fixed if they crash (WONTFIX)")
1324         self._printer.print_actual("")
1325
1326     def _print_result_summary_entry(self, result_summary, timeline,
1327                                     heading):
1328         """Print a summary block of results for a particular timeline of tests.
1329
1330         Args:
1331           result_summary: summary to print results for
1332           timeline: the timeline to print results for (NOW, WONTFIX, etc.)
1333           heading: a textual description of the timeline
1334         """
1335         total = len(result_summary.tests_by_timeline[timeline])
1336         not_passing = (total -
1337            len(result_summary.tests_by_expectation[test_expectations.PASS] &
1338                result_summary.tests_by_timeline[timeline]))
1339         self._printer.print_actual("=> %s (%d):" % (heading, not_passing))
1340
1341         for result in TestExpectations.EXPECTATION_ORDER:
1342             if result == test_expectations.PASS:
1343                 continue
1344             results = (result_summary.tests_by_expectation[result] &
1345                        result_summary.tests_by_timeline[timeline])
1346             desc = TestExpectations.EXPECTATION_DESCRIPTIONS[result]
1347             if not_passing and len(results):
1348                 pct = len(results) * 100.0 / not_passing
1349                 self._printer.print_actual("  %5d %-24s (%4.1f%%)" %
1350                     (len(results), desc[len(results) != 1], pct))
1351
1352     def _copy_results_html_file(self):
1353         base_dir = self._port.path_from_webkit_base('LayoutTests', 'fast', 'harness')
1354         results_file = self._fs.join(base_dir, 'results.html')
1355         # FIXME: What should we do if this doesn't exist (e.g., in unit tests)?
1356         if self._fs.exists(results_file):
1357             self._fs.copyfile(results_file, self._fs.join(self._results_directory, "results.html"))
1358
1359     def _show_results_html_file(self, result_summary):
1360         """Shows the results.html page."""
1361         if self._options.full_results_html:
1362             test_files = result_summary.failures.keys()
1363         else:
1364             unexpected_failures = self._get_failures(result_summary, include_crashes=True, include_missing=True)
1365             test_files = unexpected_failures.keys()
1366
1367         if not len(test_files):
1368             return
1369
1370         results_filename = self._fs.join(self._results_directory, "results.html")
1371         self._port.show_results_html_file(results_filename)
1372
1373     def name(self):
1374         return 'Manager'
1375
1376     def is_done(self):
1377         worker_states = self._worker_states.values()
1378         return worker_states and all(self._worker_is_done(worker_state) for worker_state in worker_states)
1379
1380     # FIXME: Inline this function.
1381     def _worker_is_done(self, worker_state):
1382         return worker_state.done
1383
1384     def cancel_workers(self):
1385         for worker_state in self._worker_states.values():
1386             worker_state.worker_connection.cancel()
1387
1388     def handle_started_test(self, source, test_info, hang_timeout):
1389         worker_state = self._worker_states[source]
1390         worker_state.current_test_name = test_info.test_name
1391         worker_state.next_timeout = time.time() + hang_timeout
1392
1393     def handle_done(self, source):
1394         worker_state = self._worker_states[source]
1395         worker_state.done = True
1396
1397     def handle_exception(self, source, exception_type, exception_value, stack):
1398         if exception_type in (KeyboardInterrupt, TestRunInterruptedException):
1399             raise exception_type(exception_value)
1400         _log.error("%s raised %s('%s'):" % (
1401                    source,
1402                    exception_value.__class__.__name__,
1403                    str(exception_value)))
1404         self._log_worker_stack(stack)
1405         raise WorkerException(str(exception_value))
1406
1407     def handle_finished_list(self, source, list_name, num_tests, elapsed_time):
1408         self._group_stats[list_name] = (num_tests, elapsed_time)
1409
1410         def find(name, test_lists):
1411             for i, test_list in enumerate(test_lists):
1412                 if test_list.name == name:
1413                     return i
1414             return -1
1415
1416         index = find(list_name, self._remaining_locked_shards)
1417         if index >= 0:
1418             self._remaining_locked_shards.pop(index)
1419             if not self._remaining_locked_shards:
1420                 self.stop_servers_with_lock()
1421
1422     def handle_finished_test(self, source, result, elapsed_time):
1423         worker_state = self._worker_states[source]
1424         worker_state.next_timeout = None
1425         worker_state.current_test_name = None
1426         worker_state.stats['total_time'] += elapsed_time
1427         worker_state.stats['num_tests'] += 1
1428
1429         self._all_results.append(result)
1430         self._update_summary_with_result(self._current_result_summary, result)
1431
1432     def _log_worker_stack(self, stack):
1433         webkitpydir = self._port.path_from_webkit_base('Tools', 'Scripts', 'webkitpy') + self._port.filesystem.sep
1434         for filename, line_number, function_name, text in stack:
1435             if filename.startswith(webkitpydir):
1436                 filename = filename.replace(webkitpydir, '')
1437             _log.error('  %s:%u (in %s)' % (filename, line_number, function_name))
1438             _log.error('    %s' % text)
1439
1440
1441 def read_test_files(fs, filenames, test_path_separator):
1442     tests = []
1443     for filename in filenames:
1444         try:
1445             if test_path_separator != fs.sep:
1446                 filename = filename.replace(test_path_separator, fs.sep)
1447             file_contents = fs.read_text_file(filename).split('\n')
1448             for line in file_contents:
1449                 line = test_expectations.strip_comments(line)
1450                 if line:
1451                     tests.append(line)
1452         except IOError, e:
1453             if e.errno == errno.ENOENT:
1454                 _log.critical('')
1455                 _log.critical('--test-list file "%s" not found' % filename)
1456             raise
1457     return tests
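# Usage sketch (hypothetical input): given a --test-list file containing the line
# "fast/js/a.html" plus a blank line, read_test_files(fs, [path], '/') returns
# ['fast/js/a.html']; lines that strip_comments() reduces to nothing are skipped.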
1458
1459
1460 # FIXME: These two free functions belong either on manager (since it's the only one
1461 # which uses them) or in a different file (if they need to be re-used).
1462 def test_key(port, test_name):
1463     """Turns a test name into a pair of sublists: the natural sort key of the
1464     dirname, and the natural sort key of the basename.
1465
1466     This can be used when sorting paths so that files in a directory
1467     are kept together rather than being mixed in with files in
1468     subdirectories."""
1469     dirname, basename = port.split_test(test_name)
1470     return (natural_sort_key(dirname + port.TEST_PATH_SEPARATOR), natural_sort_key(basename))
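# Illustrative example (assumes port.split_test('fast/dom/test2.html') returns
# ('fast/dom', 'test2.html') and TEST_PATH_SEPARATOR == '/'):
#   test_key(port, 'fast/dom/test2.html') == (['fast/dom/'], ['test', 2, '.html'])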
1471
1472
1473 def natural_sort_key(string_to_split):
1474     """ Turn a string into a list of string and number chunks.
1475         "z23a" -> ["z", 23, "a"]
1476
1477         Can be used to implement "natural sort" order. See:
1478             http://www.codinghorror.com/blog/2007/12/sorting-for-humans-natural-sort-order.html
1479             http://nedbatchelder.com/blog/200712.html#e20071211T054956
1480     """
1481     def tryint(val):
1482         try:
1483             return int(val)
1484         except ValueError:
1485             return val
1486
1487     return [tryint(chunk) for chunk in re.split(r'(\d+)', string_to_split)]
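# For instance, sorted(['t10.html', 't2.html'], key=natural_sort_key) yields
# ['t2.html', 't10.html'], whereas a plain lexicographic sort would put
# 't10.html' first.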
1488
1489
1490 class _WorkerState(object):
1491     """A class for the manager to use to track the current state of the workers."""
1492     def __init__(self, number, worker_connection):
1493         self.worker_connection = worker_connection
1494         self.number = number
1495         self.done = False
1496         self.current_test_name = None
1497         self.next_timeout = None
1498         self.stats = {}
1499         self.stats['name'] = worker_connection.name
1500         self.stats['num_tests'] = 0
1501         self.stats['total_time'] = 0
1502
1503     def __repr__(self):
1504         return "_WorkerState(" + str(self.__dict__) + ")"