# initial import
# [vuplus_webkit] Tools/Scripts/webkitpy/common/net/buildbot/buildbot.py
1 # Copyright (c) 2009, Google Inc. All rights reserved.
2 #
3 # Redistribution and use in source and binary forms, with or without
4 # modification, are permitted provided that the following conditions are
5 # met:
6 #
7 #     * Redistributions of source code must retain the above copyright
8 # notice, this list of conditions and the following disclaimer.
9 #     * Redistributions in binary form must reproduce the above
10 # copyright notice, this list of conditions and the following disclaimer
11 # in the documentation and/or other materials provided with the
12 # distribution.
13 #     * Neither the name of Google Inc. nor the names of its
14 # contributors may be used to endorse or promote products derived from
15 # this software without specific prior written permission.
16 #
17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #
29 # WebKit's Python module for interacting with WebKit's buildbot
30
31 try:
32     import json
33 except ImportError:
34     # python 2.5 compatibility
35     import webkitpy.thirdparty.simplejson as json
36
37 import operator
38 import re
39 import urllib
40 import urllib2
41
42 import webkitpy.common.config.urls as config_urls
43 from webkitpy.common.net.failuremap import FailureMap
44 from webkitpy.common.net.layouttestresults import LayoutTestResults
45 from webkitpy.common.net.networktransaction import NetworkTransaction
46 from webkitpy.common.net.regressionwindow import RegressionWindow
47 from webkitpy.common.system.logutils import get_logger
48 from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup
49
50
51 _log = get_logger(__file__)
52
53
class Builder(object):
    """Represents a single buildbot builder (one column on the waterfall).

    Knows how to map revisions to build numbers, fetch individual builds
    (as Build objects), force new builds via the web form, and walk backwards
    through build history to bracket the failure transition that turned the
    builder red.
    """

    def __init__(self, name, buildbot):
        self._name = name
        self._buildbot = buildbot
        # Cache of build_number -> Build so repeated lookups don't re-fetch.
        self._builds_cache = {}
        # Lazily-populated revision -> build number map; see _revision_to_build_map().
        self._revision_to_build_number = None
        # Imported lazily so merely importing this module doesn't trigger
        # mechanize's auto-install machinery.
        from webkitpy.thirdparty.autoinstalled.mechanize import Browser
        self._browser = Browser()
        self._browser.set_handle_robots(False) # The builder pages are excluded by robots.txt

    def name(self):
        """Return the builder's display name as shown on the buildbot."""
        return self._name

    def results_url(self):
        """Return the URL of the directory of archived results for this builder."""
        return "%s/results/%s" % (self._buildbot.buildbot_url, self.url_encoded_name())

    # In addition to per-build results, the build.chromium.org builders also
    # keep a directory that accumulates test results over many runs.
    def accumulated_results_url(self):
        # The base Builder has no such directory; subclasses may override.
        return None

    def url_encoded_name(self):
        """Return the builder name escaped for use in a URL path."""
        return urllib.quote(self._name)

    def url(self):
        """Return the builder's status page URL on the buildbot."""
        return "%s/builders/%s" % (self._buildbot.buildbot_url, self.url_encoded_name())

    # This provides a single place to mock
    def _fetch_build(self, build_number):
        """Fetch one build via the buildbot JSON API; returns a Build or None."""
        build_dictionary = self._buildbot._fetch_build_dictionary(self, build_number)
        if not build_dictionary:
            return None
        revision_string = build_dictionary['sourceStamp']['revision']
        return Build(self,
            build_number=int(build_dictionary['number']),
            # 'revision' may be None if a trunk build was started by the force-build button on the web page.
            revision=(int(revision_string) if revision_string else None),
            # Buildbot uses any number other than 0 to mean fail.  Since we fetch with
            # filter=1, passing builds may contain no 'results' value.
            is_green=(not build_dictionary.get('results')),
        )

    def build(self, build_number):
        """Return the (cached) Build for build_number, or None on lookup failure.

        Note: a failed fetch is cached as None, so later calls won't retry.
        """
        if not build_number:
            return None
        cached_build = self._builds_cache.get(build_number)
        if cached_build:
            return cached_build

        build = self._fetch_build(build_number)
        self._builds_cache[build_number] = build
        return build

    def latest_cached_build(self):
        """Return the newest build that has archived results available."""
        revision_build_pairs = self.revision_build_pairs_with_results()
        # Sort by build number so the last pair is the most recent build.
        revision_build_pairs.sort(key=lambda i: i[1])
        latest_build_number = revision_build_pairs[-1][1]
        return self.build(latest_build_number)

    def force_build(self, username="webkit-patch", comments=None):
        """Submit the builder's force-build web form to kick off a new build."""
        def predicate(form):
            # The force-build form is the one that has a "username" control.
            try:
                return form.find_control("username")
            except Exception, e:
                return False
        self._browser.open(self.url())
        self._browser.select_form(predicate=predicate)
        self._browser["username"] = username
        if comments:
            self._browser["comments"] = comments
        return self._browser.submit()

    # Matches archived-result names such as "r47483 (1)/" or "r47483 (1).zip".
    file_name_regexp = re.compile(r"r(?P<revision>\d+) \((?P<build_number>\d+)\)")
    def _revision_and_build_for_filename(self, filename):
        # Example: "r47483 (1)/" or "r47483 (1).zip"
        match = self.file_name_regexp.match(filename)
        return (int(match.group("revision")), int(match.group("build_number")))

    def _fetch_revision_to_build_map(self):
        """Build a revision -> build number dict by scraping the results directory listing."""
        # All _fetch requests go through _buildbot for easier mocking
        # FIXME: This should use NetworkTransaction's 404 handling instead.
        try:
            # FIXME: This method is horribly slow due to the huge network load.
            # FIXME: This is a poor way to do revision -> build mapping.
            # Better would be to ask buildbot through some sort of API.
            print "Loading revision/build list from %s." % self.results_url()
            print "This may take a while..."
            result_files = self._buildbot._fetch_twisted_directory_listing(self.results_url())
        except urllib2.HTTPError, error:
            if error.code != 404:
                raise
            # A 404 simply means no results have been archived yet.
            result_files = []

        # This assumes there was only one build per revision, which is false but we don't care for now.
        return dict([self._revision_and_build_for_filename(file_info["filename"]) for file_info in result_files])

    def _revision_to_build_map(self):
        """Return the revision -> build number map, fetching it on first use."""
        if not self._revision_to_build_number:
            self._revision_to_build_number = self._fetch_revision_to_build_map()
        return self._revision_to_build_number

    def revision_build_pairs_with_results(self):
        """Return (revision, build_number) pairs for builds with archived results."""
        return self._revision_to_build_map().items()

    # This assumes there can be only one build per revision, which is false, but we don't care for now.
    def build_for_revision(self, revision, allow_failed_lookups=False):
        """Return the Build for the given revision, or None.

        With allow_failed_lookups, synthesizes a red Build when the JSON API
        no longer knows about an old build number.
        """
        # NOTE: This lookup will fail if that exact revision was never built.
        build_number = self._revision_to_build_map().get(int(revision))
        if not build_number:
            return None
        build = self.build(build_number)
        if not build and allow_failed_lookups:
            # Builds for old revisions will fail to look up via buildbot's json api.
            build = Build(self,
                build_number=build_number,
                revision=revision,
                is_green=False,
            )
        return build

    def find_regression_window(self, red_build, look_back_limit=30):
        """Walk back from red_build to bracket the failure transition.

        Returns a RegressionWindow bounded by the last build without
        red_build's failures (green, or failing only unrelated tests) and
        the earliest build sharing them.  Both bounds are None when
        red_build is missing or green.
        """
        if not red_build or red_build.is_green():
            return RegressionWindow(None, None)
        common_failures = None
        current_build = red_build
        build_after_current_build = None
        look_back_count = 0
        while current_build:
            if current_build.is_green():
                # current_build can't possibly have any failures in common
                # with red_build because it's green.
                break
            results = current_build.layout_test_results()
            # We treat a lack of results as if all the test failed.
            # This occurs, for example, when we can't compile at all.
            if results:
                failures = set(results.failing_tests())
                if common_failures == None:
                    common_failures = failures
                else:
                    common_failures = common_failures.intersection(failures)
                    if not common_failures:
                        # current_build doesn't have any failures in common with
                        # the red build we're worried about.  We assume that any
                        # failures in current_build were due to flakiness.
                        break
            look_back_count += 1
            if look_back_count > look_back_limit:
                # Ran off the search limit; leave the "before" bound open.
                return RegressionWindow(None, current_build, failing_tests=common_failures)
            build_after_current_build = current_build
            current_build = current_build.previous_build()
        # We must iterate at least once because red_build is red.
        assert(build_after_current_build)
        # Current build must either be green or have no failures in common
        # with red build, so we've found our failure transition.
        return RegressionWindow(current_build, build_after_current_build, failing_tests=common_failures)

    def find_blameworthy_regression_window(self, red_build_number, look_back_limit=30, avoid_flakey_tests=True):
        """Return the regression window for a likely-real break, or None.

        Returns None when the search ran off look_back_limit, or (with
        avoid_flakey_tests) when only the newest build is red so the failure
        may just be flakiness.
        """
        red_build = self.build(red_build_number)
        regression_window = self.find_regression_window(red_build, look_back_limit)
        if not regression_window.build_before_failure():
            return None  # We ran off the limit of our search
        # If avoid_flakey_tests, require at least 2 bad builds before we
        # suspect a real failure transition.
        if avoid_flakey_tests and regression_window.failing_build() == red_build:
            return None
        return regression_window
221
222
class Build(object):
    """One numbered build of a Builder, tied to the revision it built."""

    def __init__(self, builder, build_number, revision, is_green):
        self._builder = builder
        self._number = build_number
        self._revision = revision
        self._is_green = is_green
        self._layout_test_results = None

    @staticmethod
    def build_url(builder, build_number):
        """Return the status page URL for the given build of the given builder."""
        return "%s/builds/%s" % (builder.url(), build_number)

    def url(self):
        """Return the status page URL for this build."""
        return self.build_url(self.builder(), self._number)

    def results_url(self):
        """Return the URL of the directory holding this build's archived results."""
        directory_name = "r%s (%s)" % (self.revision(), self._number)
        return "%s/%s" % (self._builder.results_url(), urllib.quote(directory_name))

    def results_zip_url(self):
        """Return the URL of the zipped results archive for this build."""
        return "%s.zip" % self.results_url()

    def _fetch_file_from_results(self, file_name):
        """Fetch one file from this build's results directory; None when unavailable."""
        # urlopen has been seen to hand back None when a redirect ends in a 404.
        response = urllib2.urlopen("%s/%s" % (self.results_url(), file_name))
        if not response:
            return None
        # The response is file-like (sometimes a plain object, sometimes an
        # addinfourl); read() is the correct way to get the body either way.
        return response.read()

    def layout_test_results(self):
        """Return (and cache) the parsed layout test results for this build."""
        if not self._layout_test_results:
            # FIXME: A 404 result should be cached so we stop hitting the network.
            fetched = NetworkTransaction(convert_404_to_None=True).run(lambda: self._fetch_file_from_results("full_results.json"))
            if not fetched:
                fetched = NetworkTransaction(convert_404_to_None=True).run(lambda: self._fetch_file_from_results("results.html"))
            # results_from_string understands both ORWT html and NRWT json.
            self._layout_test_results = LayoutTestResults.results_from_string(fetched)
        return self._layout_test_results

    def builder(self):
        """Return the Builder that produced this build."""
        return self._builder

    def revision(self):
        """Return the revision this build was made at (may be None)."""
        return self._revision

    def is_green(self):
        """Return whether this build passed."""
        return self._is_green

    def previous_build(self):
        # Lets callers avoid assuming build numbers are sequential; they may
        # not be across master changes, or when non-trunk builds are made.
        return self._builder.build(self._number - 1)
280
281
class BuildBot(object):
    """Front end for a buildbot master.

    Scrapes the waterfall's /one_box_per_builder page for per-builder status
    and uses the master's JSON API to fetch individual build dictionaries.
    """
    # Overridable (e.g. by tests) to substitute a mock Builder class.
    _builder_factory = Builder
    _default_url = config_urls.buildbot_url

    def __init__(self, url=None):
        self.buildbot_url = url if url else self._default_url
        # Cache of builder name -> Builder so each builder is created once.
        self._builder_by_name = {}

        # If any core builder is red we should not be landing patches.  Other
        # builders should be added to this list once they are known to be
        # reliable.
        # See https://bugs.webkit.org/show_bug.cgi?id=33296 and related bugs.
        self.core_builder_names_regexps = [
            "SnowLeopard.*Build",
            "SnowLeopard.*\(Test",
            "SnowLeopard.*\(WebKit2 Test",
            "Leopard.*\((?:Build|Test)",
            "Windows.*Build",
            "Windows.*\(Test",
            "WinCairo",
            "WinCE",
            "EFL",
            "GTK.*32",
            "GTK.*64.*Debug",  # Disallow the 64-bit Release bot which is broken.
            "Qt",
            "Chromium.*(Mac|Linux|Win).*Release$",
            "Chromium.*(Mac|Linux).*Release.*\(Tests",
        ]

    def _parse_last_build_cell(self, builder, cell):
        """Fill built_revision/is_green/build_number from the "last build" waterfall cell."""
        status_link = cell.find('a')
        if status_link:
            # Will be either a revision number or a build number
            revision_string = status_link.string
            # If revision_string has non-digits assume it's not a revision number.
            builder['built_revision'] = int(revision_string) \
                                        if not re.match('\D', revision_string) \
                                        else None

            # FIXME: We treat slave lost as green even though it is not to
            # work around the Qts bot being on a broken internet connection.
            # The real fix is https://bugs.webkit.org/show_bug.cgi?id=37099
            builder['is_green'] = not re.search('fail', cell.renderContents()) or \
                                  not not re.search('lost', cell.renderContents())

            status_link_regexp = r"builders/(?P<builder_name>.*)/builds/(?P<build_number>\d+)"
            link_match = re.match(status_link_regexp, status_link['href'])
            builder['build_number'] = int(link_match.group("build_number"))
        else:
            # We failed to find a link in the first cell, just give up.  This
            # can happen if a builder is just-added, the first cell will just
            # be "no build"
            # Other parts of the code depend on is_green being present.
            builder['is_green'] = False
            builder['built_revision'] = None
            builder['build_number'] = None

    def _parse_current_build_cell(self, builder, cell):
        """Fill "activity" and "pending_builds" from the current-activity waterfall cell."""
        activity_lines = cell.renderContents().split("<br />")
        builder["activity"] = activity_lines[0] # normally "building" or "idle"
        # The middle lines document how long left for any current builds.
        match = re.match("(?P<pending_builds>\d) pending", activity_lines[-1])
        builder["pending_builds"] = int(match.group("pending_builds")) if match else 0

    def _parse_builder_status_from_row(self, status_row):
        """Parse one waterfall table row into a builder status dict."""
        status_cells = status_row.findAll('td')
        builder = {}

        # First cell is the name
        name_link = status_cells[0].find('a')
        builder["name"] = unicode(name_link.string)

        self._parse_last_build_cell(builder, status_cells[1])
        self._parse_current_build_cell(builder, status_cells[2])
        return builder

    def _matches_regexps(self, builder_name, name_regexps):
        """Return True if builder_name matches any regexp in name_regexps."""
        for name_regexp in name_regexps:
            if re.match(name_regexp, builder_name):
                return True
        return False

    # FIXME: Should move onto Builder
    def _is_core_builder(self, builder_name):
        """Return True if builder_name names one of the trusted "core" builders."""
        return self._matches_regexps(builder_name, self.core_builder_names_regexps)

    # FIXME: This method needs to die, but is used by a unit test at the moment.
    def _builder_statuses_with_names_matching_regexps(self, builder_statuses, name_regexps):
        return [builder for builder in builder_statuses if self._matches_regexps(builder["name"], name_regexps)]

    def red_core_builders(self):
        """Return status dicts for core builders that are currently red."""
        return [builder for builder in self.core_builder_statuses() if not builder["is_green"]]

    def red_core_builders_names(self):
        """Return the names of the currently red core builders."""
        return [builder["name"] for builder in self.red_core_builders()]

    def idle_red_core_builders(self):
        """Return red core builders that are idle (not currently building)."""
        return [builder for builder in self.red_core_builders() if builder["activity"] == "idle"]

    def core_builders_are_green(self):
        """Return True when no core builder is red."""
        return not self.red_core_builders()

    # FIXME: These _fetch methods should move to a networking class.
    def _fetch_build_dictionary(self, builder, build_number):
        """Fetch one build's JSON dictionary, or None on network/decode errors."""
        # Note: filter=1 will remove None and {} and '', which cuts noise but can
        # cause keys to be missing which you might otherwise expect.
        # FIXME: The bot sends a *huge* amount of data for each request, we should
        # find a way to reduce the response size further.
        json_url = "%s/json/builders/%s/builds/%s?filter=1" % (self.buildbot_url, urllib.quote(builder.name()), build_number)
        try:
            return json.load(urllib2.urlopen(json_url))
        except urllib2.URLError, err:
            build_url = Build.build_url(builder, build_number)
            _log.error("Error fetching data for %s build %s (%s, json: %s): %s" % (builder.name(), build_number, build_url, json_url, err))
            return None
        except ValueError, err:
            # The server responded, but with something that wasn't valid JSON.
            build_url = Build.build_url(builder, build_number)
            _log.error("Error decoding json data from %s: %s" % (build_url, err))
            return None

    def _fetch_one_box_per_builder(self):
        """Fetch the waterfall's one_box_per_builder summary page."""
        build_status_url = "%s/one_box_per_builder" % self.buildbot_url
        return urllib2.urlopen(build_status_url)

    def _file_cell_text(self, file_cell):
        """Traverses down through firstChild elements until one containing a string is found, then returns that string"""
        element = file_cell
        while element.string is None and element.contents:
            element = element.contents[0]
        return element.string

    def _parse_twisted_file_row(self, file_row):
        """Parse one row of a twisted directory listing into a dict of unicode strings."""
        string_or_empty = lambda string: unicode(string) if string else u""
        file_cells = file_row.findAll('td')
        return {
            "filename": string_or_empty(self._file_cell_text(file_cells[0])),
            "size": string_or_empty(self._file_cell_text(file_cells[1])),
            "type": string_or_empty(self._file_cell_text(file_cells[2])),
            "encoding": string_or_empty(self._file_cell_text(file_cells[3])),
        }

    def _parse_twisted_directory_listing(self, page):
        """Parse a twisted.web directory listing page into a list of row dicts."""
        soup = BeautifulSoup(page)
        # HACK: Match only table rows with a class to ignore twisted header/footer rows.
        file_rows = soup.find('table').findAll('tr', {'class': re.compile(r'\b(?:directory|file)\b')})
        return [self._parse_twisted_file_row(file_row) for file_row in file_rows]

    # FIXME: There should be a better way to get this information directly from twisted.
    def _fetch_twisted_directory_listing(self, url):
        return self._parse_twisted_directory_listing(urllib2.urlopen(url))

    def builders(self):
        """Return Builder objects for every builder on the waterfall."""
        return [self.builder_with_name(status["name"]) for status in self.builder_statuses()]

    # This method pulls from /one_box_per_builder as an efficient way to get information about
    # every builder's latest status in a single request.
    def builder_statuses(self):
        """Return a status dict for every builder, scraped in one request."""
        soup = BeautifulSoup(self._fetch_one_box_per_builder())
        return [self._parse_builder_status_from_row(status_row) for status_row in soup.find('table').findAll('tr')]

    def core_builder_statuses(self):
        """Return status dicts for only the core (trusted) builders."""
        return [builder for builder in self.builder_statuses() if self._is_core_builder(builder["name"])]

    def builder_with_name(self, name):
        """Return the cached Builder with the given name, creating it on first use."""
        builder = self._builder_by_name.get(name)
        if not builder:
            builder = self._builder_factory(name, self)
            self._builder_by_name[name] = builder
        return builder

    def failure_map(self, only_core_builders=True):
        """Build a FailureMap of blameworthy regression windows for red builders."""
        builder_statuses = self.core_builder_statuses() if only_core_builders else self.builder_statuses()
        failure_map = FailureMap()
        # NOTE(review): revision_to_failing_bots is never used below; looks like leftover code.
        revision_to_failing_bots = {}
        for builder_status in builder_statuses:
            if builder_status["is_green"]:
                continue
            builder = self.builder_with_name(builder_status["name"])
            regression_window = builder.find_blameworthy_regression_window(builder_status["build_number"])
            if regression_window:
                failure_map.add_regression_window(builder, regression_window)
        return failure_map

    # This makes fewer requests than calling Builder.latest_build would.  It grabs all builder
    # statuses in one request using self.builder_statuses (fetching /one_box_per_builder instead of builder pages).
    def _latest_builds_from_builders(self, only_core_builders=True):
        builder_statuses = self.core_builder_statuses() if only_core_builders else self.builder_statuses()
        return [self.builder_with_name(status["name"]).build(status["build_number"]) for status in builder_statuses]

    def _build_at_or_before_revision(self, build, revision):
        """Walk back through a builder's history to the newest build at or before revision."""
        while build:
            if build.revision() <= revision:
                return build
            build = build.previous_build()
        # Implicitly returns None when the history runs out.

    def last_green_revision(self, only_core_builders=True):
        """Return the newest revision at which every (core) builder was green, or None."""
        builds = self._latest_builds_from_builders(only_core_builders)
        target_revision = builds[0].revision()
        # An alternate way to do this would be to start at one revision and walk backwards
        # checking builder.build_for_revision, however build_for_revision is very slow on first load.
        while True:
            # Make builds agree on revision
            builds = [self._build_at_or_before_revision(build, target_revision) for build in builds]
            if None in builds: # One of the builds failed to load from the server.
                return None
            min_revision = min(map(lambda build: build.revision(), builds))
            if min_revision != target_revision:
                target_revision = min_revision
                continue # Builds don't all agree on revision, keep searching
            # Check to make sure they're all green
            all_are_green = reduce(operator.and_, map(lambda build: build.is_green(), builds))
            if not all_are_green:
                target_revision -= 1
                continue
            return min_revision