# initial import
# [vuplus_webkit] Tools/Scripts/webkitpy/common/net/buildbot/buildbot.py
1 # Copyright (c) 2009, Google Inc. All rights reserved.
2 #
3 # Redistribution and use in source and binary forms, with or without
4 # modification, are permitted provided that the following conditions are
5 # met:
6 #
7 #     * Redistributions of source code must retain the above copyright
8 # notice, this list of conditions and the following disclaimer.
9 #     * Redistributions in binary form must reproduce the above
10 # copyright notice, this list of conditions and the following disclaimer
11 # in the documentation and/or other materials provided with the
12 # distribution.
13 #     * Neither the name of Google Inc. nor the names of its
14 # contributors may be used to endorse or promote products derived from
15 # this software without specific prior written permission.
16 #
17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #
29 # WebKit's Python module for interacting with WebKit's buildbot
30
31 try:
32     import json
33 except ImportError:
34     # python 2.5 compatibility
35     import webkitpy.thirdparty.simplejson as json
36
37 import operator
38 import re
39 import urllib
40 import urllib2
41
42 import webkitpy.common.config.urls as config_urls
43 from webkitpy.common.net.failuremap import FailureMap
44 from webkitpy.common.net.layouttestresults import LayoutTestResults
45 from webkitpy.common.net.networktransaction import NetworkTransaction
46 from webkitpy.common.net.regressionwindow import RegressionWindow
47 from webkitpy.common.system.logutils import get_logger
48 from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup
49
50
51 _log = get_logger(__file__)
52
53
class Builder(object):
    """Represents a single buildbot builder (one column on the waterfall).

    Knows how to map revisions to build numbers, fetch individual builds
    (as Build objects), force new builds via the web form, and walk backwards
    through build history to bracket the failure transition that turned the
    builder red.
    """

    def __init__(self, name, buildbot):
        self._name = name
        self._buildbot = buildbot
        # Cache of build_number -> Build so repeated lookups don't re-fetch.
        self._builds_cache = {}
        # Lazily-populated revision -> build number map; see _revision_to_build_map().
        self._revision_to_build_number = None
        # Imported lazily so merely importing this module doesn't trigger
        # mechanize's auto-install machinery.
        from webkitpy.thirdparty.autoinstalled.mechanize import Browser
        self._browser = Browser()
        self._browser.set_handle_robots(False) # The builder pages are excluded by robots.txt

    def name(self):
        """Return the builder's display name as shown on the buildbot."""
        return self._name

    def results_url(self):
        """Return the URL of the directory of archived results for this builder."""
        return "%s/results/%s" % (self._buildbot.buildbot_url, self.url_encoded_name())

    # In addition to per-build results, the build.chromium.org builders also
    # keep a directory that accumulates test results over many runs.
    def accumulated_results_url(self):
        # The base Builder has no such directory; subclasses may override.
        return None

    def url_encoded_name(self):
        """Return the builder name escaped for use in a URL path."""
        return urllib.quote(self._name)

    def url(self):
        """Return the builder's status page URL on the buildbot."""
        return "%s/builders/%s" % (self._buildbot.buildbot_url, self.url_encoded_name())

    # This provides a single place to mock
    def _fetch_build(self, build_number):
        """Fetch one build via the buildbot JSON API; returns a Build or None."""
        build_dictionary = self._buildbot._fetch_build_dictionary(self, build_number)
        if not build_dictionary:
            return None
        revision_string = build_dictionary['sourceStamp']['revision']
        return Build(self,
            build_number=int(build_dictionary['number']),
            # 'revision' may be None if a trunk build was started by the force-build button on the web page.
            revision=(int(revision_string) if revision_string else None),
            # Buildbot uses any number other than 0 to mean fail.  Since we fetch with
            # filter=1, passing builds may contain no 'results' value.
            is_green=(not build_dictionary.get('results')),
        )

    def build(self, build_number):
        """Return the (cached) Build for build_number, or None on lookup failure.

        Note: a failed fetch is cached as None, so later calls won't retry.
        """
        if not build_number:
            return None
        cached_build = self._builds_cache.get(build_number)
        if cached_build:
            return cached_build

        build = self._fetch_build(build_number)
        self._builds_cache[build_number] = build
        return build

    def latest_cached_build(self):
        """Return the newest build that has archived results available."""
        revision_build_pairs = self.revision_build_pairs_with_results()
        # Sort by build number so the last pair is the most recent build.
        revision_build_pairs.sort(key=lambda i: i[1])
        latest_build_number = revision_build_pairs[-1][1]
        return self.build(latest_build_number)

    def force_build(self, username="webkit-patch", comments=None):
        """Submit the builder's force-build web form to kick off a new build."""
        def predicate(form):
            # The force-build form is the one that has a "username" control.
            try:
                return form.find_control("username")
            except Exception, e:
                return False
        self._browser.open(self.url())
        self._browser.select_form(predicate=predicate)
        self._browser["username"] = username
        if comments:
            self._browser["comments"] = comments
        return self._browser.submit()

    # Matches archived-result names such as "r47483 (1)/" or "r47483 (1).zip".
    file_name_regexp = re.compile(r"r(?P<revision>\d+) \((?P<build_number>\d+)\)")
    def _revision_and_build_for_filename(self, filename):
        # Example: "r47483 (1)/" or "r47483 (1).zip"
        match = self.file_name_regexp.match(filename)
        return (int(match.group("revision")), int(match.group("build_number")))

    def _fetch_revision_to_build_map(self):
        """Build a revision -> build number dict by scraping the results directory listing."""
        # All _fetch requests go through _buildbot for easier mocking
        # FIXME: This should use NetworkTransaction's 404 handling instead.
        try:
            # FIXME: This method is horribly slow due to the huge network load.
            # FIXME: This is a poor way to do revision -> build mapping.
            # Better would be to ask buildbot through some sort of API.
            print "Loading revision/build list from %s." % self.results_url()
            print "This may take a while..."
            result_files = self._buildbot._fetch_twisted_directory_listing(self.results_url())
        except urllib2.HTTPError, error:
            if error.code != 404:
                raise
            # A 404 simply means no results have been archived yet.
            result_files = []

        # This assumes there was only one build per revision, which is false but we don't care for now.
        return dict([self._revision_and_build_for_filename(file_info["filename"]) for file_info in result_files])

    def _revision_to_build_map(self):
        """Return the revision -> build number map, fetching it on first use."""
        if not self._revision_to_build_number:
            self._revision_to_build_number = self._fetch_revision_to_build_map()
        return self._revision_to_build_number

    def revision_build_pairs_with_results(self):
        """Return (revision, build_number) pairs for builds with archived results."""
        return self._revision_to_build_map().items()

    # This assumes there can be only one build per revision, which is false, but we don't care for now.
    def build_for_revision(self, revision, allow_failed_lookups=False):
        """Return the Build for the given revision, or None.

        With allow_failed_lookups, synthesizes a red Build when the JSON API
        no longer knows about an old build number.
        """
        # NOTE: This lookup will fail if that exact revision was never built.
        build_number = self._revision_to_build_map().get(int(revision))
        if not build_number:
            return None
        build = self.build(build_number)
        if not build and allow_failed_lookups:
            # Builds for old revisions will fail to look up via buildbot's json api.
            build = Build(self,
                build_number=build_number,
                revision=revision,
                is_green=False,
            )
        return build

    def find_regression_window(self, red_build, look_back_limit=30):
        """Walk back from red_build to bracket the failure transition.

        Returns a RegressionWindow bounded by the last build without
        red_build's failures (green, or failing only unrelated tests) and
        the earliest build sharing them.  Both bounds are None when
        red_build is missing or green.
        """
        if not red_build or red_build.is_green():
            return RegressionWindow(None, None)
        common_failures = None
        current_build = red_build
        build_after_current_build = None
        look_back_count = 0
        while current_build:
            if current_build.is_green():
                # current_build can't possibly have any failures in common
                # with red_build because it's green.
                break
            results = current_build.layout_test_results()
            # We treat a lack of results as if all the test failed.
            # This occurs, for example, when we can't compile at all.
            if results:
                failures = set(results.failing_tests())
                if common_failures == None:
                    common_failures = failures
                else:
                    common_failures = common_failures.intersection(failures)
                    if not common_failures:
                        # current_build doesn't have any failures in common with
                        # the red build we're worried about.  We assume that any
                        # failures in current_build were due to flakiness.
                        break
            look_back_count += 1
            if look_back_count > look_back_limit:
                # Ran off the search limit; leave the "before" bound open.
                return RegressionWindow(None, current_build, failing_tests=common_failures)
            build_after_current_build = current_build
            current_build = current_build.previous_build()
        # We must iterate at least once because red_build is red.
        assert(build_after_current_build)
        # Current build must either be green or have no failures in common
        # with red build, so we've found our failure transition.
        return RegressionWindow(current_build, build_after_current_build, failing_tests=common_failures)

    def find_blameworthy_regression_window(self, red_build_number, look_back_limit=30, avoid_flakey_tests=True):
        """Return the regression window for a likely-real break, or None.

        Returns None when the search ran off look_back_limit, or (with
        avoid_flakey_tests) when only the newest build is red so the failure
        may just be flakiness.
        """
        red_build = self.build(red_build_number)
        regression_window = self.find_regression_window(red_build, look_back_limit)
        if not regression_window.build_before_failure():
            return None  # We ran off the limit of our search
        # If avoid_flakey_tests, require at least 2 bad builds before we
        # suspect a real failure transition.
        if avoid_flakey_tests and regression_window.failing_build() == red_build:
            return None
        return regression_window
221
222
class Build(object):
    """One numbered build of a Builder, tied to the revision it built."""

    def __init__(self, builder, build_number, revision, is_green):
        self._builder = builder
        self._number = build_number
        self._revision = revision
        self._is_green = is_green
        self._layout_test_results = None

    @staticmethod
    def build_url(builder, build_number):
        """Return the status page URL for the given build of the given builder."""
        return "%s/builds/%s" % (builder.url(), build_number)

    def url(self):
        """Return the status page URL for this build."""
        return self.build_url(self.builder(), self._number)

    def results_url(self):
        """Return the URL of the directory holding this build's archived results."""
        directory_name = "r%s (%s)" % (self.revision(), self._number)
        return "%s/%s" % (self._builder.results_url(), urllib.quote(directory_name))

    def results_zip_url(self):
        """Return the URL of the zipped results archive for this build."""
        return "%s.zip" % self.results_url()

    def _fetch_file_from_results(self, file_name):
        """Fetch one file from this build's results directory; None when unavailable."""
        # urlopen has been seen to hand back None when a redirect ends in a 404.
        response = urllib2.urlopen("%s/%s" % (self.results_url(), file_name))
        if not response:
            return None
        # The response is file-like (sometimes a plain object, sometimes an
        # addinfourl); read() is the correct way to get the body either way.
        return response.read()

    def layout_test_results(self):
        """Return (and cache) the parsed layout test results for this build."""
        if not self._layout_test_results:
            # FIXME: A 404 result should be cached so we stop hitting the network.
            fetched = NetworkTransaction(convert_404_to_None=True).run(lambda: self._fetch_file_from_results("full_results.json"))
            if not fetched:
                fetched = NetworkTransaction(convert_404_to_None=True).run(lambda: self._fetch_file_from_results("results.html"))
            # results_from_string understands both ORWT html and NRWT json.
            self._layout_test_results = LayoutTestResults.results_from_string(fetched)
        return self._layout_test_results

    def builder(self):
        """Return the Builder that produced this build."""
        return self._builder

    def revision(self):
        """Return the revision this build was made at (may be None)."""
        return self._revision

    def is_green(self):
        """Return whether this build passed."""
        return self._is_green

    def previous_build(self):
        # Lets callers avoid assuming build numbers are sequential; they may
        # not be across master changes, or when non-trunk builds are made.
        return self._builder.build(self._number - 1)
280
281
class BuildBot(object):
    """Front end for a buildbot master.

    Scrapes the waterfall's /one_box_per_builder page for per-builder status
    and uses the master's JSON API to fetch individual build dictionaries.
    """
    # Overridable (e.g. by tests) to substitute a mock Builder class.
    _builder_factory = Builder
    _default_url = config_urls.buildbot_url

    def __init__(self, url=None):
        self.buildbot_url = url if url else self._default_url
        # Cache of builder name -> Builder so each builder is created once.
        self._builder_by_name = {}

        # If any core builder is red we should not be landing patches.  Other
        # builders should be added to this list once they are known to be
        # reliable.
        # See https://bugs.webkit.org/show_bug.cgi?id=33296 and related bugs.
        self.core_builder_names_regexps = [
            "SnowLeopard.*Build",
            "SnowLeopard.*\(Test",
            "SnowLeopard.*\(WebKit2 Test",
            "Leopard.*\((?:Build|Test)",
            "Windows.*Build",
            "Windows.*\(Test",
            "WinCairo",
            "WinCE",
            "EFL",
            "GTK.*32",
            "GTK.*64.*Debug",  # Disallow the 64-bit Release bot which is broken.
            "Qt",
            "Chromium.*(Mac|Linux|Win).*Release$",
            "Chromium.*(Mac|Linux).*Release.*\(Tests",
        ]

    def _parse_last_build_cell(self, builder, cell):
        """Fill built_revision/is_green/build_number from the "last build" waterfall cell."""
        status_link = cell.find('a')
        if status_link:
            # Will be either a revision number or a build number
            revision_string = status_link.string
            # If revision_string has non-digits assume it's not a revision number.
            builder['built_revision'] = int(revision_string) \
                                        if not re.match('\D', revision_string) \
                                        else None

            # FIXME: We treat slave lost as green even though it is not to
            # work around the Qts bot being on a broken internet connection.
            # The real fix is https://bugs.webkit.org/show_bug.cgi?id=37099
            builder['is_green'] = not re.search('fail', cell.renderContents()) or \
                                  not not re.search('lost', cell.renderContents())

            status_link_regexp = r"builders/(?P<builder_name>.*)/builds/(?P<build_number>\d+)"
            link_match = re.match(status_link_regexp, status_link['href'])
            builder['build_number'] = int(link_match.group("build_number"))
        else:
            # We failed to find a link in the first cell, just give up.  This
            # can happen if a builder is just-added, the first cell will just
            # be "no build"
            # Other parts of the code depend on is_green being present.
            builder['is_green'] = False
            builder['built_revision'] = None
            builder['build_number'] = None

    def _parse_current_build_cell(self, builder, cell):
        """Fill "activity" and "pending_builds" from the current-activity waterfall cell."""
        activity_lines = cell.renderContents().split("<br />")
        builder["activity"] = activity_lines[0] # normally "building" or "idle"
        # The middle lines document how long left for any current builds.
        match = re.match("(?P<pending_builds>\d) pending", activity_lines[-1])
        builder["pending_builds"] = int(match.group("pending_builds")) if match else 0

    def _parse_builder_status_from_row(self, status_row):
        """Parse one waterfall table row into a builder status dict."""
        status_cells = status_row.findAll('td')
        builder = {}

        # First cell is the name
        name_link = status_cells[0].find('a')
        builder["name"] = unicode(name_link.string)

        self._parse_last_build_cell(builder, status_cells[1])
        self._parse_current_build_cell(builder, status_cells[2])
        return builder

    def _matches_regexps(self, builder_name, name_regexps):
        """Return True if builder_name matches any regexp in name_regexps."""
        for name_regexp in name_regexps:
            if re.match(name_regexp, builder_name):
                return True
        return False

    # FIXME: Should move onto Builder
    def _is_core_builder(self, builder_name):
        """Return True if builder_name names one of the trusted "core" builders."""
        return self._matches_regexps(builder_name, self.core_builder_names_regexps)

    # FIXME: This method needs to die, but is used by a unit test at the moment.
    def _builder_statuses_with_names_matching_regexps(self, builder_statuses, name_regexps):
        return [builder for builder in builder_statuses if self._matches_regexps(builder["name"], name_regexps)]

    def red_core_builders(self):
        """Return status dicts for core builders that are currently red."""
        return [builder for builder in self.core_builder_statuses() if not builder["is_green"]]

    def red_core_builders_names(self):
        """Return the names of the currently red core builders."""
        return [builder["name"] for builder in self.red_core_builders()]

    def idle_red_core_builders(self):
        """Return red core builders that are idle (not currently building)."""
        return [builder for builder in self.red_core_builders() if builder["activity"] == "idle"]

    def core_builders_are_green(self):
        """Return True when no core builder is red."""
        return not self.red_core_builders()

    # FIXME: These _fetch methods should move to a networking class.
    def _fetch_build_dictionary(self, builder, build_number):
        """Fetch one build's JSON dictionary, or None on network/decode errors."""
        # Note: filter=1 will remove None and {} and '', which cuts noise but can
        # cause keys to be missing which you might otherwise expect.
        # FIXME: The bot sends a *huge* amount of data for each request, we should
        # find a way to reduce the response size further.
        json_url = "%s/json/builders/%s/builds/%s?filter=1" % (self.buildbot_url, urllib.quote(builder.name()), build_number)
        try:
            return json.load(urllib2.urlopen(json_url))
        except urllib2.URLError, err:
            build_url = Build.build_url(builder, build_number)
            _log.error("Error fetching data for %s build %s (%s, json: %s): %s" % (builder.name(), build_number, build_url, json_url, err))
            return None
        except ValueError, err:
            # The server responded, but with something that wasn't valid JSON.
            build_url = Build.build_url(builder, build_number)
            _log.error("Error decoding json data from %s: %s" % (build_url, err))
            return None

    def _fetch_one_box_per_builder(self):
        """Fetch the waterfall's one_box_per_builder summary page."""
        build_status_url = "%s/one_box_per_builder" % self.buildbot_url
        return urllib2.urlopen(build_status_url)

    def _file_cell_text(self, file_cell):
        """Traverses down through firstChild elements until one containing a string is found, then returns that string"""
        element = file_cell
        while element.string is None and element.contents:
            element = element.contents[0]
        return element.string

    def _parse_twisted_file_row(self, file_row):
        """Parse one row of a twisted directory listing into a dict of unicode strings."""
        string_or_empty = lambda string: unicode(string) if string else u""
        file_cells = file_row.findAll('td')
        return {
            "filename": string_or_empty(self._file_cell_text(file_cells[0])),
            "size": string_or_empty(self._file_cell_text(file_cells[1])),
            "type": string_or_empty(self._file_cell_text(file_cells[2])),
            "encoding": string_or_empty(self._file_cell_text(file_cells[3])),
        }

    def _parse_twisted_directory_listing(self, page):
        """Parse a twisted.web directory listing page into a list of row dicts."""
        soup = BeautifulSoup(page)
        # HACK: Match only table rows with a class to ignore twisted header/footer rows.
        file_rows = soup.find('table').findAll('tr', {'class': re.compile(r'\b(?:directory|file)\b')})
        return [self._parse_twisted_file_row(file_row) for file_row in file_rows]

    # FIXME: There should be a better way to get this information directly from twisted.
    def _fetch_twisted_directory_listing(self, url):
        return self._parse_twisted_directory_listing(urllib2.urlopen(url))

    def builders(self):
        """Return Builder objects for every builder on the waterfall."""
        return [self.builder_with_name(status["name"]) for status in self.builder_statuses()]

    # This method pulls from /one_box_per_builder as an efficient way to get information about
    # every builder's latest status in a single request.
    def builder_statuses(self):
        """Return a status dict for every builder, scraped in one request."""
        soup = BeautifulSoup(self._fetch_one_box_per_builder())
        return [self._parse_builder_status_from_row(status_row) for status_row in soup.find('table').findAll('tr')]

    def core_builder_statuses(self):
        """Return status dicts for only the core (trusted) builders."""
        return [builder for builder in self.builder_statuses() if self._is_core_builder(builder["name"])]

    def builder_with_name(self, name):
        """Return the cached Builder with the given name, creating it on first use."""
        builder = self._builder_by_name.get(name)
        if not builder:
            builder = self._builder_factory(name, self)
            self._builder_by_name[name] = builder
        return builder

    def failure_map(self, only_core_builders=True):
        """Build a FailureMap of blameworthy regression windows for red builders."""
        builder_statuses = self.core_builder_statuses() if only_core_builders else self.builder_statuses()
        failure_map = FailureMap()
        # NOTE(review): revision_to_failing_bots is never used below; looks like leftover code.
        revision_to_failing_bots = {}
        for builder_status in builder_statuses:
            if builder_status["is_green"]:
                continue
            builder = self.builder_with_name(builder_status["name"])
            regression_window = builder.find_blameworthy_regression_window(builder_status["build_number"])
            if regression_window:
                failure_map.add_regression_window(builder, regression_window)
        return failure_map

    # This makes fewer requests than calling Builder.latest_build would.  It grabs all builder
    # statuses in one request using self.builder_statuses (fetching /one_box_per_builder instead of builder pages).
    def _latest_builds_from_builders(self, only_core_builders=True):
        builder_statuses = self.core_builder_statuses() if only_core_builders else self.builder_statuses()
        return [self.builder_with_name(status["name"]).build(status["build_number"]) for status in builder_statuses]

    def _build_at_or_before_revision(self, build, revision):
        """Walk back through a builder's history to the newest build at or before revision."""
        while build:
            if build.revision() <= revision:
                return build
            build = build.previous_build()
        # Implicitly returns None when the history runs out.

    def last_green_revision(self, only_core_builders=True):
        """Return the newest revision at which every (core) builder was green, or None."""
        builds = self._latest_builds_from_builders(only_core_builders)
        target_revision = builds[0].revision()
        # An alternate way to do this would be to start at one revision and walk backwards
        # checking builder.build_for_revision, however build_for_revision is very slow on first load.
        while True:
            # Make builds agree on revision
            builds = [self._build_at_or_before_revision(build, target_revision) for build in builds]
            if None in builds: # One of the builds failed to load from the server.
                return None
            min_revision = min(map(lambda build: build.revision(), builds))
            if min_revision != target_revision:
                target_revision = min_revision
                continue # Builds don't all agree on revision, keep searching
            # Check to make sure they're all green
            all_are_green = reduce(operator.and_, map(lambda build: build.is_green(), builds))
            if not all_are_green:
                target_revision -= 1
                continue
            return min_revision