buildkite_search_source.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. import re
  10. from datetime import datetime
  11. from typing import Any
  12. from materialize.buildkite_insights.buildkite_api.buildkite_constants import (
  13. BUILDKITE_COMPLETED_BUILD_STATES,
  14. BUILDKITE_FAILED_BUILD_STATES,
  15. BUILDKITE_RELEVANT_FAILED_BUILD_STEP_STATES,
  16. )
  17. from materialize.buildkite_insights.cache import annotations_cache, builds_cache
  18. from materialize.buildkite_insights.cache.cache_constants import FetchMode
  19. from materialize.buildkite_insights.data.build_annotation import BuildAnnotation
  20. from materialize.buildkite_insights.data.build_info import Build
  21. from materialize.buildkite_insights.data.build_step import BuildStepMatcher
  22. from materialize.buildkite_insights.util.build_step_utils import (
  23. extract_build_step_outcomes,
  24. )
  # Wildcard accepted in place of a pipeline slug: BuildkiteDataSource.fetch_builds
  # compares its `pipeline` argument against this value to decide whether to
  # query the builds of all pipelines.
  ANY_PIPELINE_VALUE = "*"

  # Wildcard value for the branch selection.
  # NOTE(review): not referenced inside this module; presumably callers map it
  # to "no branch filter" before calling fetch_builds - confirm.
  ANY_BRANCH_VALUE = "*"
  class BuildkiteDataSource:
      """Load Buildkite builds and build annotations through the cache modules.

      Raw build dictionaries returned by ``builds_cache`` are converted into
      ``Build`` objects, and raw annotation dictionaries returned by
      ``annotations_cache`` are converted into ``BuildAnnotation`` objects.
      """

      def __init__(
          self,
          fetch_builds_mode: FetchMode,
          fetch_annotations_mode: FetchMode,
          max_build_fetches: int,
          first_build_page_to_fetch: int,
          only_failed_builds: bool,
          only_failed_build_step_keys: list[str],
      ):
          """Store the fetch configuration used by the other methods.

          Args:
              fetch_builds_mode: fetch mode handed to the builds cache.
              fetch_annotations_mode: fetch mode handed to the annotations cache.
              max_build_fetches: forwarded unchanged to the builds cache queries.
              first_build_page_to_fetch: forwarded to the builds cache queries
                  as ``first_page``.
              only_failed_builds: when true, builds are queried with
                  ``BUILDKITE_FAILED_BUILD_STATES`` as the state filter.
              only_failed_build_step_keys: when non-empty, only builds with a
                  matching failed build step are kept (see ``filter_builds``).
          """
          # What to fetch.
          self.only_failed_builds = only_failed_builds
          self.only_failed_build_step_keys = only_failed_build_step_keys
          # How (and how much) to fetch.
          self.fetch_builds_mode = fetch_builds_mode
          self.fetch_annotations_mode = fetch_annotations_mode
          self.max_build_fetches = max_build_fetches
          self.first_build_page_to_fetch = first_build_page_to_fetch

      def fetch_builds(self, pipeline: str, branch: str | None) -> list[Build]:
          """Fetch builds and convert them into ``Build`` objects.

          Errors raised while fetching (e.g. an exceeded rate limit) are not
          handled here on purpose: continuing with incomplete data makes little
          sense because fetching the annotations would most likely fail as well.

          Args:
              pipeline: pipeline slug, or ``ANY_PIPELINE_VALUE`` to query the
                  builds of all pipelines.
              branch: forwarded unchanged to the builds cache query.

          Returns:
              One ``Build`` per raw build that passed ``filter_builds``, in the
              order delivered by the cache. ``created_at`` is a naive datetime
              truncated to whole seconds.
          """
          # An empty list is passed when there is no restriction to failed
          # builds (presumably "no state filter" - confirm in builds_cache).
          state_filter = BUILDKITE_FAILED_BUILD_STATES if self.only_failed_builds else []

          # Keyword arguments shared by both cache entry points.
          shared_query_args = {
              "branch": branch,
              "build_states": state_filter,
              "first_page": self.first_build_page_to_fetch,
          }

          if pipeline == ANY_PIPELINE_VALUE:
              fetched_builds = builds_cache.get_or_query_builds_for_all_pipelines(
                  self.fetch_builds_mode,
                  self.max_build_fetches,
                  **shared_query_args,
              )
          else:
              fetched_builds = builds_cache.get_or_query_builds(
                  pipeline,
                  self.fetch_builds_mode,
                  self.max_build_fetches,
                  **shared_query_args,
              )

          relevant_builds = self.filter_builds(
              fetched_builds, self.only_failed_build_step_keys
          )

          return [
              Build(
                  number=raw_build["number"],
                  pipeline=raw_build["pipeline"]["slug"],
                  state=raw_build["state"],
                  branch=raw_build["branch"],
                  web_url=raw_build["web_url"],
                  # The timestamp must carry fractional seconds and a literal
                  # "Z"; the fraction is dropped after parsing.
                  created_at=datetime.strptime(
                      raw_build["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ"
                  ).replace(microsecond=0),
              )
              for raw_build in relevant_builds
          ]

      def filter_builds(
          self,
          builds_data: list[Any],
          only_failed_build_step_keys: list[str],
      ) -> list[Any]:
          """Keep only the builds in which one of the given steps failed.

          Args:
              builds_data: raw build dictionaries; each needs a ``"number"``
                  entry convertible with ``int``.
              only_failed_build_step_keys: build step keys to look for. When
                  empty, ``builds_data`` is returned as is (same object).

          Returns:
              The builds whose number appears among the step outcomes that
              ``extract_build_step_outcomes`` reports for the given step keys
              and ``BUILDKITE_RELEVANT_FAILED_BUILD_STEP_STATES``.
          """
          if len(only_failed_build_step_keys) == 0:
              # No step filter requested.
              return builds_data

          step_matchers = [
              BuildStepMatcher(step_key, None)
              for step_key in only_failed_build_step_keys
          ]
          failed_outcomes = extract_build_step_outcomes(
              builds_data=builds_data,
              selected_build_steps=step_matchers,
              build_step_states=BUILDKITE_RELEVANT_FAILED_BUILD_STEP_STATES,
          )
          # Set for constant-time membership checks below.
          numbers_of_matching_builds = {
              step_outcome.build_number for step_outcome in failed_outcomes
          }

          return [
              raw_build
              for raw_build in builds_data
              if int(raw_build["number"]) in numbers_of_matching_builds
          ]

      def fetch_annotations(
          self, build: Build, verbose: bool = False
      ) -> list[BuildAnnotation]:
          """Fetch the annotations of a build as ``BuildAnnotation`` objects.

          Args:
              build: the build whose annotations are requested; its ``state``,
                  ``pipeline`` and ``number`` are read.
              verbose: when false, the cache is queried in quiet mode.

          Returns:
              One ``BuildAnnotation`` per raw annotation. The content is the
              annotation's ``body_html`` with tags stripped; the title is
              derived from that HTML and may be ``None``.
          """
          # Annotations are only added to the cache for builds in a completed
          # state.
          build_is_completed = build.state in BUILDKITE_COMPLETED_BUILD_STATES

          raw_annotations = annotations_cache.get_or_query_annotations(
              fetch_mode=self.fetch_annotations_mode,
              pipeline_slug=build.pipeline,
              build_number=build.number,
              add_to_cache_if_not_present=build_is_completed,
              quiet_mode=not verbose,
          )

          annotation_bodies = (entry["body_html"] for entry in raw_annotations)
          return [
              BuildAnnotation(
                  content=self.clean_annotation_text(body_html),
                  title=self.try_extracting_title_from_annotation_html(body_html),
              )
              for body_html in annotation_bodies
          ]

      def clean_annotation_text(self, annotation_html: str) -> str:
          """Return ``annotation_html`` with every ``<...>`` tag removed.

          Only the tags are stripped; HTML entities are left untouched.
          """
          tag_pattern = re.compile(r"<[^>]+>")
          return tag_pattern.sub("", annotation_html)

      def try_extracting_title_from_annotation_html(
          self, annotation_html: str
      ) -> str | None:
          """Derive a title from the first ``<p>...</p>`` of an annotation.

          Returns:
              ``None`` when the HTML has no ``<p>...</p>`` on a single line.
              Otherwise the link text when the paragraph contains
              ``<a href=...>NAME</a> failed`` or ``... succeeded``, else the
              paragraph content with tags stripped.
          """
          # The first paragraph is treated as the header of the annotation.
          if (paragraph := re.search("<p>(.*?)</p>", annotation_html)) is None:
              return None

          header_html = paragraph.group(1)
          step_link = re.search(
              "<a href=.*?>(.*?)</a> (failed|succeeded)", header_html
          )
          if step_link is None:
              # No linked build step name: fall back to the plain header text.
              return self.clean_annotation_text(header_html)

          return step_link.group(1)