# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.
- import re
- from datetime import datetime
- from typing import Any
- from materialize.buildkite_insights.buildkite_api.buildkite_constants import (
- BUILDKITE_COMPLETED_BUILD_STATES,
- BUILDKITE_FAILED_BUILD_STATES,
- BUILDKITE_RELEVANT_FAILED_BUILD_STEP_STATES,
- )
- from materialize.buildkite_insights.cache import annotations_cache, builds_cache
- from materialize.buildkite_insights.cache.cache_constants import FetchMode
- from materialize.buildkite_insights.data.build_annotation import BuildAnnotation
- from materialize.buildkite_insights.data.build_info import Build
- from materialize.buildkite_insights.data.build_step import BuildStepMatcher
- from materialize.buildkite_insights.util.build_step_utils import (
- extract_build_step_outcomes,
- )
# Wildcard pipeline name: BuildkiteDataSource.fetch_builds queries the builds
# of all pipelines when it is given this value instead of a pipeline slug.
ANY_PIPELINE_VALUE = "*"
# Wildcard branch name. Not referenced in this module; presumably the branch
# counterpart of ANY_PIPELINE_VALUE for callers -- confirm against the callers.
ANY_BRANCH_VALUE = "*"
class BuildkiteDataSource:
    """Load Buildkite builds and build annotations through the local caches.

    Raw payloads are obtained from ``builds_cache`` / ``annotations_cache`` and
    converted into ``Build`` and ``BuildAnnotation`` objects.
    """

    # Timestamp layout of the "created_at" field in raw build payloads.
    _CREATED_AT_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
    # Any HTML tag; used to reduce annotation HTML to its text content.
    _HTML_TAG_PATTERN = re.compile(r"<[^>]+>")
    # First <p>...</p> paragraph that does not span multiple lines.
    _HEADER_PARAGRAPH_PATTERN = re.compile(r"<p>(.*?)</p>")
    # Link text of a '<a href=...>NAME</a> failed|succeeded' header.
    _BUILD_STEP_NAME_PATTERN = re.compile(
        r"<a href=.*?>(.*?)</a> (failed|succeeded)"
    )

    def __init__(
        self,
        fetch_builds_mode: FetchMode,
        fetch_annotations_mode: FetchMode,
        max_build_fetches: int,
        first_build_page_to_fetch: int,
        only_failed_builds: bool,
        only_failed_build_step_keys: list[str],
    ):
        """Store the fetch configuration.

        Args:
            fetch_builds_mode: Fetch mode handed to the builds cache.
            fetch_annotations_mode: Fetch mode handed to the annotations cache.
            max_build_fetches: Forwarded unchanged to the builds cache queries.
            first_build_page_to_fetch: Forwarded to the builds cache queries as
                ``first_page``.
            only_failed_builds: If true, builds are requested with
                ``BUILDKITE_FAILED_BUILD_STATES`` as the state filter.
            only_failed_build_step_keys: If non-empty, only builds with a
                relevant failed outcome for one of these step keys are kept
                (see ``filter_builds``).
        """
        self.fetch_builds_mode = fetch_builds_mode
        self.fetch_annotations_mode = fetch_annotations_mode
        self.max_build_fetches = max_build_fetches
        self.first_build_page_to_fetch = first_build_page_to_fetch
        self.only_failed_builds = only_failed_builds
        self.only_failed_build_step_keys = only_failed_build_step_keys

    def fetch_builds(self, pipeline: str, branch: str | None) -> list[Build]:
        """Fetch builds of a pipeline and convert them to ``Build`` objects.

        Args:
            pipeline: Pipeline slug, or ``ANY_PIPELINE_VALUE`` to query the
                builds of all pipelines.
            branch: Forwarded unchanged to the builds cache query.

        Returns:
            The builds that pass ``filter_builds``, in the order returned by
            the cache. ``created_at`` is a naive ``datetime`` truncated to
            whole seconds.

        Raises:
            KeyError: If a raw build lacks one of the fields read here.
            ValueError: If ``created_at`` does not match the expected layout.
        """
        # An empty list is passed when no restriction on the state is wanted.
        build_states = (
            BUILDKITE_FAILED_BUILD_STATES if self.only_failed_builds else []
        )

        # do not try to continue with incomplete data in case of an exceeded
        # rate limit because fetching the annotations will anyway most likely
        # fail
        if pipeline == ANY_PIPELINE_VALUE:
            raw_builds = builds_cache.get_or_query_builds_for_all_pipelines(
                self.fetch_builds_mode,
                self.max_build_fetches,
                branch=branch,
                build_states=build_states,
                first_page=self.first_build_page_to_fetch,
            )
        else:
            raw_builds = builds_cache.get_or_query_builds(
                pipeline,
                self.fetch_builds_mode,
                self.max_build_fetches,
                branch=branch,
                build_states=build_states,
                first_page=self.first_build_page_to_fetch,
            )

        raw_builds = self.filter_builds(raw_builds, self.only_failed_build_step_keys)

        return [
            Build(
                number=raw_build["number"],
                pipeline=raw_build["pipeline"]["slug"],
                state=raw_build["state"],
                branch=raw_build["branch"],
                web_url=raw_build["web_url"],
                # Sub-second precision is dropped.
                created_at=datetime.strptime(
                    raw_build["created_at"], self._CREATED_AT_FORMAT
                ).replace(microsecond=0),
            )
            for raw_build in raw_builds
        ]

    def filter_builds(
        self,
        builds_data: list[Any],
        only_failed_build_step_keys: list[str],
    ) -> list[Any]:
        """Keep only builds in which one of the given step keys failed.

        Args:
            builds_data: Raw build payloads; each must provide ``"number"``.
            only_failed_build_step_keys: Step keys to look for. When empty,
                no filtering takes place.

        Returns:
            ``builds_data`` itself when no keys are given; otherwise a new list
            with the builds for which ``extract_build_step_outcomes`` reports
            an outcome in ``BUILDKITE_RELEVANT_FAILED_BUILD_STEP_STATES`` for
            one of the keys, in their original order.
        """
        if not only_failed_build_step_keys:
            return builds_data

        failed_build_step_matchers = [
            BuildStepMatcher(build_step_key, None)
            for build_step_key in only_failed_build_step_keys
        ]

        step_outcomes = extract_build_step_outcomes(
            builds_data=builds_data,
            selected_build_steps=failed_build_step_matchers,
            build_step_states=BUILDKITE_RELEVANT_FAILED_BUILD_STEP_STATES,
        )

        # Set for constant-time membership checks below.
        build_numbers_with_failed_steps = {
            outcome.build_number for outcome in step_outcomes
        }

        return [
            build
            for build in builds_data
            if int(build["number"]) in build_numbers_with_failed_steps
        ]

    def fetch_annotations(
        self, build: Build, verbose: bool = False
    ) -> list[BuildAnnotation]:
        """Fetch the annotations of a build.

        Args:
            build: Build whose pipeline slug and number identify the
                annotations to load.
            verbose: When false, the cache is queried in quiet mode.

        Returns:
            One ``BuildAnnotation`` per raw annotation, holding the tag-free
            text of its ``"body_html"`` and the extracted title (or ``None``).
        """
        # Only annotations of builds in a completed state may be added to the
        # cache (presumably because others can still change -- confirm).
        is_completed_build_state = build.state in BUILDKITE_COMPLETED_BUILD_STATES

        raw_annotations = annotations_cache.get_or_query_annotations(
            fetch_mode=self.fetch_annotations_mode,
            pipeline_slug=build.pipeline,
            build_number=build.number,
            add_to_cache_if_not_present=is_completed_build_state,
            quiet_mode=not verbose,
        )

        annotations = []
        for raw_annotation in raw_annotations:
            body_html = raw_annotation["body_html"]
            content = self.clean_annotation_text(body_html)
            title = self.try_extracting_title_from_annotation_html(body_html)
            annotations.append(BuildAnnotation(content=content, title=title))

        return annotations

    def clean_annotation_text(self, annotation_html: str) -> str:
        """Return ``annotation_html`` with all HTML tags removed.

        Only tags are stripped; HTML entities are left as they are.
        """
        return self._HTML_TAG_PATTERN.sub("", annotation_html)

    def try_extracting_title_from_annotation_html(
        self, annotation_html: str
    ) -> str | None:
        """Derive a title from the header paragraph of an annotation.

        Returns:
            ``None`` if no single-line ``<p>...</p>`` paragraph exists. If the
            first such paragraph contains ``<a href=...>NAME</a>`` followed by
            `` failed`` or `` succeeded``, ``NAME`` is returned; otherwise the
            paragraph's text with its tags removed.
        """
        # match <p>...</p> header
        header_paragraph_match = self._HEADER_PARAGRAPH_PATTERN.search(
            annotation_html
        )
        if header_paragraph_match is None:
            return None

        header_paragraph = header_paragraph_match.group(1)

        build_step_name_match = self._BUILD_STEP_NAME_PATTERN.search(
            header_paragraph
        )
        if build_step_name_match is not None:
            return build_step_name_match.group(1)

        return self.clean_annotation_text(header_paragraph)
|