123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244 |
- #!/usr/bin/env python3
- # Copyright Materialize, Inc. and contributors. All rights reserved.
- #
- # Use of this software is governed by the Business Source License
- # included in the LICENSE file at the root of this repository.
- #
- # As of the Change Date specified in that file, in accordance with
- # the Business Source License, use of this software will be governed
- # by the Apache License, Version 2.0.
- import argparse
- import pandas as pd
- from materialize.buildkite_insights.annotation_search.buildkite_search_source import (
- ANY_BRANCH_VALUE,
- )
- from materialize.buildkite_insights.buildkite_api.buildkite_config import MZ_PIPELINES
- from materialize.buildkite_insights.buildkite_api.buildkite_constants import (
- BUILDKITE_BUILD_STATES,
- BUILDKITE_BUILD_STEP_STATES,
- BUILDKITE_RELEVANT_COMPLETED_BUILD_STEP_STATES,
- )
- from materialize.buildkite_insights.buildkite_api.generic_api import RateLimitExceeded
- from materialize.buildkite_insights.cache import builds_cache
- from materialize.buildkite_insights.cache.cache_constants import (
- FETCH_MODE_CHOICES,
- FetchMode,
- )
- from materialize.buildkite_insights.data.build_step import (
- BuildJobOutcome,
- BuildStepMatcher,
- )
- from materialize.buildkite_insights.util.build_step_utils import (
- extract_build_step_outcomes,
- step_outcomes_to_job_outcomes,
- )
- OUTPUT_TYPE_TXT = "txt"  # one text line per job (with build URL), followed by summary statistics
- OUTPUT_TYPE_TXT_SHORT = "txt-short"  # same as "txt", but the build URL is left out of each line
- OUTPUT_TYPE_CSV = "csv"  # comma-separated rows, one per job; no summary statistics are printed
def print_data(
    job_outcomes: list[BuildJobOutcome],
    build_steps: list[BuildStepMatcher],
    output_type: str,
    data_is_incomplete: bool,
    include_commit_hash: bool,
) -> None:
    """Print all job outcomes to stdout in the requested output format.

    Args:
        job_outcomes: Outcomes to print, one output line/row per entry.
        build_steps: Step matchers; only forwarded to ``print_stats``.
        output_type: One of the ``OUTPUT_TYPE_*`` constants. Any other
            value prints no entries at all.
        data_is_incomplete: When true, a warning line is printed last
            (for every output type).
        include_commit_hash: Whether each entry also shows its commit hash.
    """
    if output_type == OUTPUT_TYPE_CSV:
        # CSV: header row first, then one row per outcome; no statistics.
        _print_outcome_entry_csv_header(include_commit_hash)
        for outcome in job_outcomes:
            _print_outcome_entry_as_csv(outcome, include_commit_hash)
    elif output_type in (OUTPUT_TYPE_TXT, OUTPUT_TYPE_TXT_SHORT):
        # Text variants: one line per outcome, followed by the statistics.
        for outcome in job_outcomes:
            _print_outcome_entry_as_txt(outcome, output_type, include_commit_hash)
        print_stats(job_outcomes, build_steps)

    if data_is_incomplete:
        print("Warning! Data is incomplete due to exceeded rate limit!")
def _print_outcome_entry_as_txt(
    entry: BuildJobOutcome, output_type: str, include_commit_hash: bool
) -> None:
    """Print one job outcome as a single comma-separated text line.

    The line holds the step key, build number, formatted date and duration,
    optionally the build URL and the commit hash, and finally the status.

    Args:
        entry: The job outcome to print.
        output_type: When equal to ``OUTPUT_TYPE_TXT_SHORT`` the build URL
            is omitted.
        include_commit_hash: Whether to add the commit hash before the status.
    """
    if entry.duration_in_min is None:
        duration_text = "None"
    else:
        # Two decimals, right-aligned to a width of six characters.
        duration_text = f"{entry.duration_in_min:.2f}".rjust(6)

    optional_segments = []
    if output_type != OUTPUT_TYPE_TXT_SHORT:
        optional_segments.append(f"{entry.web_url_to_build}")
    if include_commit_hash:
        optional_segments.append(f"{entry.commit_hash}")

    segments = [
        f"{entry.step_key}",
        f"#{entry.build_number}",
        f"{entry.formatted_date()}",
        f"{duration_text} min",
        *optional_segments,
    ]

    status = "SUCCESS" if entry.passed else "FAIL"
    # The retry marker is only shown for jobs that were actually retried.
    if entry.retry_count > 0:
        status += f" (RETRY #{entry.retry_count})"
    segments.append(status)

    print(", ".join(segments))
def _print_outcome_entry_csv_header(include_commit_hash: bool) -> None:
    """Print the CSV header row matching the rows of the CSV output.

    The header string used to be built but never printed, so the CSV output
    had no header line at all.

    Args:
        include_commit_hash: Whether to include the ``commit`` column; it is
            placed between ``passed`` and ``retry_count``, the same position
            the commit hash takes in each data row.
    """
    commit_column = "commit," if include_commit_hash else ""
    print(
        f"step_key,build_number,created_at,duration_in_min,passed,{commit_column}retry_count"
    )
def _print_outcome_entry_as_csv(
    entry: BuildJobOutcome, include_commit_hash: bool
) -> None:
    """Print one job outcome as a CSV row.

    Column order: step key, build number, creation time (ISO format),
    duration in minutes, passed flag (1 or 0), optional commit hash,
    retry count. Values are written as-is, without quoting or escaping.

    Args:
        entry: The job outcome to print.
        include_commit_hash: Whether to add the commit hash column.
    """
    passed_flag = 1 if entry.passed else 0
    columns = [
        entry.step_key,
        entry.build_number,
        entry.created_at.isoformat(),
        entry.duration_in_min,
        passed_flag,
    ]
    if include_commit_hash:
        columns.append(entry.commit_hash)
    columns.append(entry.retry_count)

    print(",".join(f"{column}" for column in columns))
def print_stats(
    job_outcomes: list[BuildJobOutcome],
    build_matchers: list[BuildStepMatcher],
) -> None:
    """Print summary statistics for the given job outcomes.

    Prints the number of outcomes and how many of them passed (with the
    percentage). If at least one passed, also prints the min, max, mean and
    median duration of the passed ones. Prints a "No data" message and
    nothing else when ``job_outcomes`` is empty.

    Args:
        job_outcomes: Outcomes to summarize; they are loaded into a pandas
            DataFrame and need ``passed`` and ``duration_in_min`` columns.
        build_matchers: Only used to describe the selection in the output.
    """
    job_filter_desc = f"jobs matching {build_matchers}"

    total_count = len(job_outcomes)
    if total_count == 0:
        print(f"No data for {job_filter_desc}!")
        return

    outcomes_frame = pd.DataFrame(job_outcomes)
    passed_frame = outcomes_frame.loc[outcomes_frame["passed"]]
    passed_count = len(passed_frame.index)
    success_ratio = passed_count / total_count

    print()
    print(f"Statistics for {job_filter_desc}:")
    print(f"Number of builds: {total_count}")
    print(
        f"Number of builds with job success: {passed_count} ({100 * success_ratio:.1f}%)"
    )

    if passed_count == 0:
        # Duration statistics only cover passed jobs; nothing to report.
        return

    passed_durations = passed_frame["duration_in_min"]
    aggregates = (
        ("Min", passed_durations.min),
        ("Max", passed_durations.max),
        ("Mean", passed_durations.mean),
        ("Median", passed_durations.median),
    )
    for label, compute in aggregates:
        print(f"{label} duration with success: {compute():.2f} min")
def main(
    pipeline_slug: str,
    build_steps: list[BuildStepMatcher],
    fetch_mode: FetchMode,
    max_fetches: int,
    branch: str | None,
    build_states: list[str],
    build_step_states: list[str],
    output_type: str,
    include_commit_hash: bool,
) -> None:
    """Load builds of a pipeline and print the outcomes of the selected steps.

    Args:
        pipeline_slug: Pipeline whose builds are loaded.
        build_steps: Matchers selecting the build steps of interest.
        fetch_mode: Passed to the builds cache together with ``max_fetches``.
        max_fetches: Passed to the builds cache.
        branch: Branch passed to the builds cache; may be ``None``.
        build_states: Build states passed to the builds cache.
        build_step_states: Step states used when extracting step outcomes.
        output_type: Output format, forwarded to ``print_data``.
        include_commit_hash: Whether the output includes commit hashes.
    """
    try:
        builds_data = builds_cache.get_or_query_builds(
            pipeline_slug, fetch_mode, max_fetches, branch, build_states
        )
    except RateLimitExceeded as rate_limit_error:
        # Continue with the partial result carried by the exception and
        # flag the data so that the output ends with a warning.
        builds_data = rate_limit_error.partial_result
        data_is_incomplete = True
    else:
        data_is_incomplete = False

    job_outcomes = step_outcomes_to_job_outcomes(
        extract_build_step_outcomes(
            builds_data=builds_data,
            selected_build_steps=build_steps,
            build_step_states=build_step_states,
        )
    )

    print_data(
        job_outcomes,
        build_steps,
        output_type,
        data_is_incomplete=data_is_incomplete,
        include_commit_hash=include_commit_hash,
    )
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and hand them to main().
    cli = argparse.ArgumentParser(
        prog="buildkite-step-insights",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    cli.add_argument("pipeline", choices=MZ_PIPELINES, type=str)
    cli.add_argument("--build-step-key", action="append", default=[], type=str)
    cli.add_argument(
        "--build-step-parallel-index",
        type=int,
        help="This is only applied if exactly one build-step-key is specified",
    )
    cli.add_argument(
        "--fetch",
        type=lambda mode: FetchMode[mode.upper()],
        choices=FETCH_MODE_CHOICES,
        default=FetchMode.AUTO,
        help="Whether to fetch fresh builds from Buildkite.",
    )
    cli.add_argument("--max-fetches", default=3, type=int)
    cli.add_argument(
        "--branch", default="main", type=str, help="Use '*' for all branches"
    )
    cli.add_argument(
        "--build-state",
        action="append",
        default=[],
        choices=BUILDKITE_BUILD_STATES,
    )
    cli.add_argument(
        "--build-step-state",
        action="append",
        default=[],
        choices=BUILDKITE_BUILD_STEP_STATES,
    )
    cli.add_argument(
        "--output-type",
        choices=[OUTPUT_TYPE_TXT, OUTPUT_TYPE_TXT_SHORT, OUTPUT_TYPE_CSV],
        default=OUTPUT_TYPE_TXT,
        type=str,
    )
    cli.add_argument(
        "--include-commit-hash",
        action="store_true",
    )

    options = cli.parse_args()

    # The parallel index is only honored when exactly one step key was given.
    step_keys = options.build_step_key
    parallel_index = (
        options.build_step_parallel_index if len(step_keys) == 1 else None
    )
    step_matchers = [BuildStepMatcher(step_key, parallel_index) for step_key in step_keys]

    # The ANY_BRANCH_VALUE sentinel is translated to None for main().
    branch_filter = None if options.branch == ANY_BRANCH_VALUE else options.branch

    # Without explicit --build-step-state options, fall back to the relevant
    # completed step states.
    step_states = (
        options.build_step_state or BUILDKITE_RELEVANT_COMPLETED_BUILD_STEP_STATES
    )

    main(
        pipeline_slug=options.pipeline,
        build_steps=step_matchers,
        fetch_mode=options.fetch,
        max_fetches=options.max_fetches,
        branch=branch_filter,
        build_states=options.build_state,
        build_step_states=step_states,
        output_type=options.output_type,
        include_commit_hash=options.include_commit_hash,
    )
|