analysis.py

#!/usr/bin/env python3
# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.
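
"""Summarize outcomes of Buildkite build steps for Materialize pipelines.

Fetches builds of the selected pipeline (from the local cache or the
Buildkite API), extracts the outcomes of the selected build steps, and
prints them as text or CSV together with duration and success statistics.
"""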

import argparse

import pandas as pd

from materialize.buildkite_insights.annotation_search.buildkite_search_source import (
    ANY_BRANCH_VALUE,
)
from materialize.buildkite_insights.buildkite_api.buildkite_config import MZ_PIPELINES
from materialize.buildkite_insights.buildkite_api.buildkite_constants import (
    BUILDKITE_BUILD_STATES,
    BUILDKITE_BUILD_STEP_STATES,
    BUILDKITE_RELEVANT_COMPLETED_BUILD_STEP_STATES,
)
from materialize.buildkite_insights.buildkite_api.generic_api import RateLimitExceeded
from materialize.buildkite_insights.cache import builds_cache
from materialize.buildkite_insights.cache.cache_constants import (
    FETCH_MODE_CHOICES,
    FetchMode,
)
from materialize.buildkite_insights.data.build_step import (
    BuildJobOutcome,
    BuildStepMatcher,
)
from materialize.buildkite_insights.util.build_step_utils import (
    extract_build_step_outcomes,
    step_outcomes_to_job_outcomes,
)

OUTPUT_TYPE_TXT = "txt"
OUTPUT_TYPE_TXT_SHORT = "txt-short"
OUTPUT_TYPE_CSV = "csv"


def print_data(
    job_outcomes: list[BuildJobOutcome],
    build_steps: list[BuildStepMatcher],
    output_type: str,
    data_is_incomplete: bool,
    include_commit_hash: bool,
) -> None:
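    """Print each job outcome in the requested format; for text output,
    also print summary statistics."""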
    if output_type == OUTPUT_TYPE_CSV:
        _print_outcome_entry_csv_header(include_commit_hash)

    for entry in job_outcomes:
        if output_type in [OUTPUT_TYPE_TXT, OUTPUT_TYPE_TXT_SHORT]:
            _print_outcome_entry_as_txt(entry, output_type, include_commit_hash)
        elif output_type == OUTPUT_TYPE_CSV:
            _print_outcome_entry_as_csv(entry, include_commit_hash)

    if output_type in [OUTPUT_TYPE_TXT, OUTPUT_TYPE_TXT_SHORT]:
        print_stats(job_outcomes, build_steps)

    if data_is_incomplete:
        print("Warning! Data is incomplete due to exceeded rate limit!")


def _print_outcome_entry_as_txt(
    entry: BuildJobOutcome, output_type: str, include_commit_hash: bool
) -> None:
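    """Print one job outcome as a human-readable line; the short format
    omits the build URL."""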
    formatted_duration = (
        f"{entry.duration_in_min:.2f}".rjust(6)
        if entry.duration_in_min is not None
        else "None"
    )
    url = "" if output_type == OUTPUT_TYPE_TXT_SHORT else f"{entry.web_url_to_build}, "
    commit_hash = f"{entry.commit_hash}, " if include_commit_hash else ""
    print(
        f"{entry.step_key}, #{entry.build_number}, {entry.formatted_date()}, {formatted_duration} min, {url}{commit_hash}{'SUCCESS' if entry.passed else 'FAIL'}{f' (RETRY #{entry.retry_count})' if entry.retry_count > 0 else ''}"
    )


def _print_outcome_entry_csv_header(include_commit_hash: bool) -> None:
    # Header row matching the columns emitted by _print_outcome_entry_as_csv.
    print(
        f"step_key,build_number,created_at,duration_in_min,passed,{'commit,' if include_commit_hash else ''}retry_count"
    )


def _print_outcome_entry_as_csv(
    entry: BuildJobOutcome, include_commit_hash: bool
) -> None:
    commit_hash = f"{entry.commit_hash}," if include_commit_hash else ""
    print(
        f"{entry.step_key},{entry.build_number},{entry.created_at.isoformat()},{entry.duration_in_min},{1 if entry.passed else 0},{commit_hash}{entry.retry_count}"
    )


def print_stats(
    job_outcomes: list[BuildJobOutcome],
    build_matchers: list[BuildStepMatcher],
) -> None:
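    """Print success rate and duration statistics for the matched jobs."""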
    job_filter_desc = f"jobs matching {build_matchers}"

    if len(job_outcomes) == 0:
        print(f"No data for {job_filter_desc}!")
        return

    dfs = pd.DataFrame(job_outcomes)
    dfs_with_success = dfs.loc[dfs["passed"]]

    number_of_builds = len(job_outcomes)
    number_of_builds_with_successful_step = len(dfs_with_success.index)
    success_prop = number_of_builds_with_successful_step / number_of_builds

    print()
    print(f"Statistics for {job_filter_desc}:")
    print(f"Number of builds: {number_of_builds}")
    print(
        f"Number of builds with job success: {number_of_builds_with_successful_step} ({100 * success_prop:.1f}%)"
    )

    has_successful_builds = len(dfs_with_success.index) > 0

    if has_successful_builds:
        print(
            f"Min duration with success: {dfs_with_success['duration_in_min'].min():.2f} min"
        )
        print(
            f"Max duration with success: {dfs_with_success['duration_in_min'].max():.2f} min"
        )
        print(
            f"Mean duration with success: {dfs_with_success['duration_in_min'].mean():.2f} min"
        )
        print(
            f"Median duration with success: {dfs_with_success['duration_in_min'].median():.2f} min"
        )


def main(
    pipeline_slug: str,
    build_steps: list[BuildStepMatcher],
    fetch_mode: FetchMode,
    max_fetches: int,
    branch: str | None,
    build_states: list[str],
    build_step_states: list[str],
    output_type: str,
    include_commit_hash: bool,
) -> None:
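    """Fetch builds, derive job outcomes, and print them.

    If the Buildkite API rate limit is exceeded, the partial result is
    used and the output is marked as incomplete.
    """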
    try:
        builds_data = builds_cache.get_or_query_builds(
            pipeline_slug, fetch_mode, max_fetches, branch, build_states
        )
        data_is_incomplete = False
    except RateLimitExceeded as e:
        builds_data = e.partial_result
        data_is_incomplete = True

    step_outcomes = extract_build_step_outcomes(
        builds_data=builds_data,
        selected_build_steps=build_steps,
        build_step_states=build_step_states,
    )
    job_outcomes = step_outcomes_to_job_outcomes(step_outcomes)

    print_data(
        job_outcomes,
        build_steps,
        output_type,
        data_is_incomplete=data_is_incomplete,
        include_commit_hash=include_commit_hash,
    )


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog="buildkite-step-insights",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument("pipeline", choices=MZ_PIPELINES, type=str)
    parser.add_argument("--build-step-key", action="append", default=[], type=str)
    parser.add_argument(
        "--build-step-parallel-index",
        type=int,
        help="This is only applied if exactly one build-step-key is specified",
    )
    parser.add_argument(
        "--fetch",
        type=lambda mode: FetchMode[mode.upper()],
        choices=FETCH_MODE_CHOICES,
        default=FetchMode.AUTO,
        help="Whether to fetch fresh builds from Buildkite.",
    )
    parser.add_argument("--max-fetches", default=3, type=int)
    parser.add_argument(
        "--branch", default="main", type=str, help="Use '*' for all branches"
    )
    parser.add_argument(
        "--build-state",
        action="append",
        default=[],
        choices=BUILDKITE_BUILD_STATES,
    )
    parser.add_argument(
        "--build-step-state",
        action="append",
        default=[],
        choices=BUILDKITE_BUILD_STEP_STATES,
    )
    parser.add_argument(
        "--output-type",
        choices=[OUTPUT_TYPE_TXT, OUTPUT_TYPE_TXT_SHORT, OUTPUT_TYPE_CSV],
        default=OUTPUT_TYPE_TXT,
        type=str,
    )
    parser.add_argument(
        "--include-commit-hash",
        action="store_true",
    )

    args = parser.parse_args()

    selected_build_states = args.build_state
    selected_build_step_states = (
        args.build_step_state or BUILDKITE_RELEVANT_COMPLETED_BUILD_STEP_STATES
    )

    main(
        args.pipeline,
        [
            BuildStepMatcher(
                build_step_key,
                (
                    args.build_step_parallel_index
                    if len(args.build_step_key) == 1
                    else None
                ),
            )
            for build_step_key in args.build_step_key
        ],
        args.fetch,
        args.max_fetches,
        args.branch if args.branch != ANY_BRANCH_VALUE else None,
        selected_build_states,
        selected_build_step_states,
        args.output_type,
        args.include_commit_hash,
    )
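

# Example invocation (a sketch; the pipeline slug and step key below are
# placeholders, and the script assumes the `materialize` package is importable;
# valid pipelines come from MZ_PIPELINES):
#
#   python3 analysis.py <pipeline> --build-step-key <step-key> \
#       --branch '*' --output-type csv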