# artifact_search.py
  1. #!/usr/bin/env python3
  2. # Copyright Materialize, Inc. and contributors. All rights reserved.
  3. #
  4. # Use of this software is governed by the Business Source License
  5. # included in the LICENSE file at the root of this repository.
  6. #
  7. # As of the Change Date specified in that file, in accordance with
  8. # the Business Source License, use of this software will be governed
  9. # by the Apache License, Version 2.0.
  10. import argparse
  11. import re
  12. from typing import Any
  13. from materialize.buildkite_insights.artifact_search.artifact_search_presentation import (
  14. print_artifact_match,
  15. print_before_search_results,
  16. print_summary,
  17. )
  18. from materialize.buildkite_insights.buildkite_api.buildkite_config import MZ_PIPELINES
  19. from materialize.buildkite_insights.buildkite_api.generic_api import RateLimitExceeded
  20. from materialize.buildkite_insights.cache import (
  21. artifacts_cache,
  22. builds_cache,
  23. logs_cache,
  24. )
  25. from materialize.buildkite_insights.cache.cache_constants import (
  26. FETCH_MODE_CHOICES,
  27. FetchMode,
  28. )
  29. from materialize.buildkite_insights.util.build_step_utils import (
  30. extract_build_step_names_by_job_id,
  31. )
  32. from materialize.buildkite_insights.util.search_utility import (
  33. _search_value_to_pattern,
  34. determine_line_number,
  35. determine_position_in_line,
  36. )
  37. ACCEPTED_FILE_ENDINGS = {"log", "txt", "xml", "zst"}
  38. def main(
  39. pipeline_slug: str,
  40. build_number: int,
  41. specified_job_id: str | None,
  42. pattern: str,
  43. fetch: FetchMode,
  44. max_results: int,
  45. use_regex: bool,
  46. file_name_regex: str | None,
  47. include_zst_files: bool,
  48. search_logs_instead_of_artifacts: bool,
  49. ) -> None:
  50. assert len(pattern) > 0, "pattern must not be empty"
  51. if specified_job_id is not None:
  52. build_step_name_by_job_id = dict()
  53. build_step_name_by_job_id[specified_job_id] = "(unknown)"
  54. else:
  55. build = builds_cache.get_or_query_single_build(
  56. pipeline_slug, fetch, build_number=build_number
  57. )
  58. build_step_name_by_job_id = extract_build_step_names_by_job_id(build)
  59. try:
  60. (
  61. count_matches,
  62. count_all_artifacts,
  63. ignored_file_names,
  64. max_search_results_hit,
  65. ) = (
  66. _search_logs(
  67. pipeline_slug=pipeline_slug,
  68. build_number=build_number,
  69. pattern=pattern,
  70. fetch=fetch,
  71. max_results=max_results,
  72. use_regex=use_regex,
  73. build_step_name_by_job_id=build_step_name_by_job_id,
  74. )
  75. if search_logs_instead_of_artifacts
  76. else _search_artifacts(
  77. pipeline_slug=pipeline_slug,
  78. build_number=build_number,
  79. pattern=pattern,
  80. fetch=fetch,
  81. max_results=max_results,
  82. use_regex=use_regex,
  83. file_name_regex=file_name_regex,
  84. include_zst_files=include_zst_files,
  85. build_step_name_by_job_id=build_step_name_by_job_id,
  86. )
  87. )
  88. except RateLimitExceeded:
  89. print("Aborting due to exceeded rate limit!")
  90. return
  91. print_summary(
  92. pipeline_slug=pipeline_slug,
  93. build_number=build_number,
  94. job_id=specified_job_id,
  95. count_artifacts=count_all_artifacts,
  96. count_matches=count_matches,
  97. ignored_file_names=ignored_file_names,
  98. max_search_results_hit=max_search_results_hit,
  99. )
def _search_artifacts(
    pipeline_slug: str,
    build_number: int,
    pattern: str,
    fetch: FetchMode,
    max_results: int,
    use_regex: bool,
    file_name_regex: str | None,
    include_zst_files: bool,
    build_step_name_by_job_id: dict[str, str],
) -> tuple[int, int, set[str], bool]:
    """
    Search all artifacts of the given jobs for the pattern and print each match.

    :return: count_matches, count_all_artifacts, ignored_file_names, max_search_results_hit
    """
    # Fetch each job's artifact listing first (possibly served from cache).
    artifact_list_by_job_id: dict[str, list[Any]] = dict()
    for job_id in build_step_name_by_job_id.keys():
        artifact_list_by_job_id[job_id] = (
            artifacts_cache.get_or_query_job_artifact_list(
                pipeline_slug, fetch, build_number=build_number, job_id=job_id
            )
        )

    print_before_search_results()

    count_matches = 0
    count_all_artifacts = 0
    # File names that were skipped because of an unsupported file ending.
    ignored_file_names: set[str] = set()
    max_search_results_hit = False

    for job_id, artifact_list in artifact_list_by_job_id.items():
        # Apply the optional file-name filter before counting this job's artifacts.
        artifact_list = _filter_artifact_list(artifact_list, file_name_regex)
        count_artifacts_of_job = len(artifact_list)
        build_step_name = build_step_name_by_job_id[job_id]
        if count_artifacts_of_job == 0:
            print(f"Skipping job '{build_step_name}' ({job_id}) without artifacts.")
            continue
        print(
            f"Searching {count_artifacts_of_job} artifacts of job '{build_step_name}' ({job_id})."
        )
        count_all_artifacts = count_all_artifacts + count_artifacts_of_job
        for artifact in artifact_list:
            # Remaining print budget; zero means the global limit is reached.
            max_entries_to_print = max(0, max_results - count_matches)
            if max_entries_to_print == 0:
                max_search_results_hit = True
                break
            artifact_id = artifact["id"]
            artifact_file_name = artifact["filename"]
            if not _can_search_artifact(artifact_file_name, include_zst_files):
                print(f"Skipping artifact {artifact_file_name} due to file ending!")
                ignored_file_names.add(artifact_file_name)
                continue
            artifact_content = artifacts_cache.get_or_download_artifact(
                pipeline_slug,
                fetch,
                build_number=build_number,
                job_id=job_id,
                artifact_id=artifact_id,
                is_zst_compressed=is_zst_file(artifact_file_name),
            )
            # NOTE(review): the flag is overwritten for every searched
            # artifact; the budget check at the top of this loop re-sets it
            # once count_matches reaches max_results, so the final value
            # appears consistent — worth confirming.
            matches_in_artifact, max_search_results_hit = _search_artifact_content(
                artifact_file_name=artifact_file_name,
                artifact_content=artifact_content,
                pattern=pattern,
                use_regex=use_regex,
                max_entries_to_print=max_entries_to_print,
            )
            count_matches = count_matches + matches_in_artifact

    return (
        count_matches,
        count_all_artifacts,
        ignored_file_names,
        max_search_results_hit,
    )
  170. def _filter_artifact_list(
  171. artifact_list: list[Any], file_name_regex: str | None
  172. ) -> list[Any]:
  173. if file_name_regex is None:
  174. return artifact_list
  175. filtered_list = []
  176. for artifact in artifact_list:
  177. artifact_file_name = artifact["filename"]
  178. if re.search(file_name_regex, artifact_file_name):
  179. filtered_list.append(artifact)
  180. return filtered_list
  181. def _search_logs(
  182. pipeline_slug: str,
  183. build_number: int,
  184. pattern: str,
  185. fetch: FetchMode,
  186. max_results: int,
  187. use_regex: bool,
  188. build_step_name_by_job_id: dict[str, str],
  189. ) -> tuple[int, int, set[str], bool]:
  190. """
  191. :return: count_matches, count_all_artifacts, ignored_file_names, max_search_results_hit
  192. """
  193. print_before_search_results()
  194. count_matches = 0
  195. count_all_artifacts = 0
  196. ignored_file_names = set()
  197. max_search_results_hit = False
  198. for job_id, build_step_name in build_step_name_by_job_id.items():
  199. print(f"Searching log of job '{build_step_name}' ({job_id}).")
  200. count_all_artifacts = count_all_artifacts + 1
  201. max_entries_to_print = max(0, max_results - count_matches)
  202. if max_entries_to_print == 0:
  203. max_search_results_hit = True
  204. break
  205. log_content = logs_cache.get_or_download_log(
  206. pipeline_slug,
  207. fetch,
  208. build_number=build_number,
  209. job_id=job_id,
  210. )
  211. matches_in_log, max_search_results_hit = _search_artifact_content(
  212. artifact_file_name="log",
  213. artifact_content=log_content,
  214. pattern=pattern,
  215. use_regex=use_regex,
  216. max_entries_to_print=max_entries_to_print,
  217. )
  218. count_matches = count_matches + matches_in_log
  219. return (
  220. count_matches,
  221. count_all_artifacts,
  222. ignored_file_names,
  223. max_search_results_hit,
  224. )
  225. def _can_search_artifact(artifact_file_name: str, include_zst_files: bool) -> bool:
  226. if not include_zst_files and is_zst_file(artifact_file_name):
  227. return False
  228. for file_ending in ACCEPTED_FILE_ENDINGS:
  229. if artifact_file_name.endswith(f".{file_ending}"):
  230. return True
  231. return False
def _search_artifact_content(
    artifact_file_name: str,
    artifact_content: str,
    pattern: str,
    use_regex: bool,
    max_entries_to_print: int,
) -> tuple[int, bool]:
    """
    Print every occurrence of the pattern in the content, up to the limit.

    :return: number of highlighted results and whether further matches exceeding max_entries_to_print exist
    """
    search_pattern = _search_value_to_pattern(pattern, use_regex)
    search_offset = 0
    match_count = 0
    while True:
        # Resume scanning after the end of the previous match, so reported
        # matches do not overlap.
        match = search_pattern.search(artifact_content, pos=search_offset)
        if match is None:
            break
        match_count = match_count + 1
        line_number = determine_line_number(artifact_content, position=match.start())
        position_in_line = determine_position_in_line(
            artifact_content, position=match.start()
        )
        print_artifact_match(
            file_name=artifact_file_name,
            line_number=line_number,
            position_in_line=position_in_line,
            content=artifact_content,
            search_value=pattern,
            use_regex=use_regex,
            search_offset=search_offset,
        )
        search_offset = match.end()
        if match_count >= max_entries_to_print:
            # NOTE(review): reports True as soon as the budget is exhausted,
            # even if no further match actually exists in the content —
            # confirm whether this over-report is acceptable for the summary.
            return match_count, True
    return match_count, False
  267. def is_zst_file(file_name: str) -> bool:
  268. return file_name.endswith(".zst")
  269. if __name__ == "__main__":
  270. parser = argparse.ArgumentParser(
  271. prog="buildkite-artifact-search",
  272. formatter_class=argparse.RawDescriptionHelpFormatter,
  273. )
  274. parser.add_argument(
  275. "pipeline",
  276. choices=MZ_PIPELINES,
  277. type=str,
  278. )
  279. # no hyphen because positionals with hyphen cause issues
  280. parser.add_argument(
  281. "buildnumber",
  282. type=int,
  283. )
  284. parser.add_argument("pattern", type=str)
  285. parser.add_argument("--job-id", type=str)
  286. parser.add_argument("--max-results", default=50, type=int)
  287. parser.add_argument(
  288. "--use-regex",
  289. action="store_true",
  290. )
  291. parser.add_argument("--file-name-regex", type=str)
  292. parser.add_argument(
  293. "--include-zst-files", action=argparse.BooleanOptionalAction, default=True
  294. )
  295. parser.add_argument(
  296. "--search-logs-instead-of-artifacts",
  297. default=False,
  298. action="store_true",
  299. )
  300. parser.add_argument(
  301. "--fetch",
  302. type=lambda mode: FetchMode[mode.upper()],
  303. choices=FETCH_MODE_CHOICES,
  304. default=FetchMode.AUTO,
  305. help="Whether to fetch fresh builds from Buildkite.",
  306. )
  307. args = parser.parse_args()
  308. main(
  309. args.pipeline,
  310. args.buildnumber,
  311. args.job_id,
  312. args.pattern,
  313. args.fetch,
  314. args.max_results,
  315. args.use_regex,
  316. args.file_name_regex,
  317. args.include_zst_files,
  318. args.search_logs_instead_of_artifacts,
  319. )