artifacts_cache.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. from typing import Any
  10. from materialize.buildkite_insights.buildkite_api import artifacts_api
  11. from materialize.buildkite_insights.cache import generic_cache
  12. from materialize.buildkite_insights.cache.cache_constants import FetchMode
  13. from materialize.buildkite_insights.cache.generic_cache import CacheFilePath
  14. from materialize.util import (
  15. decompress_zst_to_directory,
  16. ensure_dir_exists,
  17. sha256_of_utf8_string,
  18. )
  19. def get_or_query_job_artifact_list(
  20. pipeline_slug: str,
  21. fetch_mode: FetchMode,
  22. build_number: int,
  23. job_id: str,
  24. ) -> list[Any]:
  25. cache_file_path = _get_file_path_for_job_artifact_list(
  26. pipeline_slug=pipeline_slug, build_number=build_number, job_id=job_id
  27. )
  28. fetch_action = lambda: artifacts_api.get_build_job_artifact_list(
  29. pipeline_slug=pipeline_slug,
  30. build_number=build_number,
  31. job_id=job_id,
  32. )
  33. return generic_cache.get_or_query_data(cache_file_path, fetch_action, fetch_mode)
  34. def get_or_download_artifact(
  35. pipeline_slug: str,
  36. fetch_mode: FetchMode,
  37. build_number: int,
  38. job_id: str,
  39. artifact_id: str,
  40. is_zst_compressed: bool,
  41. ) -> str:
  42. cache_file_path = _get_file_path_for_artifact(
  43. pipeline_slug=pipeline_slug, artifact_id=artifact_id
  44. )
  45. if not is_zst_compressed:
  46. action = lambda: artifacts_api.download_artifact(
  47. pipeline_slug=pipeline_slug,
  48. build_number=build_number,
  49. job_id=job_id,
  50. artifact_id=artifact_id,
  51. )
  52. else:
  53. def action() -> str:
  54. zst_file_path = _get_file_path_for_artifact(
  55. pipeline_slug=pipeline_slug,
  56. artifact_id=artifact_id,
  57. cache_item_type="compressed-artifact",
  58. file_extension="zst",
  59. )
  60. uncompress_directory_path = zst_file_path.get().replace(".zst", "")
  61. ensure_dir_exists(zst_file_path.get_path_to_directory())
  62. ensure_dir_exists(uncompress_directory_path)
  63. artifacts_api.download_artifact_to_file(
  64. pipeline_slug=pipeline_slug,
  65. build_number=build_number,
  66. job_id=job_id,
  67. artifact_id=artifact_id,
  68. file_path=zst_file_path.get(),
  69. )
  70. extracted_files = decompress_zst_to_directory(
  71. zst_file_path=zst_file_path.get(),
  72. destination_dir_path=uncompress_directory_path,
  73. )
  74. if len(extracted_files) != 1:
  75. raise RuntimeError(
  76. f"Only archives with exactly one file supported at the moment. {zst_file_path.get()} contains {len(extracted_files)} files"
  77. )
  78. with open(extracted_files[0]) as file:
  79. return file.read()
  80. return generic_cache.get_or_query_data(
  81. cache_file_path,
  82. action,
  83. fetch_mode,
  84. max_allowed_cache_age_in_hours=96,
  85. quiet_mode=True,
  86. )
  87. def _get_file_path_for_job_artifact_list(
  88. pipeline_slug: str,
  89. build_number: int,
  90. job_id: str,
  91. ) -> CacheFilePath:
  92. meta_data = f"{build_number}-{job_id}"
  93. hash_value = sha256_of_utf8_string(meta_data)[:8]
  94. return CacheFilePath(
  95. cache_item_type="build_job_artifact_list",
  96. pipeline_slug=pipeline_slug,
  97. params_hash=hash_value,
  98. )
  99. def _get_file_path_for_artifact(
  100. pipeline_slug: str,
  101. artifact_id: str,
  102. cache_item_type: str = "artifact",
  103. file_extension: str = "json",
  104. ) -> CacheFilePath:
  105. # artifacts are text but also stored as string json
  106. return CacheFilePath(
  107. cache_item_type=cache_item_type,
  108. pipeline_slug=pipeline_slug,
  109. params_hash=artifact_id,
  110. file_extension=file_extension,
  111. )