bazel.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. import hashlib
  10. import os
  11. import pathlib
  12. import subprocess
  13. from enum import Enum
  14. import requests
  15. from materialize import MZ_ROOT, ui
  16. from materialize.build_config import BuildConfig
  17. from materialize.teleport import TeleportProxy
  18. """Utilities for interacting with Bazel from python scripts"""
# Path of the file that holds the current git revision of the repo.
# `write_git_hash` (re)writes it so the revision can be side channeled into
# Bazel.
MZ_GIT_HASH_FILE = "/tmp/mz_git_hash.txt"
  22. def output_paths(target, options=[]) -> list[pathlib.Path]:
  23. """Returns the absolute path of outputs from the built Bazel target."""
  24. cmd_args = ["bazel", "cquery", f"{target}", *options, "--output=files"]
  25. paths = subprocess.check_output(
  26. cmd_args, text=True, stderr=subprocess.DEVNULL
  27. ).splitlines()
  28. return [pathlib.Path(path) for path in paths]
  29. def write_git_hash():
  30. """
  31. Temporary file where we write the current git hash, so we can side channel
  32. it into Bazel.
  33. For production releases we stamp builds with the `workspace_status_command`
  34. but this workflow is not friendly to remote caching. Specifically, the
  35. "volatile status" of a workspace is not supposed to cause builds to get
  36. invalidated, and it doesn't when the result is cached locally, but it does
  37. when it's cached remotely.
  38. See: <https://bazel.build/docs/user-manual#workspace-status>
  39. <https://github.com/bazelbuild/bazel/issues/10075>
  40. """
  41. repo = MZ_ROOT / ".git"
  42. cmd_args = ["git", f"--git-dir={repo}", "rev-parse", "HEAD"]
  43. result = subprocess.run(
  44. cmd_args, text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL
  45. )
  46. if result.returncode == 0:
  47. with open(MZ_GIT_HASH_FILE, "w") as f:
  48. f.write(result.stdout.strip())
  49. else:
  50. ui.warn(f"Failed to get current revision of {MZ_ROOT}, falling back to all 0s")
  51. def calc_ingerity(path) -> str:
  52. """
  53. Calculate the 'integrity' for a given file.
  54. 'integrity' is a hash of the file used in rules like 'http_archive'.
  55. See: <https://bazel.build/rules/lib/repo/http#http_archive-integrity>
  56. """
  57. digest = subprocess.run(
  58. ["openssl", "dgst", "-sha256", "-binary", str(path)], stdout=subprocess.PIPE
  59. )
  60. base64 = subprocess.run(
  61. ["openssl", "base64", "-A"], input=digest.stdout, stdout=subprocess.PIPE
  62. )
  63. formatted = subprocess.run(
  64. ["sed", "s/^/sha256-/"], input=base64.stdout, stdout=subprocess.PIPE
  65. )
  66. return formatted.stdout.decode("utf-8")
  67. def toolchain_hashes(stable, nightly) -> dict[str, dict[str, str]]:
  68. """
  69. Generates the hashes for our Bazel toolchains.
  70. Fetches the specified Stable and Nightly version of the Rust compiler from our toolchains repo,
  71. hashes the downloaded files, and returns a properly formatted dictionary for Bazel.
  72. """
  73. ARCHS = [
  74. "aarch64-apple-darwin",
  75. "aarch64-unknown-linux-gnu",
  76. "x86_64-apple-darwin",
  77. "x86_64-unknown-linux-gnu",
  78. ]
  79. TOOLS = [
  80. "cargo",
  81. "clippy",
  82. "llvm-tools",
  83. "rust-std",
  84. "rustc",
  85. ]
  86. VERSIONS = {"stable": stable, "nightly": nightly}
  87. URL_TEMPLATE = "https://github.com/MaterializeInc/toolchains/releases/download/rust-{version}/{tool}-{channel}-{arch}.tar.zst"
  88. hashes = {}
  89. for arch in ARCHS:
  90. hashes[arch] = {}
  91. for channel, version in VERSIONS.items():
  92. hashes[arch][channel] = {}
  93. for tool in TOOLS:
  94. if channel == "stable":
  95. url_channel = version
  96. else:
  97. url_channel = channel
  98. print(f"Processing {tool} {version} {arch}")
  99. # Download the file.
  100. url = URL_TEMPLATE.format(
  101. version=version, tool=tool, channel=url_channel, arch=arch
  102. )
  103. response = requests.get(url, stream=True)
  104. response.raise_for_status()
  105. # Hash the response.
  106. sha256_hash = hashlib.sha256()
  107. for chunk in response.iter_content(chunk_size=8192):
  108. if chunk:
  109. sha256_hash.update(chunk)
  110. hashes[arch][channel][tool] = sha256_hash.hexdigest()
  111. return hashes
  112. def remote_cache_arg(config: BuildConfig) -> list[str]:
  113. """List of arguments that could possibly enable use of a remote cache."""
  114. ci_remote = os.getenv("CI_BAZEL_REMOTE_CACHE")
  115. config_remote = config.bazel.remote_cache
  116. if ci_remote:
  117. remote_cache = ci_remote
  118. elif config_remote:
  119. bazel_remote = RemoteCache(config_remote)
  120. remote_cache = bazel_remote.address()
  121. else:
  122. remote_cache = None
  123. if remote_cache:
  124. return [f"--remote_cache={remote_cache}"]
  125. else:
  126. return []
  127. class RemoteCache:
  128. """The remote cache we're conecting to."""
  129. def __init__(self, value: str):
  130. if value.startswith("teleport"):
  131. app_name = value.split(":")[1]
  132. self.kind = RemoteCacheKind.teleport
  133. self.data = app_name
  134. else:
  135. self.kind = RemoteCacheKind.normal
  136. self.data = value
  137. def address(self) -> str:
  138. """Address for connecting to this remote cache."""
  139. if self.kind == RemoteCacheKind.normal:
  140. return self.data
  141. else:
  142. TeleportProxy.spawn(self.data, "6889")
  143. return "http://localhost:6889"
  144. class RemoteCacheKind(Enum):
  145. """Kind of remote cache we're connecting to."""
  146. teleport = "teleport"
  147. """Connecting to a remote cache through a teleport proxy."""
  148. normal = "normal"
  149. """An HTTP address for the cache."""
  150. def __str__(self):
  151. return self.value