# upload_debug_symbols_to_polarsignals.py
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. import argparse
  10. import os
  11. import subprocess
  12. import tempfile
  13. import urllib.request
  14. from pathlib import Path
  15. from tempfile import _TemporaryFileWrapper
  16. import boto3
  17. from materialize import mzbuild, spawn, ui
  18. from materialize.ci_util.upload_debug_symbols_to_s3 import (
  19. DEBUGINFO_BINS,
  20. DEBUGINFO_S3_BUCKET,
  21. )
  22. from materialize.mzbuild import Repository, ResolvedImage
  23. from materialize.rustc_flags import Sanitizer
  24. from materialize.xcompile import Arch
# Upload debuginfo and sources to Polar Signals (our continuous
# profiling provider).
# This script is only invoked for build tags. Polar Signals is
# expensive, so we don't want to upload development or unstable builds
# that won't ever be profiled by Polar Signals.

# HTTP endpoint serving debug symbols (mirrors the debuginfo S3 bucket);
# used when --protocol=http is selected.
DEBUGINFO_URL = "https://debuginfo.dev.materialize.com"
  31. def main() -> None:
  32. parser = argparse.ArgumentParser(
  33. prog="upload_debug_symbols_to_polarsignals",
  34. description="""Upload debug symbols to Polar Signals.""",
  35. )
  36. parser.add_argument(
  37. "--arch",
  38. help="the architecture of the binaries to upload",
  39. choices=[str(Arch.X86_64), str(Arch.AARCH64)],
  40. default=str(Arch.host()),
  41. )
  42. parser.add_argument(
  43. "--protocol",
  44. help="the source for downloading debug symbols",
  45. choices=["http", "s3"],
  46. default="s3",
  47. )
  48. parser.add_argument(
  49. "--token",
  50. help="the Polar Signals API token",
  51. default=os.getenv("POLAR_SIGNALS_API_TOKEN"),
  52. )
  53. args = parser.parse_intermixed_args()
  54. coverage = ui.env_is_truthy("CI_COVERAGE_ENABLED")
  55. sanitizer = Sanitizer[os.getenv("CI_SANITIZER", "none")]
  56. bazel = ui.env_is_truthy("CI_BAZEL_BUILD")
  57. bazel_remote_cache = os.getenv("CI_BAZEL_REMOTE_CACHE")
  58. bazel_lto = ui.env_is_truthy("CI_BAZEL_LTO")
  59. repo = mzbuild.Repository(
  60. Path("."),
  61. coverage=coverage,
  62. sanitizer=sanitizer,
  63. bazel=bazel,
  64. bazel_remote_cache=bazel_remote_cache,
  65. bazel_lto=bazel_lto,
  66. arch=Arch(args.arch),
  67. )
  68. collect_and_upload_debug_data_to_polarsignals(
  69. repo, DEBUGINFO_BINS, args.protocol, args.token
  70. )
  71. def collect_and_upload_debug_data_to_polarsignals(
  72. repo: mzbuild.Repository,
  73. debuginfo_bins: set[str],
  74. protocol: str,
  75. polar_signals_api_token: str,
  76. ) -> None:
  77. ui.section("Collecting and uploading debug data to PolarSignals...")
  78. relevant_images_by_name = get_build_images(repo, debuginfo_bins)
  79. print(f"Considered images are: {relevant_images_by_name.keys()}")
  80. for image_name, image in relevant_images_by_name.items():
  81. remove_docker_container_if_exists(image_name)
  82. container_name = create_docker_container(image_name, image)
  83. print(
  84. f"Created docker container from image {image_name} (spec: {image.spec()})"
  85. )
  86. path_to_binary = copy_binary_from_image(image_name, container_name)
  87. print(f"Copied binary from image {image_name}")
  88. build_id = get_build_id(repo, path_to_binary)
  89. print(f"{image_name} has build_id {build_id}")
  90. if protocol == "s3":
  91. bin_path, dbg_path = fetch_debug_symbols_from_s3(build_id)
  92. elif protocol == "http":
  93. bin_path, dbg_path = fetch_debug_symbols_from_http(build_id)
  94. else:
  95. raise ValueError(f"Unknown protocol: {protocol}")
  96. print(f"Fetched debug symbols of {image_name} from {protocol}")
  97. upload_completed = upload_debug_data_to_polarsignals(
  98. repo, build_id, bin_path, dbg_path, polar_signals_api_token
  99. )
  100. if upload_completed:
  101. print(f"Uploaded debug symbols of {image_name} to PolarSignals")
  102. else:
  103. print(f"Did not upload debug symbols of {image_name} to PolarSignals")
  104. def get_build_images(
  105. repo: mzbuild.Repository, image_names: set[str]
  106. ) -> dict[str, ResolvedImage]:
  107. relevant_images = []
  108. for image_name, image in repo.images.items():
  109. if image_name in image_names:
  110. relevant_images.append(image)
  111. dependency_set = repo.resolve_dependencies(relevant_images)
  112. resolved_images = dict()
  113. for image_name in image_names:
  114. resolved_images[image_name] = dependency_set[image_name]
  115. return resolved_images
  116. def remove_docker_container_if_exists(image_name: str) -> None:
  117. try:
  118. subprocess.run(["docker", "rm", image_name], check=True)
  119. except subprocess.CalledProcessError as e:
  120. print(f"Removing container failed, ignoring: {e}")
  121. def create_docker_container(image_name: str, image: ResolvedImage) -> str:
  122. try:
  123. image_spec = image.spec()
  124. docker_container_name = image_name
  125. command = ["docker", "create", "--name", docker_container_name, image_spec]
  126. subprocess.run(command, check=True)
  127. return docker_container_name
  128. except subprocess.CalledProcessError as e:
  129. if "manifest unknown" in str(e):
  130. raise RuntimeError(f"Docker image not found: {image.spec()}")
  131. print(f"Error creating docker container: {e}")
  132. raise e
  133. def copy_binary_from_image(image_name: str, docker_container_name: str) -> str:
  134. try:
  135. source_path = f"/usr/local/bin/{image_name}"
  136. target_path = f"./{image_name}"
  137. command = [
  138. "docker",
  139. "cp",
  140. f"{docker_container_name}:{source_path}",
  141. target_path,
  142. ]
  143. subprocess.run(command, check=True)
  144. return target_path
  145. except subprocess.CalledProcessError as e:
  146. print(f"Error copying file: {e}")
  147. raise e
  148. def get_build_id(repo: mzbuild.Repository, path_to_binary: str) -> str:
  149. return spawn.run_with_retries(
  150. lambda: spawn.capture(
  151. ["parca-debuginfo", "buildid", path_to_binary],
  152. cwd=repo.rd.root,
  153. ).strip()
  154. )
  155. def fetch_debug_symbols_from_http(build_id: str) -> tuple[str, str]:
  156. file_names = [
  157. "executable",
  158. "debuginfo",
  159. ]
  160. downloaded_file_paths = dict()
  161. for file_name in file_names:
  162. key = f"buildid/{build_id}/{file_name}"
  163. target_file_name = key.replace("/", "_")
  164. print(
  165. f"Downloading {file_name} from {DEBUGINFO_URL}/{key} to {target_file_name}"
  166. )
  167. urllib.request.urlretrieve(f"{DEBUGINFO_URL}/{key}", target_file_name)
  168. downloaded_file_paths[file_name] = target_file_name
  169. return downloaded_file_paths["executable"], downloaded_file_paths["debuginfo"]
  170. def fetch_debug_symbols_from_s3(build_id: str) -> tuple[str, str]:
  171. s3 = boto3.client("s3")
  172. file_names = [
  173. "executable",
  174. "debuginfo",
  175. ]
  176. downloaded_file_paths = dict()
  177. for file_name in file_names:
  178. key = f"buildid/{build_id}/{file_name}"
  179. target_file_name = key.replace("/", "_")
  180. print(
  181. f"Downloading {file_name} from s3://{DEBUGINFO_S3_BUCKET}/{key} to {target_file_name}"
  182. )
  183. with open(target_file_name, "wb") as data:
  184. s3.download_fileobj(DEBUGINFO_S3_BUCKET, key, data)
  185. downloaded_file_paths[file_name] = target_file_name
  186. return downloaded_file_paths["executable"], downloaded_file_paths["debuginfo"]
  187. def upload_debug_data_to_polarsignals(
  188. repo: Repository,
  189. build_id: str,
  190. bin_path: Path | str,
  191. dbg_path: Path | str,
  192. polar_signals_api_token: str,
  193. ) -> bool:
  194. _upload_debug_info_to_polarsignals(repo, dbg_path, polar_signals_api_token)
  195. with tempfile.NamedTemporaryFile() as tarball:
  196. _create_source_tarball(repo, bin_path, tarball)
  197. return _upload_source_tarball_to_polarsignals(
  198. repo, bin_path, tarball, build_id, polar_signals_api_token
  199. )
  200. def _upload_debug_info_to_polarsignals(
  201. repo: mzbuild.Repository, dbg_path: Path | str, polar_signals_api_token: str
  202. ) -> None:
  203. print(f"Uploading debuginfo for {dbg_path} to Polar Signals...")
  204. spawn.run_with_retries(
  205. lambda: spawn.runv(
  206. [
  207. "parca-debuginfo",
  208. "upload",
  209. "--store-address=grpc.polarsignals.com:443",
  210. "--no-extract",
  211. dbg_path,
  212. ],
  213. cwd=repo.rd.root,
  214. env=dict(os.environ, PARCA_DEBUGINFO_BEARER_TOKEN=polar_signals_api_token),
  215. )
  216. )
  217. def _create_source_tarball(
  218. repo: mzbuild.Repository, bin_path: Path | str, tarball: _TemporaryFileWrapper
  219. ) -> None:
  220. print(f"Constructing source tarball for {bin_path}...")
  221. p1 = subprocess.Popen(
  222. ["llvm-dwarfdump", "--show-sources", bin_path],
  223. stdout=subprocess.PIPE,
  224. )
  225. p2 = subprocess.Popen(
  226. [
  227. "tar",
  228. "-cf",
  229. tarball.name,
  230. "--zstd",
  231. "-T",
  232. "-",
  233. "--ignore-failed-read",
  234. ],
  235. stdin=p1.stdout,
  236. # Suppress noisy warnings about missing files.
  237. stdout=subprocess.DEVNULL,
  238. stderr=subprocess.DEVNULL,
  239. )
  240. # This causes p1 to receive SIGPIPE if p2 exits early,
  241. # like in the shell.
  242. assert p1.stdout
  243. p1.stdout.close()
  244. for p in [p1, p2]:
  245. if p.wait():
  246. raise subprocess.CalledProcessError(p.returncode, p.args)
  247. def _upload_source_tarball_to_polarsignals(
  248. repo: mzbuild.Repository,
  249. bin_path: Path | str,
  250. tarball: _TemporaryFileWrapper,
  251. build_id: str,
  252. polar_signals_api_token: str,
  253. ) -> bool:
  254. print(f"Uploading source tarball for {bin_path} to Polar Signals...")
  255. output = spawn.run_with_retries(
  256. lambda: spawn.capture(
  257. [
  258. "parca-debuginfo",
  259. "upload",
  260. "--store-address=grpc.polarsignals.com:443",
  261. "--type=sources",
  262. f"--build-id={build_id}",
  263. tarball.name,
  264. ],
  265. cwd=repo.rd.root,
  266. env=dict(
  267. os.environ,
  268. PARCA_DEBUGINFO_BEARER_TOKEN=polar_signals_api_token,
  269. ),
  270. ).strip()
  271. )
  272. if "Skipping upload of" in output:
  273. return False
  274. return True
# Allow running this module directly as a script.
if __name__ == "__main__":
    main()