# extract_stats.py
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. from collections import defaultdict
  10. from dataclasses import dataclass
  11. from datetime import datetime
  12. from materialize.buildkite_insights.util.data_io import (
  13. SimpleFilePath,
  14. read_results_from_file,
  15. )
  16. # https://instances.vantage.sh/aws/ec2
  17. aws_instance_cost = {
  18. "c5.2xlarge": 0.340,
  19. "c5.12xlarge": 2.040,
  20. "c5a.2xlarge": 0.308,
  21. "c5a.8xlarge": 1.232,
  22. "c6a.large": 0.0765,
  23. "c6a.xlarge": 0.153,
  24. "c6a.2xlarge": 0.306,
  25. "c6a.4xlarge": 0.612,
  26. "c6a.8xlarge": 1.224,
  27. "c6a.12xlarge": 1.836,
  28. "c7a.large": 0.1026,
  29. "c7a.xlarge": 0.2053,
  30. "c7a.2xlarge": 0.4106,
  31. "c7a.4xlarge": 0.8211,
  32. "c7a.8xlarge": 1.642,
  33. "c7a.12xlarge": 2.463,
  34. "c7g.large": 0.0725,
  35. "c8g.large": 0.0798,
  36. "c6g.xlarge": 0.1360,
  37. "c7g.xlarge": 0.1450,
  38. "c8g.xlarge": 0.1595,
  39. "c6g.2xlarge": 0.272,
  40. "c7g.2xlarge": 0.290,
  41. "c8g.2xlarge": 0.319,
  42. "c6g.4xlarge": 0.544,
  43. "c7g.4xlarge": 0.580,
  44. "c8g.4xlarge": 0.6381,
  45. "c6g.8xlarge": 1.088,
  46. "c6g.12xlarge": 1.632,
  47. "c7g.12xlarge": 1.740,
  48. "c8g.12xlarge": 1.914,
  49. "c7g.16xlarge": 2.320,
  50. "c8g.16xlarge": 2.552,
  51. "m5.4xlarge": 0.768,
  52. "m5a.8xlarge": 1.376,
  53. "m6a.8xlarge": 1.382,
  54. "m7a.8xlarge": 1.855,
  55. "m6a.12xlarge": 2.074,
  56. "m7a.12xlarge": 2.782,
  57. "m6a.16xlarge": 2.7648,
  58. "m7a.16xlarge": 3.7094,
  59. "m6a.24xlarge": 4.1472,
  60. "m7a.24xlarge": 5.5642,
  61. "m6a.32xlarge": 5.5296,
  62. "m7a.32xlarge": 7.4189,
  63. "m6a.48xlarge": 8.2944,
  64. "m7a.48xlarge": 11.1283,
  65. "m6g.4xlarge": 0.616,
  66. "m6g.8xlarge": 1.232,
  67. "m7g.8xlarge": 1.306,
  68. "m6g.12xlarge": 1.848,
  69. "m7g.12xlarge": 1.958,
  70. "m7g.16xlarge": 2.6112,
  71. "m8g.12xlarge": 2.154,
  72. "m8g.16xlarge": 2.8723,
  73. "m8g.24xlarge": 4.3085,
  74. "m8g.48xlarge": 8.617,
  75. "m6i.4xlarge": 0.768,
  76. "m6i.12xlarge": 2.304,
  77. "m7i.8xlarge": 1.613,
  78. "r7g.8xlarge": 1.714,
  79. "r8g.8xlarge": 1.885,
  80. }
  81. # https://www.hetzner.com/cloud/
  82. hetzner_instance_cost = {
  83. "aarch64-2cpu-4gb": 0.0059,
  84. "aarch64-4cpu-8gb": 0.0101,
  85. "aarch64-8cpu-16gb": 0.0202,
  86. "aarch64-16cpu-32gb": 0.0395,
  87. "x86-64-2cpu-4gb": 0.0060,
  88. "x86-64-4cpu-8gb": 0.0113,
  89. "x86-64-8cpu-16gb": 0.0273,
  90. "x86-64-16cpu-32gb": 0.0540,
  91. "x86-64-dedi-2cpu-8gb": 0.0200,
  92. "x86-64-dedi-4cpu-16gb": 0.0392,
  93. "x86-64-dedi-8cpu-32gb": 0.0777,
  94. "x86-64-dedi-16cpu-64gb": 0.1546,
  95. "x86-64-dedi-32cpu-128gb": 0.3085,
  96. "x86-64-dedi-48cpu-192gb": 0.4623,
  97. "x86-64": 0, # local experiments
  98. }
  99. @dataclass
  100. class Failures:
  101. failures: int
  102. total: int
  103. def main() -> None:
  104. job_costs = defaultdict(lambda: defaultdict(float))
  105. pipeline_costs = defaultdict(lambda: defaultdict(float))
  106. job_counts = defaultdict(lambda: defaultdict(int))
  107. pipeline_counts = defaultdict(lambda: defaultdict(int))
  108. job_failures = defaultdict(
  109. lambda: defaultdict(lambda: Failures(failures=0, total=0))
  110. )
  111. job_to_pipeline = {}
  112. build_durations = defaultdict(lambda: defaultdict(float))
  113. build_counts = defaultdict(lambda: defaultdict(int))
  114. data = read_results_from_file(SimpleFilePath("data.json"))
  115. for build in data:
  116. pipeline_name = build["pipeline"]["name"]
  117. created = datetime.fromisoformat(build["created_at"])
  118. year_month = f"{created.year}-{created.month:02}"
  119. pipeline_counts[year_month][pipeline_name] += 1
  120. if build["started_at"] and build["finished_at"]:
  121. if not build["state"] in ("passed", "failed"):
  122. continue
  123. pipeline = build["pipeline"]["slug"]
  124. if pipeline not in ("test", "nightly", "release-qualification"):
  125. continue
  126. if "CI_SANITIZER" in build["env"]:
  127. continue
  128. if "CI_COVERAGE_ENABLED" in build["env"]:
  129. continue
  130. if any(job.get("retries_count") for job in build["jobs"]):
  131. continue
  132. year_month_day = f"{created.year}-{created.month:02}-{created.day:02}"
  133. start = datetime.fromisoformat(build["started_at"])
  134. finished = datetime.fromisoformat(build["finished_at"])
  135. duration = (finished - start).total_seconds()
  136. is_main = build["branch"] == "main"
  137. with_build = any(
  138. job.get("step_key")
  139. in (
  140. "build-x86_64",
  141. "build-aarch64",
  142. "build-x86_64-lto",
  143. "build-aarch64-lto",
  144. )
  145. and job["state"] == "passed"
  146. for job in build["jobs"]
  147. )
  148. build_durations[(pipeline, is_main, with_build)][year_month_day] += duration
  149. build_counts[(pipeline, is_main, with_build)][year_month_day] += 1
  150. for job in build["jobs"]:
  151. if (
  152. not job.get("agent")
  153. or not job.get("started_at")
  154. or not job.get("finished_at")
  155. ):
  156. continue
  157. job_name = job["name"] or "None"
  158. if not job_name in job_to_pipeline:
  159. job_to_pipeline[job_name] = pipeline_name
  160. for metadata in job["agent"]["meta_data"]:
  161. if metadata.startswith("aws:instance-type="):
  162. cost = aws_instance_cost[
  163. metadata.removeprefix("aws:instance-type=")
  164. ]
  165. break
  166. if metadata.startswith("queue=hetzner-"):
  167. name = metadata.removeprefix("queue=hetzner-")
  168. if "gb-" in name:
  169. name = name[: name.index("gb-") + 2]
  170. cost = hetzner_instance_cost[name]
  171. break
  172. else:
  173. # Can't calculate cost for mac-aarch64
  174. cost = 0
  175. start = datetime.fromisoformat(job["started_at"])
  176. finished = datetime.fromisoformat(job["finished_at"])
  177. duration = (finished - start).total_seconds()
  178. total_cost = cost * duration / 3600
  179. job_costs[year_month][job_name] += total_cost
  180. pipeline_costs[year_month][pipeline_name] += total_cost
  181. job_counts[year_month][job_name] += 1
  182. if job["state"] in ("failed", "broken"):
  183. job_failures[year_month][job_name].failures += 1
  184. if job["state"] in ("passed", "failed", "broken"):
  185. job_failures[year_month][job_name].total += 1
  186. def print_stats_day(
  187. name,
  188. data,
  189. print_fn=lambda x, key: "" if key not in x else f"{x.get(key, 0):.2f}",
  190. ):
  191. keys = set()
  192. for ps in data.values():
  193. for p in ps.keys():
  194. keys.add(p)
  195. keys = sorted(keys)
  196. year_month_days = sorted(data.keys())
  197. additional_keys = [name]
  198. print(
  199. ",".join(
  200. additional_keys
  201. + [
  202. f"{ymd} ({'main' if is_main else 'PR'} {'with build' if with_build else 'without build'})"
  203. for ymd, is_main, with_build in year_month_days
  204. ]
  205. )
  206. )
  207. for key in keys:
  208. additional_values = [f'"{key}"']
  209. print(
  210. ",".join(
  211. additional_values
  212. + [print_fn(data[day], key) for day in year_month_days]
  213. )
  214. )
  215. def print_stats(
  216. name,
  217. data,
  218. include_pipeline=False,
  219. print_fn=lambda x, key: f"{x.get(key, 0):.2f}",
  220. ):
  221. keys = set()
  222. for ps in data.values():
  223. for p in ps.keys():
  224. keys.add(p)
  225. keys = sorted(keys)
  226. year_months = sorted(data.keys(), reverse=True)
  227. additional_keys = [name] + (["Pipeline"] if include_pipeline else [])
  228. print(",".join(additional_keys + year_months))
  229. for key in keys:
  230. additional_values = [f'"{key}"'] + (
  231. [f'"{job_to_pipeline[key]}"'] if include_pipeline else []
  232. )
  233. print(
  234. ",".join(
  235. additional_values
  236. + [print_fn(data[year_month], key) for year_month in year_months]
  237. )
  238. )
  239. job_cost_per_run = {
  240. key: {key2: value2 / job_counts[key][key2] for key2, value2 in value.items()}
  241. for key, value in job_costs.items()
  242. }
  243. pipeline_cost_per_run = {
  244. key: {
  245. key2: value2 / pipeline_counts[key][key2] for key2, value2 in value.items()
  246. }
  247. for key, value in pipeline_costs.items()
  248. }
  249. build_durations_per_run = {
  250. key: {key2: value2 / build_counts[key][key2] for key2, value2 in value.items()}
  251. for key, value in build_durations.items()
  252. }
  253. print_stats_day("Runtime [s/run]", build_durations_per_run)
  254. print()
  255. print_stats("Pipeline [$]", pipeline_costs)
  256. print()
  257. print_stats("Pipeline [$/run]", pipeline_cost_per_run)
  258. print()
  259. print_stats("Job [$]", job_costs, include_pipeline=True)
  260. print()
  261. print_stats("Job [$/run]", job_cost_per_run, include_pipeline=True)
  262. print()
  263. print_stats(
  264. "Job [% failed]",
  265. job_failures,
  266. include_pipeline=True,
  267. print_fn=lambda x, key: (
  268. f"{x[key].failures * 100 / x[key].total:.2f}" if x[key].total else ""
  269. ),
  270. )
  271. if __name__ == "__main__":
  272. main()