123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303 |
- # Copyright Materialize, Inc. and contributors. All rights reserved.
- #
- # Use of this software is governed by the Business Source License
- # included in the LICENSE file at the root of this repository.
- #
- # As of the Change Date specified in that file, in accordance with
- # the Business Source License, use of this software will be governed
- # by the Apache License, Version 2.0.
- from collections import defaultdict
- from dataclasses import dataclass
- from datetime import datetime
- from materialize.buildkite_insights.util.data_io import (
- SimpleFilePath,
- read_results_from_file,
- )
# Hourly price per AWS EC2 instance type. Keys are the instance type names
# that follow "aws:instance-type=" in a Buildkite agent's metadata; values are
# multiplied by a job's runtime in hours and reported under the "[$]" columns.
# https://instances.vantage.sh/aws/ec2
aws_instance_cost: dict[str, float] = {
    "c5.2xlarge": 0.340,
    "c5.12xlarge": 2.040,
    "c5a.2xlarge": 0.308,
    "c5a.8xlarge": 1.232,
    "c6a.large": 0.0765,
    "c6a.xlarge": 0.153,
    "c6a.2xlarge": 0.306,
    "c6a.4xlarge": 0.612,
    "c6a.8xlarge": 1.224,
    "c6a.12xlarge": 1.836,
    "c7a.large": 0.1026,
    "c7a.xlarge": 0.2053,
    "c7a.2xlarge": 0.4106,
    "c7a.4xlarge": 0.8211,
    "c7a.8xlarge": 1.642,
    "c7a.12xlarge": 2.463,
    "c7g.large": 0.0725,
    "c8g.large": 0.0798,
    "c6g.xlarge": 0.1360,
    "c7g.xlarge": 0.1450,
    "c8g.xlarge": 0.1595,
    "c6g.2xlarge": 0.272,
    "c7g.2xlarge": 0.290,
    "c8g.2xlarge": 0.319,
    "c6g.4xlarge": 0.544,
    "c7g.4xlarge": 0.580,
    "c8g.4xlarge": 0.6381,
    "c6g.8xlarge": 1.088,
    "c6g.12xlarge": 1.632,
    "c7g.12xlarge": 1.740,
    "c8g.12xlarge": 1.914,
    "c7g.16xlarge": 2.320,
    "c8g.16xlarge": 2.552,
    "m5.4xlarge": 0.768,
    "m5a.8xlarge": 1.376,
    "m6a.8xlarge": 1.382,
    "m7a.8xlarge": 1.855,
    "m6a.12xlarge": 2.074,
    "m7a.12xlarge": 2.782,
    "m6a.16xlarge": 2.7648,
    "m7a.16xlarge": 3.7094,
    "m6a.24xlarge": 4.1472,
    "m7a.24xlarge": 5.5642,
    "m6a.32xlarge": 5.5296,
    "m7a.32xlarge": 7.4189,
    "m6a.48xlarge": 8.2944,
    "m7a.48xlarge": 11.1283,
    "m6g.4xlarge": 0.616,
    "m6g.8xlarge": 1.232,
    "m7g.8xlarge": 1.306,
    "m6g.12xlarge": 1.848,
    "m7g.12xlarge": 1.958,
    "m7g.16xlarge": 2.6112,
    "m8g.12xlarge": 2.154,
    "m8g.16xlarge": 2.8723,
    "m8g.24xlarge": 4.3085,
    "m8g.48xlarge": 8.617,
    "m6i.4xlarge": 0.768,
    "m6i.12xlarge": 2.304,
    "m7i.8xlarge": 1.613,
    "r7g.8xlarge": 1.714,
    "r8g.8xlarge": 1.885,
}
# Hourly price per Hetzner agent size. Keys are the part of the agent's
# "queue=hetzner-<size>" metadata entry that identifies the machine size
# (any suffix following "gb" is stripped before the lookup).
# https://www.hetzner.com/cloud/
hetzner_instance_cost: dict[str, float] = {
    "aarch64-2cpu-4gb": 0.0059,
    "aarch64-4cpu-8gb": 0.0101,
    "aarch64-8cpu-16gb": 0.0202,
    "aarch64-16cpu-32gb": 0.0395,
    "x86-64-2cpu-4gb": 0.0060,
    "x86-64-4cpu-8gb": 0.0113,
    "x86-64-8cpu-16gb": 0.0273,
    "x86-64-16cpu-32gb": 0.0540,
    "x86-64-dedi-2cpu-8gb": 0.0200,
    "x86-64-dedi-4cpu-16gb": 0.0392,
    "x86-64-dedi-8cpu-32gb": 0.0777,
    "x86-64-dedi-16cpu-64gb": 0.1546,
    "x86-64-dedi-32cpu-128gb": 0.3085,
    "x86-64-dedi-48cpu-192gb": 0.4623,
    "x86-64": 0.0,  # local experiments
}
@dataclass
class Failures:
    """Mutable failure tally for one job name within one month."""

    # Number of job runs whose state was "failed" or "broken".
    failures: int
    # Number of job runs whose state was "passed", "failed" or "broken".
    total: int
def _hourly_cost(agent_meta_data) -> float:
    """Return the hourly price of the agent described by *agent_meta_data*.

    The first metadata entry that names an AWS instance type
    (``aws:instance-type=<type>``) or a Hetzner queue
    (``queue=hetzner-<size>...``) decides the price. Agents with neither
    kind of entry are priced at 0.

    Raises:
        KeyError: if the instance type or Hetzner size is missing from the
            price tables.
    """
    for metadata in agent_meta_data:
        if metadata.startswith("aws:instance-type="):
            return aws_instance_cost[metadata.removeprefix("aws:instance-type=")]
        if metadata.startswith("queue=hetzner-"):
            name = metadata.removeprefix("queue=hetzner-")
            # Drop whatever follows the "<n>gb" size so the name matches the
            # keys of the price table.
            if "gb-" in name:
                name = name[: name.index("gb-") + 2]
            return hetzner_instance_cost[name]
    # Can't calculate cost for mac-aarch64
    return 0.0


def _sorted_row_keys(data):
    """Return the sorted union of the keys of all inner mappings of *data*."""
    return sorted({key for inner in data.values() for key in inner})


def _per_run(totals, counts):
    """Divide every value in the nested mapping *totals* by its entry in *counts*."""
    return {
        outer: {inner: value / counts[outer][inner] for inner, value in values.items()}
        for outer, values in totals.items()
    }


def _print_stats_day(
    name,
    data,
    print_fn=lambda x, key: "" if key not in x else f"{x.get(key, 0):.2f}",
):
    """Print a CSV table with one row per day.

    *data* maps ``(pipeline, is_main, with_build)`` tuples to a mapping of
    day -> value. Each tuple becomes a column (in sorted order); each day
    becomes a row. *print_fn* renders one cell from a column's mapping and
    a day; by default missing days are left empty.
    """
    columns = sorted(data.keys())
    header = [name] + [
        f"{pipeline} ({'main' if is_main else 'PR'} {'with build' if with_build else 'without build'})"
        for pipeline, is_main, with_build in columns
    ]
    print(",".join(header))
    for day in _sorted_row_keys(data):
        cells = [f'"{day}"'] + [print_fn(data[column], day) for column in columns]
        print(",".join(cells))


def _print_stats(
    name,
    data,
    job_to_pipeline=None,
    print_fn=lambda x, key: f"{x.get(key, 0):.2f}",
):
    """Print a CSV table with one row per key and one column per month.

    *data* maps a ``YYYY-MM`` string to a mapping of key -> value. Months
    are printed newest first. When *job_to_pipeline* is given, an extra
    "Pipeline" column shows the pipeline recorded for each key. *print_fn*
    renders one cell from a month's mapping and a key.
    """
    year_months = sorted(data.keys(), reverse=True)
    header = [name]
    if job_to_pipeline is not None:
        header.append("Pipeline")
    print(",".join(header + year_months))
    for key in _sorted_row_keys(data):
        row = [f'"{key}"']
        if job_to_pipeline is not None:
            row.append(f'"{job_to_pipeline[key]}"')
        row.extend(print_fn(data[year_month], key) for year_month in year_months)
        print(",".join(row))


def main() -> None:
    """Read build results from ``data.json`` and print cost and runtime tables.

    Six CSV tables are written to stdout, separated by blank lines: average
    build runtime per day, pipeline cost per month (total and per run), job
    cost per month (total and per run), and job failure percentage per month.
    """
    # All month-keyed tables are {"YYYY-MM": {name: value}}.
    job_costs = defaultdict(lambda: defaultdict(float))
    pipeline_costs = defaultdict(lambda: defaultdict(float))
    job_counts = defaultdict(lambda: defaultdict(int))
    pipeline_counts = defaultdict(lambda: defaultdict(int))
    job_failures = defaultdict(
        lambda: defaultdict(lambda: Failures(failures=0, total=0))
    )
    # First pipeline name seen for each job name.
    job_to_pipeline = {}
    # Keyed by (pipeline slug, is_main, with_build), then by "YYYY-MM-DD".
    build_durations = defaultdict(lambda: defaultdict(float))
    build_counts = defaultdict(lambda: defaultdict(int))

    data = read_results_from_file(SimpleFilePath("data.json"))

    for build in data:
        pipeline_name = build["pipeline"]["name"]
        created = datetime.fromisoformat(build["created_at"])
        year_month = f"{created.year}-{created.month:02}"
        pipeline_counts[year_month][pipeline_name] += 1

        if build["started_at"] and build["finished_at"]:
            # NOTE(review): each `continue` below moves on to the next build,
            # so a build filtered out here also skips the per-job accounting
            # further down even though pipeline_counts already counted it.
            # Confirm that this is intended.
            if build["state"] not in ("passed", "failed"):
                continue
            pipeline = build["pipeline"]["slug"]
            if pipeline not in ("test", "nightly", "release-qualification"):
                continue
            if "CI_SANITIZER" in build["env"]:
                continue
            if "CI_COVERAGE_ENABLED" in build["env"]:
                continue
            if any(job.get("retries_count") for job in build["jobs"]):
                continue

            year_month_day = f"{created.year}-{created.month:02}-{created.day:02}"
            start = datetime.fromisoformat(build["started_at"])
            finished = datetime.fromisoformat(build["finished_at"])
            duration = (finished - start).total_seconds()
            is_main = build["branch"] == "main"
            # True when at least one of the build steps passed in this build.
            with_build = any(
                job.get("step_key")
                in (
                    "build-x86_64",
                    "build-aarch64",
                    "build-x86_64-lto",
                    "build-aarch64-lto",
                )
                and job["state"] == "passed"
                for job in build["jobs"]
            )
            build_key = (pipeline, is_main, with_build)
            build_durations[build_key][year_month_day] += duration
            build_counts[build_key][year_month_day] += 1

        # NOTE(review): placed at build level (outside the `if` above) when
        # the indentation of this function was reconstructed -- confirm.
        for job in build["jobs"]:
            # Only jobs that actually ran on an agent have a cost.
            if not (
                job.get("agent") and job.get("started_at") and job.get("finished_at")
            ):
                continue

            job_name = job["name"] or "None"
            job_to_pipeline.setdefault(job_name, pipeline_name)

            cost = _hourly_cost(job["agent"]["meta_data"])

            start = datetime.fromisoformat(job["started_at"])
            finished = datetime.fromisoformat(job["finished_at"])
            duration = (finished - start).total_seconds()
            # Prices are hourly, durations are in seconds.
            total_cost = cost * duration / 3600

            job_costs[year_month][job_name] += total_cost
            pipeline_costs[year_month][pipeline_name] += total_cost
            job_counts[year_month][job_name] += 1

            state = job["state"]
            if state in ("failed", "broken"):
                job_failures[year_month][job_name].failures += 1
            if state in ("passed", "failed", "broken"):
                job_failures[year_month][job_name].total += 1

    job_cost_per_run = _per_run(job_costs, job_counts)
    pipeline_cost_per_run = _per_run(pipeline_costs, pipeline_counts)
    build_durations_per_run = _per_run(build_durations, build_counts)

    _print_stats_day("Runtime [s/run]", build_durations_per_run)
    print()
    _print_stats("Pipeline [$]", pipeline_costs)
    print()
    _print_stats("Pipeline [$/run]", pipeline_cost_per_run)
    print()
    _print_stats("Job [$]", job_costs, job_to_pipeline=job_to_pipeline)
    print()
    _print_stats("Job [$/run]", job_cost_per_run, job_to_pipeline=job_to_pipeline)
    print()
    _print_stats(
        "Job [% failed]",
        job_failures,
        job_to_pipeline=job_to_pipeline,
        print_fn=lambda x, key: (
            f"{x[key].failures * 100 / x[key].total:.2f}" if x[key].total else ""
        ),
    )
# Generate the report only when this file is executed as a script.
if __name__ == "__main__":
    main()
|