# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

from statistics import mean, variance
from typing import Generic, TypeVar

from materialize.feature_benchmark.benchmark_result import BenchmarkScenarioResult
from materialize.feature_benchmark.benchmark_result_evaluator import (
    RelativeThresholdEvaluator,
)
from materialize.feature_benchmark.measurement import (
    MeasurementType,
)
from materialize.feature_benchmark.scenario import Scenario
from materialize.feature_benchmark.scenario_version import ScenarioVersion

T = TypeVar("T", bound=int | float)


class ReportMeasurement(Generic[T]):
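    """Aggregated view of the recorded points of one measurement: the first
    point as the reported result, plus min, max, mean, and variance over all
    non-None points."""
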
    result: T | None
    min: T | None
    max: T | None
    mean: T | None
    variance: float | None

    def __init__(self, points: list[T | None]):
        self.result = points[0]
        set_points = [point for point in points if point is not None]
        if self.result is not None and set_points:
            self.min = min(set_points)
            self.max = max(set_points)
            self.mean = mean(set_points)
            self.variance = variance(set_points) if len(set_points) > 1 else None
        else:
            self.min = None
            self.max = None
            self.mean = None
            self.variance = None


class Report:
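    """Collects the per-scenario results of one benchmark cycle and renders
    them as a comparison table.

    Typical use (a sketch; producing the BenchmarkScenarioResult instances is
    the job of other parts of the framework)::

        report = Report(cycle_number=1)
        report.add_scenario_result(scenario_result)
        print(report.as_string(use_colors=True))
    """
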
    def __init__(self, cycle_number: int) -> None:
        self.cycle_number = cycle_number
        """1-based cycle number."""
        self._result_by_scenario_name: dict[str, BenchmarkScenarioResult] = dict()

    def add_scenario_result(self, result: BenchmarkScenarioResult) -> None:
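        """Register the result of a scenario; each scenario may only be added once."""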
        assert (
            result.scenario_name not in self._result_by_scenario_name.keys()
        ), f"Result of scenario {result.scenario_name} already present"
        self._result_by_scenario_name[result.scenario_name] = result

    def get_scenario_names(self) -> list[str]:
        return list(self._result_by_scenario_name.keys())

    def as_string(self, use_colors: bool, limit_to_scenario: str | None = None) -> str:
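        """Render all scenario results as a fixed-width comparison table,
        optionally restricted to a single scenario."""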
        output_lines = []

        output_lines.append(
            f"{'NAME':<35} | {'TYPE':<15} | {'THIS':^15} | {'OTHER':^15} | {'UNIT':^6} | {'THRESHOLD':^10} | {'Regression?':^13} | 'THIS' is"
        )
        output_lines.append("-" * 152)

        for scenario_result in self._result_by_scenario_name.values():
            evaluator = RelativeThresholdEvaluator(scenario_result.scenario_class)

            for metric in scenario_result.metrics:
                if not metric.has_values():
                    continue

                if (
                    limit_to_scenario is not None
                    and scenario_result.scenario_name != limit_to_scenario
                ):
                    continue

                regression = "!!YES!!" if evaluator.is_regression(metric) else "no"
                threshold = f"{(evaluator.get_threshold(metric) * 100):.0f}%"

                output_lines.append(
                    f"{scenario_result.scenario_name:<35} | {metric.measurement_type:<15} | {metric.this_as_str():>15} | {metric.other_as_str():>15} | {metric.unit():^6} | {threshold:^10} | {regression:^13} | {evaluator.human_readable(metric, use_colors)}"
                )

        return "\n".join(output_lines)

    def __str__(self) -> str:
        return self.as_string(use_colors=False)

    def measurements_of_this(
        self, scenario_name: str
    ) -> dict[MeasurementType, ReportMeasurement]:
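        """Return the aggregated 'THIS' measurements of the given scenario,
        keyed by measurement type."""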
        scenario_result = self.get_scenario_result_by_name(scenario_name)
        this_results = dict()

        for metric in scenario_result.metrics:
            this_results[metric.measurement_type] = ReportMeasurement(
                metric.points_this()
            )

        return this_results

    def get_scenario_version(self, scenario_name: str) -> ScenarioVersion:
        scenario_result = self.get_scenario_result_by_name(scenario_name)
        return scenario_result.get_scenario_version()

    def get_scenario_result_by_name(
        self, scenario_name: str
    ) -> BenchmarkScenarioResult:
        return self._result_by_scenario_name[scenario_name]

    def has_scenario_regression(self, scenario_name: str) -> bool:
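        """Return True if any metric of the given scenario is considered a regression."""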
        scenario_result = self.get_scenario_result_by_name(scenario_name)
        evaluator = RelativeThresholdEvaluator(scenario_result.scenario_class)

        for metric in scenario_result.metrics:
            if evaluator.is_regression(metric):
                return True

        return False


def determine_scenario_classes_with_regressions(
    selected_report_by_scenario_name: dict[str, Report]
) -> list[type[Scenario]]:
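    """Collect the distinct scenario classes whose selected report shows a regression."""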
    scenario_classes_with_regressions = set()

    for scenario_name, report in selected_report_by_scenario_name.items():
        if report.has_scenario_regression(scenario_name):
            scenario_result = report.get_scenario_result_by_name(scenario_name)
            scenario_classes_with_regressions.add(scenario_result.scenario_class)

    return list(scenario_classes_with_regressions)