# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

from statistics import mean, variance
from typing import Generic, TypeVar

from materialize.feature_benchmark.benchmark_result import BenchmarkScenarioResult
from materialize.feature_benchmark.benchmark_result_evaluator import (
    RelativeThresholdEvaluator,
)
from materialize.feature_benchmark.measurement import MeasurementType
from materialize.feature_benchmark.scenario import Scenario
from materialize.feature_benchmark.scenario_version import ScenarioVersion

T = TypeVar("T", bound=int | float)


class ReportMeasurement(Generic[T]):
    """Aggregate statistics over the measurement points of a single metric."""

    result: T | None
    min: T | None
    max: T | None
    mean: T | None
    variance: float | None

    def __init__(self, points: list[T | None]):
        # The first point is the headline result; the remaining statistics
        # are computed over all points that are not None.
        self.result = points[0]
        set_points = [point for point in points if point is not None]
        if self.result is not None and set_points:
            self.min = min(set_points)
            self.max = max(set_points)
            self.mean = mean(set_points)
            # The sample variance is only defined for two or more points.
            self.variance = variance(set_points) if len(set_points) > 1 else None
        else:
            self.min = None
            self.max = None
            self.mean = None
            self.variance = None
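

# A minimal usage sketch of ReportMeasurement on hypothetical data (the
# values below are illustrative only, not taken from an actual benchmark run):
#
#   m = ReportMeasurement([1.2, None, 0.9, 1.5])
#   m.result    # 1.2  -- the first point, kept verbatim
#   m.min       # 0.9
#   m.max       # 1.5
#   m.mean      # 1.2
#   m.variance  # 0.09 -- sample variance; None with fewer than two points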


class Report:
    def __init__(self, cycle_number: int) -> None:
        self.cycle_number = cycle_number
        """1-based cycle number."""
        self._result_by_scenario_name: dict[str, BenchmarkScenarioResult] = dict()

    def add_scenario_result(self, result: BenchmarkScenarioResult) -> None:
        assert (
            result.scenario_name not in self._result_by_scenario_name
        ), f"Result of scenario {result.scenario_name} already present"
        self._result_by_scenario_name[result.scenario_name] = result

    def get_scenario_names(self) -> list[str]:
        return list(self._result_by_scenario_name.keys())

    def as_string(self, use_colors: bool, limit_to_scenario: str | None = None) -> str:
        """Render the report as an aligned, pipe-separated comparison table."""
        output_lines = []

        output_lines.append(
            f"{'NAME':<35} | {'TYPE':<15} | {'THIS':^15} | {'OTHER':^15} | {'UNIT':^6} | {'THRESHOLD':^10} | {'Regression?':^13} | 'THIS' is"
        )
        output_lines.append("-" * 152)

        for scenario_result in self._result_by_scenario_name.values():
            evaluator = RelativeThresholdEvaluator(scenario_result.scenario_class)

            for metric in scenario_result.metrics:
                if not metric.has_values():
                    continue

                if (
                    limit_to_scenario is not None
                    and scenario_result.scenario_name != limit_to_scenario
                ):
                    continue

                regression = "!!YES!!" if evaluator.is_regression(metric) else "no"
                threshold = f"{(evaluator.get_threshold(metric) * 100):.0f}%"
                output_lines.append(
                    f"{scenario_result.scenario_name:<35} | {metric.measurement_type:<15} | {metric.this_as_str():>15} | {metric.other_as_str():>15} | {metric.unit():^6} | {threshold:^10} | {regression:^13} | {evaluator.human_readable(metric, use_colors)}"
                )

        return "\n".join(output_lines)

    def __str__(self) -> str:
        return self.as_string(use_colors=False)

    def measurements_of_this(
        self, scenario_name: str
    ) -> dict[MeasurementType, ReportMeasurement]:
        scenario_result = self.get_scenario_result_by_name(scenario_name)

        this_results = dict()
        for metric in scenario_result.metrics:
            this_results[metric.measurement_type] = ReportMeasurement(
                metric.points_this()
            )
        return this_results

    def get_scenario_version(self, scenario_name: str) -> ScenarioVersion:
        scenario_result = self.get_scenario_result_by_name(scenario_name)
        return scenario_result.get_scenario_version()

    def get_scenario_result_by_name(
        self, scenario_name: str
    ) -> BenchmarkScenarioResult:
        return self._result_by_scenario_name[scenario_name]

    def has_scenario_regression(self, scenario_name: str) -> bool:
        scenario_result = self.get_scenario_result_by_name(scenario_name)
        evaluator = RelativeThresholdEvaluator(scenario_result.scenario_class)

        for metric in scenario_result.metrics:
            if evaluator.is_regression(metric):
                return True
        return False


def determine_scenario_classes_with_regressions(
    selected_report_by_scenario_name: dict[str, Report],
) -> list[type[Scenario]]:
    scenario_classes_with_regressions = set()

    for scenario_name, report in selected_report_by_scenario_name.items():
        if report.has_scenario_regression(scenario_name):
            scenario_result = report.get_scenario_result_by_name(scenario_name)
            scenario_classes_with_regressions.add(scenario_result.scenario_class)

    return list(scenario_classes_with_regressions)
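

# Hypothetical wiring sketch (only the names defined in this module are real;
# `results` is assumed to be an iterable of BenchmarkScenarioResult produced
# elsewhere by the benchmark runner):
#
#   report = Report(cycle_number=1)
#   for result in results:
#       report.add_scenario_result(result)
#   print(report)  # __str__ renders the comparison table without colors
#   regressed = determine_scenario_classes_with_regressions(
#       {name: report for name in report.get_scenario_names()}
#   )
#   if regressed:
#       raise RuntimeError(f"Regressed scenario classes: {regressed}")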