lib.py 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. import os
  10. import pandas as pd
  11. from matplotlib import pyplot as plt
  12. from materialize.scalability.df.df_details import DfDetails
  13. from materialize.scalability.df.df_totals import DfTotals
  14. from materialize.scalability.io import paths
  15. from materialize.scalability.plot.plot import (
  16. plot_duration_by_connections_for_workload,
  17. plot_duration_by_endpoints_for_workload,
  18. plot_tps_per_connections,
  19. )
  20. def plotit(workload_name: str, include_zero_in_y_axis: bool = True) -> None:
  21. fig = plt.figure(layout="constrained", figsize=(16, 22))
  22. (
  23. tps_figure,
  24. duration_per_connections_figure,
  25. duration_per_endpoints_figure,
  26. ) = fig.subfigures(3, 1)
  27. df_totals_by_endpoint_name, df_details_by_endpoint_name = load_data_from_filesystem(
  28. workload_name
  29. )
  30. plot_tps_per_connections(
  31. workload_name,
  32. tps_figure,
  33. df_totals_by_endpoint_name,
  34. baseline_version_name=None,
  35. include_zero_in_y_axis=include_zero_in_y_axis,
  36. )
  37. plot_duration_by_connections_for_workload(
  38. workload_name,
  39. duration_per_connections_figure,
  40. df_details_by_endpoint_name,
  41. include_zero_in_y_axis=include_zero_in_y_axis,
  42. )
  43. plot_duration_by_endpoints_for_workload(
  44. workload_name,
  45. duration_per_endpoints_figure,
  46. df_details_by_endpoint_name,
  47. include_zero_in_y_axis=include_zero_in_y_axis,
  48. )
  49. def load_data_from_filesystem(
  50. workload_name: str,
  51. ) -> tuple[dict[str, DfTotals], dict[str, DfDetails]]:
  52. endpoint_names = paths.get_endpoint_names_from_results_dir()
  53. endpoint_names.sort()
  54. df_totals_by_endpoint_name = dict()
  55. df_details_by_endpoint_name = dict()
  56. for i, endpoint_name in enumerate(endpoint_names):
  57. totals_data_path = paths.df_totals_csv(endpoint_name, workload_name)
  58. details_data_path = paths.df_details_csv(endpoint_name, workload_name)
  59. if not os.path.exists(totals_data_path):
  60. print(
  61. f"Skipping {workload_name} for endpoint {endpoint_name} (data not present)"
  62. )
  63. continue
  64. assert os.path.exists(details_data_path)
  65. df_totals_by_endpoint_name[endpoint_name] = DfTotals(
  66. pd.read_csv(totals_data_path)
  67. )
  68. df_details_by_endpoint_name[endpoint_name] = DfDetails(
  69. pd.read_csv(details_data_path)
  70. )
  71. return df_totals_by_endpoint_name, df_details_by_endpoint_name