optbench.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. import csv
  10. import re
  11. import tempfile
  12. from contextlib import closing
  13. from pathlib import Path
  14. from typing import cast
  15. import click
  16. import numpy as np
  17. import pandas as pd
  18. from ..optbench import Scenario, scenarios, sql, util
  19. # import logging
  20. # logging.basicConfig(encoding='utf-8', level=logging.DEBUG)
  21. # Typer CLI Application
  22. # ---------------------
  23. @click.group()
  24. def app() -> None:
  25. pass
  26. class Arg:
  27. scenario = dict(
  28. type=click.Choice(scenarios()),
  29. callback=lambda ctx, param, value: Scenario(value),
  30. )
  31. base = dict(
  32. type=click.Path(
  33. exists=True,
  34. file_okay=True,
  35. dir_okay=False,
  36. writable=False,
  37. readable=True,
  38. resolve_path=True,
  39. ),
  40. callback=lambda ctx, param, value: Path(value),
  41. )
  42. diff = dict(
  43. type=click.Path(
  44. exists=True,
  45. file_okay=True,
  46. dir_okay=False,
  47. writable=False,
  48. readable=True,
  49. resolve_path=True,
  50. ),
  51. callback=lambda ctx, param, value: Path(value),
  52. )
  53. class Opt:
  54. repository = dict(
  55. default=Path(tempfile.gettempdir()),
  56. type=click.Path(
  57. exists=True,
  58. file_okay=False,
  59. dir_okay=True,
  60. writable=True,
  61. readable=True,
  62. resolve_path=True,
  63. ),
  64. help="Experiment results folder.",
  65. callback=lambda ctx, param, value: Path(value),
  66. )
  67. samples = dict(default=11, help="Samples per query.")
  68. print_results = dict(default=False, help="Print the experiment results.")
  69. no_indexes = dict(default=False, help="Skip CREATE [DEFAULT]/DROP INDEX DDL.")
  70. db_port = dict(default=6875, help="DB connection port.", envvar="PGPORT")
  71. db_host = dict(default="localhost", help="DB connection host.", envvar="PGHOST")
  72. db_user = dict(default="materialize", help="DB connection user.", envvar="PGUSER")
  73. db_pass = dict(default=None, help="DB connection password.", envvar="PGPASSWORD")
  74. db_require_ssl = dict(
  75. is_flag=True,
  76. help="DB connection requires SSL.",
  77. envvar="PGREQUIRESSL",
  78. )
  79. @app.command()
  80. @click.argument("scenario", **Arg.scenario)
  81. @click.option("--no-indexes", **Opt.no_indexes)
  82. @click.option("--db-port", **Opt.db_port)
  83. @click.option("--db-host", **Opt.db_host)
  84. @click.option("--db-user", **Opt.db_user)
  85. @click.option("--db-pass", **Opt.db_pass)
  86. @click.option("--db-require-ssl", **Opt.db_require_ssl)
  87. def init(
  88. scenario: Scenario,
  89. no_indexes: bool,
  90. db_port: int,
  91. db_host: str,
  92. db_user: str,
  93. db_pass: str | None,
  94. db_require_ssl: bool,
  95. ) -> None:
  96. """Initialize the DB under test for the given scenario."""
  97. info(f'Initializing "{scenario}" as the DB under test')
  98. try:
  99. # connect to the default database in order to re-create the
  100. # database for the selected scenario
  101. with closing(
  102. sql.Database(
  103. port=db_port,
  104. host=db_host,
  105. user=db_user,
  106. database=None,
  107. password=db_pass,
  108. require_ssl=db_require_ssl,
  109. )
  110. ) as db:
  111. db.drop_database(scenario)
  112. db.create_database(scenario)
  113. # re-connect to the database for the selected scenario
  114. with closing(
  115. sql.Database(
  116. port=db_port,
  117. host=db_host,
  118. user=db_user,
  119. database=str(scenario),
  120. password=db_pass,
  121. require_ssl=db_require_ssl,
  122. )
  123. ) as db:
  124. statements = sql.parse_from_file(scenario.schema_path())
  125. if no_indexes:
  126. idx_re = re.compile(r"(create|create\s+default|drop)\s+index\s+")
  127. statements = [
  128. statement
  129. for statement in statements
  130. if not idx_re.match(statement.lower())
  131. ]
  132. db.execute_all(statements)
  133. except Exception as e:
  134. raise click.ClickException(f"init command failed: {e}")
  135. @app.command()
  136. @click.argument("scenario", **Arg.scenario)
  137. @click.option("--samples", **Opt.samples)
  138. @click.option("--repository", **Opt.repository)
  139. @click.option("--print-results", **Opt.print_results)
  140. @click.option("--db-port", **Opt.db_port)
  141. @click.option("--db-host", **Opt.db_host)
  142. @click.option("--db-user", **Opt.db_user)
  143. @click.option("--db-pass", **Opt.db_pass)
  144. @click.option("--db-require-ssl", **Opt.db_require_ssl)
  145. def run(
  146. scenario: Scenario,
  147. samples: int,
  148. repository: Path,
  149. print_results: bool,
  150. db_port: int,
  151. db_host: str,
  152. db_user: str,
  153. db_pass: str | None,
  154. db_require_ssl: bool,
  155. ) -> None:
  156. """Run benchmark in the DB under test for a given scenario."""
  157. info(f'Running "{scenario}" scenario')
  158. try:
  159. with closing(
  160. sql.Database(
  161. port=db_port,
  162. host=db_host,
  163. user=db_user,
  164. database=str(scenario),
  165. password=db_pass,
  166. require_ssl=db_require_ssl,
  167. )
  168. ) as db:
  169. db_version = db.mz_version() or db.version()
  170. df = pd.DataFrame.from_records(
  171. [
  172. (
  173. query.name(),
  174. sample,
  175. cast(
  176. np.timedelta64,
  177. db.explain(query, timing=True).optimization_time(),
  178. ).astype(int),
  179. )
  180. for sample in range(samples)
  181. for query in [
  182. sql.Query(query)
  183. for query in sql.parse_from_file(scenario.workload_path())
  184. ]
  185. ],
  186. columns=["query", "sample", "data"],
  187. ).pivot(index="sample", columns="query", values="data")
  188. if print_results:
  189. print(df.to_string())
  190. results_path = util.results_path(repository, scenario, db_version)
  191. info(f'Writing results to "{results_path}"')
  192. df.to_csv(results_path, index=False, quoting=csv.QUOTE_MINIMAL)
  193. except Exception as e:
  194. raise click.ClickException(f"run command failed: {e}")
  195. @app.command()
  196. @click.argument("base", **Arg.base)
  197. @click.argument("diff", **Arg.diff)
  198. def compare(
  199. base: Path,
  200. diff: Path,
  201. ) -> None:
  202. """Compare the results of a base and diff benchmark runs."""
  203. info(f'Compare experiment results between "{base}" and "{diff}"')
  204. try:
  205. base_df = pd.read_csv(base, quoting=csv.QUOTE_MINIMAL).agg(
  206. ["min", "median", "max"]
  207. )
  208. diff_df = pd.read_csv(diff, quoting=csv.QUOTE_MINIMAL).agg(
  209. ["min", "median", "max"]
  210. )
  211. # compute diff/base quotient for all (metric, query) pairs
  212. quot_df = diff_df / base_df
  213. # append average quotient across all queries for each metric
  214. quot_df.insert(0, "Avg", quot_df.mean(axis=1))
  215. # TODO: use styler to color-code the cells
  216. print("base times")
  217. print("----------")
  218. print(base_df.to_string())
  219. print("")
  220. print("diff times")
  221. print("----------")
  222. print(diff_df.to_string())
  223. print("")
  224. print("diff/base ratio")
  225. print("---------------")
  226. print(quot_df.to_string())
  227. except Exception as e:
  228. raise click.ClickException(f"compare command failed: {e}")
  229. # Utility methods
  230. # ---------------
  231. def print_df(df: pd.DataFrame) -> None:
  232. with pd.option_context("display.max_rows", None, "display.max_columns", None):
  233. print(df)
  234. def info(msg: str, fg: str = "green") -> None:
  235. click.secho(msg, fg=fg)
  236. def err(msg: str, fg: str = "red") -> None:
  237. click.secho(msg, fg=fg, err=True)
  238. if __name__ == "__main__":
  239. app()