analyze.py 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. """
  10. Utilities to analyze data extracted from a Materialize catalog.
  11. """
  12. from pathlib import Path
  13. from textwrap import dedent
  14. from typing import TextIO
  15. from materialize.mzexplore import sql
  16. from materialize.mzexplore.common import explain_diff, explain_file, info, warn
  17. def changes(
  18. out: TextIO,
  19. target: Path,
  20. header_name: str,
  21. base_suffix: str,
  22. diff_suffix: str,
  23. ) -> None:
  24. """
  25. Append sections to an `*.md` file with items corresponding to changes pairs
  26. of optimized plans for the same catalog item extracted with the given `base`
  27. and `diff` suffix.
  28. """
  29. # Ensure that the target dir exists
  30. if not target.is_dir():
  31. warn(f"Target path `{target}` is not a folder")
  32. return
  33. info(f"Comparing `{base_suffix}` vs `{diff_suffix}` " f"plans in {target}")
  34. info("Comparing optimized plans")
  35. out.write(
  36. dedent(
  37. f"""
  38. ## {header_name}
  39. """
  40. ).lstrip("\n")
  41. )
  42. for base_path in target.glob(f"**/*.optimized_plan.{base_suffix}.txt"):
  43. base = explain_file(base_path)
  44. if base is None:
  45. warn(f"File {base_path} is not recognized as an ExplainFile")
  46. continue
  47. diff = explain_diff(base=base, diff_suffix=diff_suffix)
  48. if not (target / diff.path()).is_file():
  49. warn(f"Cannot find diff file {diff.file_name()} for {base}")
  50. continue
  51. item_type = base.item_type
  52. database = sql.identifier(base.database)
  53. schema = sql.identifier(base.schema)
  54. name = sql.identifier(base.name)
  55. base_data = (target / base.path()).read_text(encoding="utf8")
  56. diff_data = (target / diff.path()).read_text(encoding="utf8")
  57. if base_data != diff_data:
  58. info(f"Found diff at {item_type.sql()} `{database}.{schema}.{name}`")
  59. out.write(
  60. dedent(
  61. f"""
  62. - TODO(REGRESSION|IMPROVEMENT) in {item_type.sql()} `{database}.{schema}.{name}`
  63. ```bash
  64. code --diff \\
  65. {target / base.path()} \\
  66. {target / diff.path()}
  67. ```
  68. """
  69. ).lstrip("\n")
  70. )