namescore.py 2.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. """Calculate the `namescore`---the percentage of column references with
  10. name information---of an `EXPLAIN PLAN`. By default, runs on all SLT
  11. files in $MZ_ROOT/test/sqllogictest."""
  12. import argparse
  13. import os
  14. import re
  15. from materialize import MZ_ROOT
  16. SLT_ROOT = MZ_ROOT / "test" / "sqllogictest"
  17. COLUMN_REF_RE = re.compile(
  18. r"""
  19. \#[0-9]+({[^}]+})?
  20. """,
  21. re.VERBOSE,
  22. )
  23. def find_slt_files() -> list[str]:
  24. """Find all .slt files in $MZ_ROOT/test/sqllogictest directory"""
  25. slt_files = []
  26. for root, _dirs, files in os.walk(SLT_ROOT):
  27. for file in files:
  28. if file.endswith(".slt"):
  29. slt_files.append(os.path.join(root, file))
  30. return slt_files
  31. def namescore(filename: str) -> tuple[int, int]:
  32. """Calculate the namescore of a file"""
  33. named_refs = 0
  34. refs = 0
  35. with open(filename) as f:
  36. content = f.read()
  37. for match in COLUMN_REF_RE.finditer(content):
  38. refs += 1
  39. if match.group(1):
  40. named_refs += 1
  41. return (named_refs, refs)
  42. def main() -> None:
  43. parser = argparse.ArgumentParser(
  44. prog="namescore",
  45. formatter_class=argparse.RawDescriptionHelpFormatter,
  46. description="""
  47. calculates the `namescore` (percentage of column references with names)
  48. of given files (or all SLT files in $MZ_ROOT/test/sqllogictest by default)""",
  49. )
  50. parser.add_argument(
  51. "tests",
  52. nargs="*",
  53. help="explicit files to run on [default: all SLT files in $MZ_ROOT/test/sqllogictest]",
  54. )
  55. args = parser.parse_args()
  56. tests = args.tests or find_slt_files()
  57. named_refs = 0
  58. refs = 0
  59. nonames = 0
  60. total = len(tests)
  61. for test in tests:
  62. nr, r = namescore(test)
  63. if r == 0:
  64. assert nr == 0
  65. nonames += 1
  66. continue
  67. print(
  68. f"{test.removeprefix(str(SLT_ROOT) + os.sep)}: {nr / r * 100:.2f}% ({nr} / {r})"
  69. )
  70. named_refs += nr
  71. refs += r
  72. print(
  73. f"\nOverall namescore: {named_refs / refs * 100:.2f}% ({named_refs} / {refs}); {nonames} files with no column references / {total} total files"
  74. )
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()