12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394 |
- # Copyright Materialize, Inc. and contributors. All rights reserved.
- #
- # Use of this software is governed by the Business Source License
- # included in the LICENSE file at the root of this repository.
- #
- # As of the Change Date specified in that file, in accordance with
- # the Business Source License, use of this software will be governed
- # by the Apache License, Version 2.0.
- """Calculate the `namescore`---the percentage of column references with
- name information---of an `EXPLAIN PLAN`. By default, runs on all SLT
- files in $MZ_ROOT/test/sqllogictest."""
- import argparse
- import os
- import re
- from materialize import MZ_ROOT
# Directory that is scanned for .slt files when no explicit files are
# given on the command line.
SLT_ROOT = MZ_ROOT / "test" / "sqllogictest"
# Matches one column reference: a `#`, one or more decimal digits, and an
# optional `{...}` suffix. Group 1 captures that suffix (braces included)
# and is None when the reference carries no name annotation.
# The pattern is compiled in verbose mode, hence the escaped `\#`.
COLUMN_REF_RE = re.compile(
    r"""
    \#[0-9]+({[^}]+})?
    """,
    flags=re.VERBOSE,
)
def find_slt_files() -> list[str]:
    """Find all .slt files in $MZ_ROOT/test/sqllogictest directory.

    The directory tree rooted at ``SLT_ROOT`` is walked recursively and
    every file whose name ends in ``.slt`` is collected.

    Returns:
        The full paths of the matching files, sorted lexicographically.
        ``os.walk`` yields entries in arbitrary filesystem order, so the
        result is sorted to keep the report order stable across runs and
        machines.
    """
    return sorted(
        os.path.join(root, name)
        for root, _dirs, names in os.walk(SLT_ROOT)
        for name in names
        if name.endswith(".slt")
    )
def namescore(filename: str) -> tuple[int, int]:
    """Calculate the namescore of a file.

    The whole file is read into memory and scanned with ``COLUMN_REF_RE``.

    Args:
        filename: Path of the file to scan.

    Returns:
        A ``(named_refs, refs)`` tuple: the number of column references
        that carry a ``{...}`` name annotation, and the total number of
        column references found. Both are 0 for a file with no matches.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the counts do not depend on the
    # locale's default encoding.
    with open(filename, encoding="utf-8") as f:
        content = f.read()

    named_refs = 0
    refs = 0
    for match in COLUMN_REF_RE.finditer(content):
        refs += 1
        # Group 1 is the optional `{name}` suffix; it is None when absent.
        if match.group(1):
            named_refs += 1
    return (named_refs, refs)
def main() -> None:
    """Command-line entry point: print per-file and overall namescores.

    Positional arguments name the files to scan; when none are given,
    every file returned by ``find_slt_files()`` is scanned. For each file
    with at least one column reference, one line is printed with its
    percentage of named references. A final summary line reports the
    overall percentage, the number of files with no column references,
    and the total number of files.
    """
    parser = argparse.ArgumentParser(
        prog="namescore",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "\n"
            "calculates the `namescore` (percentage of column references with names)\n"
            "of given files (or all SLT files in $MZ_ROOT/test/sqllogictest by default)"
        ),
    )
    parser.add_argument(
        "tests",
        nargs="*",
        help="explicit files to run on [default: all SLT files in $MZ_ROOT/test/sqllogictest]",
    )
    args = parser.parse_args()

    tests = args.tests or find_slt_files()

    named_refs = 0
    refs = 0
    nonames = 0
    total = len(tests)

    for test in tests:
        nr, r = namescore(test)
        if r == 0:
            # Sanity check: named references are a subset of all references.
            assert nr == 0
            nonames += 1
            continue

        # Paths under SLT_ROOT are shown relative to it; others are shown as given.
        print(
            f"{test.removeprefix(str(SLT_ROOT) + os.sep)}: {nr / r * 100:.2f}% ({nr} / {r})"
        )
        named_refs += nr
        refs += r

    # With no column references at all (e.g. no files found) the percentage
    # is undefined; report "n/a" rather than dividing by zero.
    overall = f"{named_refs / refs * 100:.2f}%" if refs else "n/a"
    print(
        f"\nOverall namescore: {overall} ({named_refs} / {refs}); {nonames} files with no column references / {total} total files"
    )


# Run the CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()
|