# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

import argparse
import os
import re
import subprocess
from collections import OrderedDict
from collections.abc import Callable

import junit_xml

from materialize import MZ_ROOT, buildkite, ci_util

# Maps a file path to the changed lines of that file and their coverage state:
# - None value indicates that this line is interesting, but we don't know yet
#   if it can actually be covered.
# - Positive values indicate that the line can be covered and how often it has
#   been covered in end-to-end tests.
# - Negative values indicate that the line has only been covered in unit tests.
Coverage = dict[str, OrderedDict[int, int | None]]

# Extracts the interesting part of the path from an lcov "SF:" record.
# Group 1 spans the whole matched alternative, including its prefix.
# NOTE: in verbose mode every non-whitespace character is significant, so no
# stray characters may follow "$" (a literal after "$" can never match).
SOURCE_RE = re.compile(
    r"""
    ( src/(.*$)
    | bazel-out/.*/bin/(.*$)
    | external/(.*$)
    )""",
    re.VERBOSE,
)

# * Deriving generates more code, but we don't expect to cover this in most
#   cases, so ignore such lines.
# * Same for mz_ore::test
# * The await keyword is not properly supported
#   (https://github.com/rust-lang/rust/issues/98712).
IGNORE_SRC_LINE_RE = re.compile( r""" ( \#\[derive\(.*\)\] | \#\[mz_ore::test.*\] | \.await ) """, re.VERBOSE, ) IGNORE_FILE_PATH_RE = re.compile( r""" ( /maelstrom/ ) """, re.VERBOSE, ) def ignore_file_in_coverage_report(file_path: str) -> bool: if not file_path.endswith(".rs"): return True if IGNORE_FILE_PATH_RE.search(file_path): return True return False unittests_have_run = False def mark_covered_lines( lcov_file: str, coverage: Coverage, unittests: bool = False ) -> None: """ For a description of the lcov tracing file format, see the bottom of https://linux.die.net/man/1/geninfo """ global unittests_have_run if unittests: unittests_have_run = True else: assert ( not unittests_have_run ), "Call mark_covered_lines for unit tests last in order to get correct code coverage reports" # There will always be an SF line specifying a file before a DA line # according to the lcov tracing file format definition file = None for line in open(lcov_file): line = line.strip() if not line: continue if line == "end_of_record": continue method, content = tuple(line.strip().split(":", 1)) # SF:/var/lib/buildkite-agent/builds/buildkite-builders-d43b1b5-i-0193496e7aec9a4e3-1/materialize/coverage/src/transform/src/lib.rs if method == "SF": if content.startswith("src/"): # for unit tests file = content else: result = SOURCE_RE.search(content) assert result, f"Unexpected file {content}" file = result.group(1) # DA:111,15524 # DA:112,0 # DA:113,15901 elif method == "DA": assert file, "file was not set by a SF line" if file in coverage: line_str, hit_str = content.split(",", 1) line_nr = int(line_str) hit = int(hit_str) if hit_str.isnumeric() else int(float(hit_str)) if line_nr in coverage[file]: if unittests: if not coverage[file][line_nr]: coverage[file][line_nr] = ( coverage[file][line_nr] or 0 ) - hit else: coverage[file][line_nr] = (coverage[file][line_nr] or 0) + hit def get_report( coverage: Coverage, fn: Callable[[OrderedDict[int, int | None], int, str], bool] ) -> str: """ 
Remove uncovered lines in real files and print a git diff, then restore to original state. The fn function determines when to keep a line. Everything not kept will show up in the diff. """ try: # Remove lines which are not covered so they show up with "!" marker for file, lines in coverage.items(): with open(file, "r+") as f: content = f.readlines() f.seek(0) for i, line in enumerate(content): if fn(lines, i, line): f.write(line) f.truncate() result = subprocess.run( [ "git", "diff", # Spaces can be moved around, leading to confusing reports "--ignore-all-space", "--output-indicator-old=!", "HEAD", ], check=True, capture_output=True, ) return result.stdout.decode("utf-8").strip() finally: # Restore the code into its original state subprocess.run(["git", "reset", "--hard"], check=True) def main() -> None: parser = argparse.ArgumentParser( prog="ci-coverage-pr-report", formatter_class=argparse.RawDescriptionHelpFormatter, description=""" ci-coverage-pr-report creates a code coverage report for CI.""", ) parser.add_argument("--unittests", type=str, help="unit test lcov file") parser.add_argument("tests", nargs="+", help="all other lcov files from test runs") args = parser.parse_args() result = subprocess.run(["git", "diff"], check=True, capture_output=True) output = result.stdout.decode("utf-8").strip() assert not output, f"Has to run on clean git state: \n{output}" test_cases = [] coverage: Coverage = {} for file, line in buildkite.find_modified_lines(): if not ignore_file_in_coverage_report(file): coverage.setdefault(file, OrderedDict())[line] = None for lcov_file in args.tests: mark_covered_lines(lcov_file, coverage) if args.unittests: if os.path.isfile(args.unittests): mark_covered_lines(args.unittests, coverage, unittests=True) else: test_case = junit_xml.TestCase("Unit Tests", "Code Coverage") test_case.add_error_info(message="No coverage for unit tests available") test_cases.append(test_case) unit_test_only_report = get_report( coverage, lambda lines, i, line: 
bool( (lines.get(i + 1) or 0) >= 0 or IGNORE_SRC_LINE_RE.search(line) ), ) # If a line has "None" marker, then it can't be covered, print it out. # If a line has positive or negative coverage then it is # covered in normal tests or unit tests, print it out. # All remaining lines can be covered, but are not covered. uncovered_report = get_report( coverage, lambda lines, i, line: bool( lines.get(i + 1) is None or (lines.get(i + 1) or 0) != 0 or IGNORE_SRC_LINE_RE.search(line) ), ) test_case = junit_xml.TestCase("Uncovered Lines in PR", "Code Coverage") if len(uncovered_report): print("Uncovered Lines in PR") # Buildkite interprets the +++ and --- chars at the start of line, put # in a zero-width space as a workaround. ZWSP = "\u200B" print( uncovered_report.replace("\n+++", f"\n{ZWSP}+++").replace( "\n---", f"\n{ZWSP}---" ) ) test_case.add_error_info( message="The following changed lines are uncovered:", output=uncovered_report, ) else: test_case.add_error_info(message="All changed lines are covered.") test_cases.append(test_case) test_case = junit_xml.TestCase( "Lines Covered only in Unit Tests in PR", "Code Coverage" ) if len(unit_test_only_report): print("Lines Covered only in Unit Tests in PR") # Buildkite interprets the +++ and --- chars at the start of line, put # in a zero-width space as a workaround. print( unit_test_only_report.replace("\n+++", "\n\u200B+++").replace( "\n---", "\n\u200B---" ) ) test_case.add_error_info( message="The following changed lines are covered only in unit tests:", output=unit_test_only_report, ) else: test_case.add_error_info( message="All changed, covered lines are covered outside of unit tests." ) test_cases.append(test_case) junit_suite = junit_xml.TestSuite("Code Coverage", test_cases) junit_report = MZ_ROOT / ci_util.junit_report_filename("coverage") with junit_report.open("w") as f: junit_xml.to_xml_report_file(f, [junit_suite]) if __name__ == "__main__": main()