ci_coverage_pr_report.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. import argparse
  10. import os
  11. import re
  12. import subprocess
  13. from collections import OrderedDict
  14. from collections.abc import Callable
  15. import junit_xml
  16. from materialize import MZ_ROOT, buildkite, ci_util
# Maps a source file path to its changed lines (line number -> marker). The
# marker of a line is interpreted as follows:
# - None value indicates that this line is interesting, but we don't know yet
#   if it can actually be covered.
# - Zero indicates that the line can be covered, but no test has covered it.
# - Positive values indicate that the line can be covered and how often it has
#   been covered in end-to-end tests.
# - Negative values indicate that the line has only been covered in unit tests.
Coverage = dict[str, OrderedDict[int, int | None]]
  23. SOURCE_RE = re.compile(
  24. r"""
  25. ( src/(.*$)"
  26. | bazel-out/.*/bin/(.*$)
  27. | external/(.*$)
  28. )""",
  29. re.VERBOSE,
  30. )
# Source lines matching this pattern are always kept by the report filters in
# main(), i.e. they are never reported as uncovered:
# * Deriving generates more code, but we don't expect to cover this in most
#   cases, so ignore such lines.
# * Same for mz_ore::test
# * The await keyword is not properly supported
#   (https://github.com/rust-lang/rust/issues/98712).
IGNORE_SRC_LINE_RE = re.compile(
    r"""
    ( \#\[derive\(.*\)\]
    | \#\[mz_ore::test.*\]
    | \.await
    )
    """,
    re.VERBOSE,
)
# Files whose path matches this pattern are left out of the coverage report
# (checked in ignore_file_in_coverage_report). Currently this only covers
# paths containing a "/maelstrom/" directory component.
IGNORE_FILE_PATH_RE = re.compile(
    r"""
    ( /maelstrom/
    )
    """,
    re.VERBOSE,
)
  52. def ignore_file_in_coverage_report(file_path: str) -> bool:
  53. if not file_path.endswith(".rs"):
  54. return True
  55. if IGNORE_FILE_PATH_RE.search(file_path):
  56. return True
  57. return False
  58. unittests_have_run = False
  59. def mark_covered_lines(
  60. lcov_file: str, coverage: Coverage, unittests: bool = False
  61. ) -> None:
  62. """
  63. For a description of the lcov tracing file format, see the bottom of
  64. https://linux.die.net/man/1/geninfo
  65. """
  66. global unittests_have_run
  67. if unittests:
  68. unittests_have_run = True
  69. else:
  70. assert (
  71. not unittests_have_run
  72. ), "Call mark_covered_lines for unit tests last in order to get correct code coverage reports"
  73. # There will always be an SF line specifying a file before a DA line
  74. # according to the lcov tracing file format definition
  75. file = None
  76. for line in open(lcov_file):
  77. line = line.strip()
  78. if not line:
  79. continue
  80. if line == "end_of_record":
  81. continue
  82. method, content = tuple(line.strip().split(":", 1))
  83. # SF:/var/lib/buildkite-agent/builds/buildkite-builders-d43b1b5-i-0193496e7aec9a4e3-1/materialize/coverage/src/transform/src/lib.rs
  84. if method == "SF":
  85. if content.startswith("src/"): # for unit tests
  86. file = content
  87. else:
  88. result = SOURCE_RE.search(content)
  89. assert result, f"Unexpected file {content}"
  90. file = result.group(1)
  91. # DA:111,15524
  92. # DA:112,0
  93. # DA:113,15901
  94. elif method == "DA":
  95. assert file, "file was not set by a SF line"
  96. if file in coverage:
  97. line_str, hit_str = content.split(",", 1)
  98. line_nr = int(line_str)
  99. hit = int(hit_str) if hit_str.isnumeric() else int(float(hit_str))
  100. if line_nr in coverage[file]:
  101. if unittests:
  102. if not coverage[file][line_nr]:
  103. coverage[file][line_nr] = (
  104. coverage[file][line_nr] or 0
  105. ) - hit
  106. else:
  107. coverage[file][line_nr] = (coverage[file][line_nr] or 0) + hit
  108. def get_report(
  109. coverage: Coverage, fn: Callable[[OrderedDict[int, int | None], int, str], bool]
  110. ) -> str:
  111. """
  112. Remove uncovered lines in real files and print a git diff, then restore to
  113. original state.
  114. The fn function determines when to keep a line. Everything not kept will
  115. show up in the diff.
  116. """
  117. try:
  118. # Remove lines which are not covered so they show up with "!" marker
  119. for file, lines in coverage.items():
  120. with open(file, "r+") as f:
  121. content = f.readlines()
  122. f.seek(0)
  123. for i, line in enumerate(content):
  124. if fn(lines, i, line):
  125. f.write(line)
  126. f.truncate()
  127. result = subprocess.run(
  128. [
  129. "git",
  130. "diff",
  131. # Spaces can be moved around, leading to confusing reports
  132. "--ignore-all-space",
  133. "--output-indicator-old=!",
  134. "HEAD",
  135. ],
  136. check=True,
  137. capture_output=True,
  138. )
  139. return result.stdout.decode("utf-8").strip()
  140. finally:
  141. # Restore the code into its original state
  142. subprocess.run(["git", "reset", "--hard"], check=True)
  143. def main() -> None:
  144. parser = argparse.ArgumentParser(
  145. prog="ci-coverage-pr-report",
  146. formatter_class=argparse.RawDescriptionHelpFormatter,
  147. description="""
  148. ci-coverage-pr-report creates a code coverage report for CI.""",
  149. )
  150. parser.add_argument("--unittests", type=str, help="unit test lcov file")
  151. parser.add_argument("tests", nargs="+", help="all other lcov files from test runs")
  152. args = parser.parse_args()
  153. result = subprocess.run(["git", "diff"], check=True, capture_output=True)
  154. output = result.stdout.decode("utf-8").strip()
  155. assert not output, f"Has to run on clean git state: \n{output}"
  156. test_cases = []
  157. coverage: Coverage = {}
  158. for file, line in buildkite.find_modified_lines():
  159. if not ignore_file_in_coverage_report(file):
  160. coverage.setdefault(file, OrderedDict())[line] = None
  161. for lcov_file in args.tests:
  162. mark_covered_lines(lcov_file, coverage)
  163. if args.unittests:
  164. if os.path.isfile(args.unittests):
  165. mark_covered_lines(args.unittests, coverage, unittests=True)
  166. else:
  167. test_case = junit_xml.TestCase("Unit Tests", "Code Coverage")
  168. test_case.add_error_info(message="No coverage for unit tests available")
  169. test_cases.append(test_case)
  170. unit_test_only_report = get_report(
  171. coverage,
  172. lambda lines, i, line: bool(
  173. (lines.get(i + 1) or 0) >= 0 or IGNORE_SRC_LINE_RE.search(line)
  174. ),
  175. )
  176. # If a line has "None" marker, then it can't be covered, print it out.
  177. # If a line has positive or negative coverage then it is
  178. # covered in normal tests or unit tests, print it out.
  179. # All remaining lines can be covered, but are not covered.
  180. uncovered_report = get_report(
  181. coverage,
  182. lambda lines, i, line: bool(
  183. lines.get(i + 1) is None
  184. or (lines.get(i + 1) or 0) != 0
  185. or IGNORE_SRC_LINE_RE.search(line)
  186. ),
  187. )
  188. test_case = junit_xml.TestCase("Uncovered Lines in PR", "Code Coverage")
  189. if len(uncovered_report):
  190. print("Uncovered Lines in PR")
  191. # Buildkite interprets the +++ and --- chars at the start of line, put
  192. # in a zero-width space as a workaround.
  193. ZWSP = "\u200B"
  194. print(
  195. uncovered_report.replace("\n+++", f"\n{ZWSP}+++").replace(
  196. "\n---", f"\n{ZWSP}---"
  197. )
  198. )
  199. test_case.add_error_info(
  200. message="The following changed lines are uncovered:",
  201. output=uncovered_report,
  202. )
  203. else:
  204. test_case.add_error_info(message="All changed lines are covered.")
  205. test_cases.append(test_case)
  206. test_case = junit_xml.TestCase(
  207. "Lines Covered only in Unit Tests in PR", "Code Coverage"
  208. )
  209. if len(unit_test_only_report):
  210. print("Lines Covered only in Unit Tests in PR")
  211. # Buildkite interprets the +++ and --- chars at the start of line, put
  212. # in a zero-width space as a workaround.
  213. print(
  214. unit_test_only_report.replace("\n+++", "\n\u200B+++").replace(
  215. "\n---", "\n\u200B---"
  216. )
  217. )
  218. test_case.add_error_info(
  219. message="The following changed lines are covered only in unit tests:",
  220. output=unit_test_only_report,
  221. )
  222. else:
  223. test_case.add_error_info(
  224. message="All changed, covered lines are covered outside of unit tests."
  225. )
  226. test_cases.append(test_case)
  227. junit_suite = junit_xml.TestSuite("Code Coverage", test_cases)
  228. junit_report = MZ_ROOT / ci_util.junit_report_filename("coverage")
  229. with junit_report.open("w") as f:
  230. junit_xml.to_xml_report_file(f, [junit_suite])
# Only create the report when executed as a script, not when imported.
if __name__ == "__main__":
    main()