# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.
#
# ci_closed_issues_detect.py - Detect references to already closed issues.
- import argparse
- import os
- import re
- import sys
- from collections.abc import Iterator
- from dataclasses import dataclass
- from typing import IO
- import requests
- from materialize import buildkite, spawn
# Matches one issue reference. Each alternative captures the issue number in
# its own named group; the name of the group that matched is the key used to
# look up the repository in GROUP_REPO. The "ambiguous" group is a bare
# "#<number>" without any repository prefix.
ISSUE_RE = re.compile(
    r"""
    ( TimelyDataflow/timely-dataflow\#(?P<timelydataflow>[0-9]+)
    | ( materialize\# | materialize/issues/ ) (?P<materialize>[0-9]+)
    | ( cloud\# | cloud/issues/ ) (?P<cloud>[0-9]+)
    | ( incidents-and-escalations\# | incidents-and-escalations/issues/ ) (?P<incidentsandescalations>[0-9]+)
    | ( database-issues\# | database-issues/issues/ ) (?P<databaseissues>[0-9]+)
    # only match from the beginning of the line or after a space character to avoid matching Buildkite URLs
    | (^|\s) \# (?P<ambiguous>[0-9]+)
    )
    """,
    re.VERBOSE,
)
# Maps the name of a regex capture group to the "<owner>/<repo>" slug that is
# used to build GitHub URLs. "ambiguous" maps to None because a bare
# "#<number>" does not say which repository it belongs to.
GROUP_REPO = {
    "timelydataflow": "TimelyDataflow/timely-dataflow",
    "materialize": "MaterializeInc/materialize",
    "cloud": "MaterializeInc/cloud",
    "incidentsandescalations": "MaterializeInc/incidents-and-escalations",
    "databaseissues": "MaterializeInc/database-issues",
    "ambiguous": None,
}
# Markers that make a line or comment block worth checking: text is only
# searched for issue references when it matches this pattern. Matching is
# case-insensitive.
REFERENCE_RE = re.compile(
    r"""
    ( reenable
    | re-enable
    | reconsider
    | TODO
    # Used in Buildkite pipeline config files
    | skip:
    # Used in platform-checks
    | @disabled
    # Used in pytest
    | @pytest.mark.skip
    # Used in output-consistency framework
    | YesIgnore
    | tracked\ with
    # Used in proto files
    | //\ buf\ breaking:\ ignore
    # Used in documentation
    | in\ the\ future
    )
    """,
    re.VERBOSE | re.IGNORECASE,
)
# Text that suppresses issue detection: a line or comment block matching this
# pattern (case-insensitively) is skipped even when it also matches
# REFERENCE_RE. The entries are phrases that merely cite an issue, plus a few
# specific known false positives (the file each one targets is noted inline).
#
# NOTE: the pattern is compiled with re.VERBOSE, where an unescaped "#" outside
# a character class starts a comment that runs to the end of the line. A
# literal "#" must therefore always be written as "\#"; otherwise the rest of
# the alternative is silently dropped from the pattern.
IGNORE_RE = re.compile(
    r"""
    ( discussion\ of\ this\ in
    | discussed\ in
    | [sS]ee\ \<
    # is_null_propagation.slt
    | isnull\(\#0\)
    # src/transform/tests/test_transforms/column_knowledge.spec
    | \(\#1\)\ IS\ NULL
    # test/sqllogictest/cockroach/*.slt
    | cockroach\#
    | Liquibase
    # ci/test/lint-buf/README.md
    | Ignore\ because\ of\ database-issues\#99999
    # src/storage-client/src/controller.rs
    | issues/20211\>
    # src/sql/src/plan/statement.rs
    | issues/20019\>
    # src/storage/src/storage_state.rs
    | \#19907$
    )
    """,
    re.VERBOSE | re.IGNORECASE,
)
# Comment markers recognised when grouping lines into comment blocks: "#" or
# "//". The pattern is searched anywhere in a line, so the first occurrence of
# either marker counts, wherever it appears.
COMMENT_RE = re.compile(r"#|//")
# Paths whose contents are never scanned: image/Avro files and one design
# document.
# NOTE(review): main() applies this with .match() and the pattern has no end
# anchor, so any path that merely contains one of these extensions followed by
# more characters is skipped as well -- confirm whether that is intended.
IGNORE_FILENAME_RE = re.compile(
    r"""
    ( .*\.(svg|png|jpg|jpeg|avro|ico)
    | doc/developer/design/20230223_stabilize_with_mutually_recursive.md
    )
    """,
    re.VERBOSE,
)
# File names that carry an issue number as a ".gh<number>" suffix after a
# .td/.slt/.test extension. The number is captured in a group named
# "ambiguous", which GROUP_REPO maps to no repository.
FILENAME_REFERENCE_RE = re.compile(r".*\.(td|slt|test)\.gh(?P<ambiguous>[0-9]+)")
@dataclass
class IssueRef:
    """A single reference to a GitHub issue found in the repository."""

    # "<owner>/<repo>" slug from GROUP_REPO, or None for an ambiguous reference
    repository: str | None
    # Issue number as written in the reference
    issue_id: int
    # Path of the file containing the reference
    filename: str
    # 1-based line on which the referencing text starts; 0 for references
    # found in a file name
    line_number: int
    # Whitespace-stripped line or comment block containing the reference;
    # None for references found in a file name
    text: str | None
@dataclass
class CommentBlock:
    """A run of consecutive comment lines accumulated by comment_blocks()."""

    # Comment marker matched by COMMENT_RE on the first line ("#" or "//")
    char: str
    # Character offset of that marker within its line
    pos: int
    # Concatenated text of all lines collected so far
    text: str
    # 1-based line number of the first line of the block
    line_number: int
def comment_blocks(file: IO) -> Iterator[tuple[int, str]]:
    """Yield the contents of ``file`` as ``(line_number, text)`` chunks.

    Consecutive lines that contain the same comment marker at the same
    character offset are merged into a single chunk, so that a multi-line
    comment is seen as one unit. Every line without a comment marker is
    yielded on its own. ``line_number`` is the 1-based number of the first
    line of the chunk.
    """
    pending: CommentBlock | None = None

    for number, line in enumerate(file, start=1):
        marker = COMMENT_RE.search(line)

        if marker is None:
            # A plain line ends whatever comment block was being collected.
            if pending is not None:
                yield (pending.line_number, pending.text)
                pending = None
            yield (number, line)
            continue

        char, pos = marker.group(0), marker.start()
        if pending is not None:
            if (pending.char, pending.pos) == (char, pos):
                # Same marker in the same column: the block continues.
                pending.text += line
                continue
            # Different marker or column: emit the old block, start a new one.
            yield (pending.line_number, pending.text)
        pending = CommentBlock(char, pos, line, number)

    # Flush a block that runs up to the end of the file.
    if pending is not None:
        yield (pending.line_number, pending.text)
def detect_referenced_issues(filename: str) -> list[IssueRef]:
    """Collect all issue references found in the contents of ``filename``.

    The file is split into chunks by comment_blocks(). A chunk is only
    inspected when it matches REFERENCE_RE and does not match IGNORE_RE; every
    ISSUE_RE match inside such a chunk becomes one IssueRef carrying the
    chunk's starting line number and its stripped text.
    """
    found: list[IssueRef] = []

    with open(filename) as source:
        for first_line, block in comment_blocks(source):
            is_candidate = REFERENCE_RE.search(block) and not IGNORE_RE.search(
                block
            )
            if not is_candidate:
                continue

            for hit in ISSUE_RE.finditer(block):
                captured = [
                    (name, number)
                    for name, number in hit.groupdict().items()
                    if number
                ]
                assert len(captured) == 1, f"Expected only 1 element in {captured}"
                group, raw_id = captured[0]
                issue_id = int(raw_id)

                # Explain plans can look like issue references
                looks_like_plan_column = (
                    group == "ambiguous"
                    and issue_id < 100
                    and "issues/" not in hit.group(0)
                )
                if looks_like_plan_column:
                    continue

                found.append(
                    IssueRef(
                        repository=GROUP_REPO[group],
                        issue_id=issue_id,
                        filename=filename,
                        line_number=first_line,
                        text=block.strip(),
                    )
                )

    return found
def is_issue_closed_on_github(repository: str | None, issue_id: int) -> bool:
    """Ask the GitHub REST API whether an issue is closed.

    Args:
        repository: "<owner>/<repo>" slug of the repository; must be set.
        issue_id: Number of the issue within that repository.

    Returns:
        True if GitHub reports the issue's state as "closed". False if it is
        in any other state, or if GitHub answered 404 while the CI environment
        variable is unset (in which case a hint is printed instead of failing).

    Raises:
        AssertionError: If ``repository`` is empty or None.
        ValueError: If GitHub answers with any other non-200 status code.
        requests.exceptions.RequestException: If the request itself fails,
            including when it times out.
    """
    assert repository
    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    # Prefer the dedicated checker token, fall back to the generic one; without
    # either, the request is sent unauthenticated.
    if token := os.getenv("GITHUB_CI_ISSUE_REFERENCE_CHECKER_TOKEN") or os.getenv(
        "GITHUB_TOKEN"
    ):
        headers["Authorization"] = f"Bearer {token}"

    url = f"https://api.github.com/repos/{repository}/issues/{issue_id}"
    # requests applies no timeout by default, so a stalled connection would
    # block the whole run forever. Bound the wait (in seconds) instead.
    response = requests.get(url, headers=headers, timeout=60)

    if response.status_code == 404 and not os.getenv("CI"):
        print(
            f"Can't check issue #{issue_id} in {repository} repo, set GITHUB_TOKEN environment variable or run this check in CI"
        )
        return False

    if response.status_code != 200:
        raise ValueError(
            f"Bad return code from GitHub on {url}: {response.status_code}: {response.text}"
        )

    issue_json = response.json()
    # We can't check the issue number anymore because issues can have moved
    return issue_json["state"] == "closed"
def filter_changed_lines(issue_refs: list[IssueRef]) -> list[IssueRef]:
    """Keep only the references whose text overlaps a modified line.

    A reference is kept when at least one of the lines spanned by its text is
    reported by buildkite.find_modified_lines(). References without text
    (issue numbers found in a file name) are always dropped.

    Args:
        issue_refs: References to filter.

    Returns:
        The matching references, in their original order.
    """
    # Membership is tested with (filename, line_number) pairs, so the result
    # is expected to be a collection of such pairs.
    changed_lines = buildkite.find_modified_lines()

    def touches_changed_line(issue_ref: IssueRef) -> bool:
        if issue_ref.text is None:
            return False
        # The text is stored stripped, i.e. without a trailing newline, so a
        # text spanning N lines contains N - 1 newlines. Add one so that the
        # last line (and the only line of a single-line reference) is checked.
        line_count = issue_ref.text.count("\n") + 1
        return any(
            (issue_ref.filename, issue_ref.line_number + i) in changed_lines
            for i in range(line_count)
        )

    return [issue_ref for issue_ref in issue_refs if touches_changed_line(issue_ref)]
def filter_ambiguous_issues(
    issue_refs: list[IssueRef],
) -> tuple[list[IssueRef], list[IssueRef]]:
    """Split references by whether their repository is known.

    Returns a pair ``(resolved, ambiguous)``: the first list holds the
    references with a repository set, the second those without one. The
    relative order of the input is preserved in both lists.
    """
    resolved: list[IssueRef] = []
    ambiguous: list[IssueRef] = []
    for ref in issue_refs:
        bucket = resolved if ref.repository else ambiguous
        bucket.append(ref)
    return resolved, ambiguous
def filter_closed_issues(issue_refs: list[IssueRef]) -> list[IssueRef]:
    """Keep only the references that point to a closed GitHub issue.

    Each distinct (repository, issue number) pair is looked up once via
    is_issue_closed_on_github(), no matter how many references share it.
    """
    # Deduplicate first so that every issue costs a single lookup.
    unique_issues: set[tuple[str | None, int]] = set()
    for ref in issue_refs:
        unique_issues.add((ref.repository, ref.issue_id))

    closed: set[tuple[str | None, int]] = set()
    for repository, number in unique_issues:
        if is_issue_closed_on_github(repository, number):
            closed.add((repository, number))

    still_referenced: list[IssueRef] = []
    for ref in issue_refs:
        if (ref.repository, ref.issue_id) in closed:
            still_referenced.append(ref)
    return still_referenced
def main() -> int:
    """Scan all files tracked by git for references to closed issues.

    Issue references are collected from file names and file contents, split
    into unambiguous and ambiguous ones, optionally restricted to changed
    lines (``--changed-lines-only``), and the unambiguous ones are checked
    against GitHub. Every ambiguous reference and every reference to a closed
    issue is printed.

    Returns:
        1 if anything was reported, 0 otherwise.
    """
    parser = argparse.ArgumentParser(
        prog="ci-closed-issues-detect",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="ci-closed-issues-detect detects references to already closed GitHub issues.",
    )
    parser.add_argument(
        "--changed-lines-only",
        action="store_true",
        help="only report issues in changed files/lines",
    )
    args = parser.parse_args()

    def print_location(ref: IssueRef) -> None:
        # References found in a file name have no text to show.
        if ref.text is None:
            print(f"{ref.filename} (filename)")
            return
        print(f"{ref.filename}:{ref.line_number}:")
        print(ref.text)

    tracked_files = spawn.capture(
        ["git", "ls-tree", "--full-tree", "-r", "--name-only", "HEAD"]
    )

    collected: list[IssueRef] = []
    for path in tracked_files.splitlines():
        # An issue number can be part of the file name itself.
        name_match = FILENAME_REFERENCE_RE.search(path)
        if name_match:
            captured = [
                (name, number)
                for name, number in name_match.groupdict().items()
                if number
            ]
            assert len(captured) == 1, f"Expected only 1 element in {captured}"
            group, issue_id = captured[0]
            collected.append(
                IssueRef(
                    repository=GROUP_REPO[group],
                    issue_id=int(issue_id),
                    filename=path,
                    line_number=0,
                    text=None,
                )
            )

        # Files without any ending can be interesting datadriven test files
        skip_contents = (
            IGNORE_FILENAME_RE.match(path)
            or os.path.isdir(path)
            or os.path.islink(path)
        )
        if not skip_contents:
            collected.extend(detect_referenced_issues(path))

    resolved_refs, ambiguous_refs = filter_ambiguous_issues(collected)

    if args.changed_lines_only:
        resolved_refs = filter_changed_lines(resolved_refs)
        ambiguous_refs = filter_changed_lines(ambiguous_refs)

    closed_refs = filter_closed_issues(resolved_refs)

    for ref in ambiguous_refs:
        print(f"--- Ambiguous issue reference: #{ref.issue_id}")
        print_location(ref)
        print(
            f"Use database-issues#{ref.issue_id} or materialize#{ref.issue_id} instead to have an unambiguous reference"
        )

    for ref in closed_refs:
        url = buildkite.inline_link(
            f"https://github.com/{ref.repository}/issues/{ref.issue_id}",
            f"{ref.repository}#{ref.issue_id}",
        )
        print(f"--- Issue is referenced in comment but already closed: {url}")
        print_location(ref)

    return 1 if (closed_refs or ambiguous_refs) else 0
# Run the check when executed as a script; the value returned by main()
# (1 if anything was reported, 0 otherwise) becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|