123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187 |
- # Copyright Materialize, Inc. and contributors. All rights reserved.
- #
- # Use of this software is governed by the Business Source License
- # included in the LICENSE file at the root of this repository.
- #
- # As of the Change Date specified in that file, in accordance with
- # the Business Source License, use of this software will be governed
- # by the Apache License, Version 2.0.
- """GitHub utilities."""
- import os
- import re
- from dataclasses import dataclass
- from typing import Any
- import requests
- from materialize.observed_error import ObservedBaseError, WithIssue
- CI_RE = re.compile("ci-regexp: (.*)")
- CI_APPLY_TO = re.compile("ci-apply-to: (.*)")
- CI_LOCATION = re.compile("ci-location: (.*)")
- CI_IGNORE_FAILURE = re.compile("ci-ignore-failure: (.*)")
- @dataclass
- class KnownGitHubIssue:
- regex: re.Pattern[Any]
- apply_to: str | None
- info: dict[str, Any]
- ignore_failure: bool
- location: str | None
- @dataclass(kw_only=True, unsafe_hash=True)
- class GitHubIssueWithInvalidRegexp(ObservedBaseError, WithIssue):
- regex_pattern: str
- def to_text(self) -> str:
- return f"Invalid regex in ci-regexp: {self.regex_pattern}"
- def to_markdown(self) -> str:
- return f'<a href="{self.issue_url}">{self.issue_title} (#{self.issue_number})</a>: Invalid regex in ci-regexp: {self.regex_pattern}, ignoring'
- def get_known_issues_from_github_page(
- token: str | None, repo: str, page: int = 1
- ) -> Any:
- headers = {
- "Accept": "application/vnd.github+json",
- "X-GitHub-Api-Version": "2022-11-28",
- }
- if token:
- headers["Authorization"] = f"Bearer {token}"
- response = requests.get(
- f'https://api.github.com/search/issues?q=repo:{repo}%20type:issue%20in:body%20"ci-regexp%3A"&per_page=100&page={page}',
- headers=headers,
- )
- if response.status_code != 200:
- raise ValueError(f"Bad return code from GitHub: {response.status_code}")
- issues_json = response.json()
- assert issues_json["incomplete_results"] == False
- return issues_json
- def get_known_issues_from_github(
- token: str | None = os.getenv("GITHUB_TOKEN"),
- repo: str = "MaterializeInc/database-issues",
- ) -> tuple[list[KnownGitHubIssue], list[GitHubIssueWithInvalidRegexp]]:
- page = 1
- issues_json = get_known_issues_from_github_page(token, repo, page)
- while issues_json["total_count"] > len(issues_json["items"]):
- page += 1
- next_page_json = get_known_issues_from_github_page(token, repo, page)
- if not next_page_json["items"]:
- break
- issues_json["items"].extend(next_page_json["items"])
- known_issues = []
- issues_with_invalid_regex = []
- for issue in issues_json["items"]:
- matches = CI_RE.findall(issue["body"])
- matches_apply_to = CI_APPLY_TO.findall(issue["body"])
- matches_location = CI_LOCATION.findall(issue["body"])
- matches_ignore_failure = CI_IGNORE_FAILURE.findall(issue["body"])
- if len(matches) > 1:
- issues_with_invalid_regex.append(
- GitHubIssueWithInvalidRegexp(
- internal_error_type="GITHUB_INVALID_REGEXP",
- issue_url=issue["html_url"],
- issue_title=issue["title"],
- issue_number=issue["number"],
- regex_pattern=f"Multiple regexes, but only one supported: {[match.strip() for match in matches]}",
- )
- )
- continue
- if len(matches_ignore_failure) > 1:
- issues_with_invalid_regex.append(
- GitHubIssueWithInvalidRegexp(
- internal_error_type="GITHUB_INVALID_IGNORE_FAILURE",
- issue_url=issue["html_url"],
- issue_title=issue["title"],
- issue_number=issue["number"],
- regex_pattern=f"Multiple ci-ignore-failures, but only one supported: {[match.strip() for match in matches_ignore_failure]}",
- )
- )
- continue
- if len(matches) == 0:
- continue
- if len(matches_location) >= 2:
- issues_with_invalid_regex.append(
- GitHubIssueWithInvalidRegexp(
- internal_error_type="GITHUB_INVALID_IGNORE_FAILURE",
- issue_url=issue["html_url"],
- issue_title=issue["title"],
- issue_number=issue["number"],
- regex_pattern=f"Multiple ci-locations, but only one supported: {[match.strip() for match in matches_location]}",
- )
- )
- continue
- location: str | None = (
- matches_location[0] if len(matches_location) == 1 else None
- )
- ignore_failure = len(matches_ignore_failure) == 1 and matches_ignore_failure[
- 0
- ].strip() in ("true", "yes", "1")
- try:
- regex_pattern = re.compile(matches[0].strip().encode())
- except:
- issues_with_invalid_regex.append(
- GitHubIssueWithInvalidRegexp(
- internal_error_type="GITHUB_INVALID_REGEXP",
- issue_url=issue["html_url"],
- issue_title=issue["title"],
- issue_number=issue["number"],
- regex_pattern=matches[0].strip(),
- )
- )
- continue
- if matches_apply_to:
- for match_apply_to in matches_apply_to:
- known_issues.append(
- KnownGitHubIssue(
- regex_pattern,
- match_apply_to.strip().lower(),
- issue,
- ignore_failure,
- location,
- )
- )
- else:
- known_issues.append(
- KnownGitHubIssue(regex_pattern, None, issue, ignore_failure, location)
- )
- return (known_issues, issues_with_invalid_regex)
- def for_github_re(text: bytes) -> bytes:
- """
- Matching newlines in regular expressions is kind of annoying, don't expect
- ci-regexp to do that correctly, but instead replace all newlines with a
- space. For examples this makes matching this panic easier:
- thread 'test_auth_deduplication' panicked at src/environmentd/tests/auth.rs:1878:5:
- assertion `left == right` failed
- Previously the regex should have been:
- thread 'test_auth_deduplication' panicked at src/environmentd/tests/auth.rs.*\n.*left == right
- With this function it can be:
- thread 'test_auth_deduplication' panicked at src/environmentd/tests/auth.rs.*left == right
- """
- return text.replace(b"\n", b" ")
|