github.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. """GitHub utilities."""
  10. import os
  11. import re
  12. from dataclasses import dataclass
  13. from typing import Any
  14. import requests
  15. from materialize.observed_error import ObservedBaseError, WithIssue
  # Markers recognised inside the body of a GitHub issue. Each pattern captures
  # everything that follows the marker up to the end of that line, so callers
  # are expected to strip the captured value themselves.
  CI_RE = re.compile(r"ci-regexp: (.*)")
  CI_APPLY_TO = re.compile(r"ci-apply-to: (.*)")
  CI_LOCATION = re.compile(r"ci-location: (.*)")
  CI_IGNORE_FAILURE = re.compile(r"ci-ignore-failure: (.*)")
  20. @dataclass
  21. class KnownGitHubIssue:
  22. regex: re.Pattern[Any]
  23. apply_to: str | None
  24. info: dict[str, Any]
  25. ignore_failure: bool
  26. location: str | None
  @dataclass(kw_only=True, unsafe_hash=True)
  class GitHubIssueWithInvalidRegexp(ObservedBaseError, WithIssue):
      """Observed error describing an issue whose ci-* annotations are unusable."""

      # The offending pattern text, or a description of what is wrong with it.
      regex_pattern: str

      def to_text(self) -> str:
          """Return the plain-text description of the problem."""
          pattern = self.regex_pattern
          return f"Invalid regex in ci-regexp: {pattern}"

      def to_markdown(self) -> str:
          """Return the description prefixed with an HTML link to the issue."""
          issue_link = f'<a href="{self.issue_url}">{self.issue_title} (#{self.issue_number})</a>'
          return f"{issue_link}: Invalid regex in ci-regexp: {self.regex_pattern}, ignoring"
  def get_known_issues_from_github_page(
      token: str | None, repo: str, page: int = 1, timeout: float = 60.0
  ) -> Any:
      """Fetch one page of issues in *repo* whose body mentions ``ci-regexp:``.

      Args:
          token: Optional token sent as a bearer ``Authorization`` header. When
              falsy the request is made without that header.
          repo: Repository in ``owner/name`` form, inserted into the search query.
          page: 1-based page of search results to request (100 results per page).
          timeout: Seconds to wait for GitHub before giving up, so that a stalled
              connection cannot hang the caller indefinitely.

      Returns:
          The decoded JSON document of the search response.

      Raises:
          ValueError: If the HTTP status is not 200, or if GitHub flags the
              result set as incomplete.
      """
      headers = {
          "Accept": "application/vnd.github+json",
          "X-GitHub-Api-Version": "2022-11-28",
      }
      if token:
          headers["Authorization"] = f"Bearer {token}"

      response = requests.get(
          f'https://api.github.com/search/issues?q=repo:{repo}%20type:issue%20in:body%20"ci-regexp%3A"&per_page=100&page={page}',
          headers=headers,
          timeout=timeout,
      )
      if response.status_code != 200:
          raise ValueError(f"Bad return code from GitHub: {response.status_code}")

      issues_json = response.json()
      # Checked explicitly instead of with `assert`, which is removed when Python
      # runs with -O and would then let a partial result set through silently.
      if issues_json["incomplete_results"]:
          raise ValueError("GitHub returned incomplete search results")
      return issues_json
  def _invalid_github_issue(
      issue: dict[str, Any], internal_error_type: str, regex_pattern: str
  ) -> GitHubIssueWithInvalidRegexp:
      """Build the error record for an issue whose ci-* annotations are unusable."""
      return GitHubIssueWithInvalidRegexp(
          internal_error_type=internal_error_type,
          issue_url=issue["html_url"],
          issue_title=issue["title"],
          issue_number=issue["number"],
          regex_pattern=regex_pattern,
      )


  def get_known_issues_from_github(
      token: str | None = os.getenv("GITHUB_TOKEN"),
      repo: str = "MaterializeInc/database-issues",
  ) -> tuple[list[KnownGitHubIssue], list[GitHubIssueWithInvalidRegexp]]:
      """Collect every issue in *repo* that is annotated with ``ci-regexp``.

      All result pages are fetched and each issue body is scanned for the
      ``ci-regexp``, ``ci-apply-to``, ``ci-location`` and ``ci-ignore-failure``
      annotations.

      Args:
          token: Optional GitHub token. Note that the default is read from the
              ``GITHUB_TOKEN`` environment variable once, when this function is
              defined, not on every call.
          repo: Repository in ``owner/name`` form to search.

      Returns:
          A pair ``(known_issues, issues_with_invalid_regex)``. The first list
          holds one entry per ``ci-apply-to`` annotation of each valid issue (or
          a single entry with ``apply_to=None`` when there is none). The second
          list holds an error record for each issue with several ``ci-regexp``,
          ``ci-ignore-failure`` or ``ci-location`` annotations, or with a
          pattern that does not compile.

      Raises:
          ValueError: Propagated from the page fetch when GitHub answers with an
              unexpected status.
      """
      page = 1
      issues_json = get_known_issues_from_github_page(token, repo, page)
      while issues_json["total_count"] > len(issues_json["items"]):
          page += 1
          next_page_json = get_known_issues_from_github_page(token, repo, page)
          if not next_page_json["items"]:
              # Stop on an empty page so a stale total_count cannot loop forever.
              break
          issues_json["items"].extend(next_page_json["items"])

      known_issues: list[KnownGitHubIssue] = []
      issues_with_invalid_regex: list[GitHubIssueWithInvalidRegexp] = []

      for issue in issues_json["items"]:
          # Treat a missing (null) body as empty so findall cannot raise TypeError.
          body = issue["body"] or ""
          matches = CI_RE.findall(body)
          matches_apply_to = CI_APPLY_TO.findall(body)
          matches_location = CI_LOCATION.findall(body)
          matches_ignore_failure = CI_IGNORE_FAILURE.findall(body)

          if len(matches) > 1:
              issues_with_invalid_regex.append(
                  _invalid_github_issue(
                      issue,
                      "GITHUB_INVALID_REGEXP",
                      f"Multiple regexes, but only one supported: {[match.strip() for match in matches]}",
                  )
              )
              continue

          if len(matches_ignore_failure) > 1:
              issues_with_invalid_regex.append(
                  _invalid_github_issue(
                      issue,
                      "GITHUB_INVALID_IGNORE_FAILURE",
                      f"Multiple ci-ignore-failures, but only one supported: {[match.strip() for match in matches_ignore_failure]}",
                  )
              )
              continue

          if not matches:
              continue

          if len(matches_location) >= 2:
              # NOTE(review): this error type looks copy-pasted from the
              # ci-ignore-failure case above; it is kept unchanged because
              # consumers of the value are not visible here - confirm.
              issues_with_invalid_regex.append(
                  _invalid_github_issue(
                      issue,
                      "GITHUB_INVALID_IGNORE_FAILURE",
                      f"Multiple ci-locations, but only one supported: {[match.strip() for match in matches_location]}",
                  )
              )
              continue

          # At most one location remains here. Strip it like every other
          # annotation value so trailing whitespace or a "\r" from CRLF line
          # endings does not end up in the stored location.
          location: str | None = (
              matches_location[0].strip() if matches_location else None
          )

          ignore_failure = len(matches_ignore_failure) == 1 and (
              matches_ignore_failure[0].strip() in ("true", "yes", "1")
          )

          pattern_text = matches[0].strip()
          try:
              # The pattern is compiled from bytes, so it matches bytes input.
              regex_pattern = re.compile(pattern_text.encode())
          except (re.error, ValueError, OverflowError, RecursionError):
              # Only failures of encoding/compiling the pattern are reported as
              # an invalid regex; KeyboardInterrupt and SystemExit propagate.
              issues_with_invalid_regex.append(
                  _invalid_github_issue(issue, "GITHUB_INVALID_REGEXP", pattern_text)
              )
              continue

          # One entry per ci-apply-to value, or a single catch-all entry.
          apply_to_values: list[str | None] = [
              match_apply_to.strip().lower() for match_apply_to in matches_apply_to
          ] or [None]
          known_issues.extend(
              KnownGitHubIssue(regex_pattern, apply_to, issue, ignore_failure, location)
              for apply_to in apply_to_values
          )

      return (known_issues, issues_with_invalid_regex)
  def for_github_re(text: bytes) -> bytes:
      r"""
      Return *text* with every newline byte replaced by a single space.

      Writing a ci-regexp that matches across line breaks is awkward, so the
      text is flattened instead. Take this panic as an example:

          thread 'test_auth_deduplication' panicked at src/environmentd/tests/auth.rs:1878:5:
          assertion `left == right` failed

      Against the raw text the regex would have to be:

          thread 'test_auth_deduplication' panicked at src/environmentd/tests/auth.rs.*\n.*left == right

      Against the output of this function it can simply be:

          thread 'test_auth_deduplication' panicked at src/environmentd/tests/auth.rs.*left == right
      """
      newline_to_space = bytes.maketrans(b"\n", b" ")
      return text.translate(newline_to_space)