lint-docs-catalog.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
#!/usr/bin/env python3
# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.
  10. import fileinput
  11. import re
  12. import sys
  13. from enum import Enum
  14. class ParserState(Enum):
  15. NONE = 0
  16. FIELDS = 1
  17. HEADER = 2
  18. SEPARATOR = 3
  19. HEADER_SEPARATOR_RE = re.compile(r"\|?(\s*-+\s*)(\|\s*-+\s*){2}\|?")
  20. TABLE_RE = re.compile(r"(?:\|?[\s`\[\]]*([\w_ ]+)[\s`\[\]]*)")
  21. RELATION_MARKER_RE = re.compile(r"RELATION_SPEC (\w+)\.(\w+)")
  22. UNDOCUMENTED_RELATION_MARKER = re.compile(r"RELATION_SPEC_UNDOCUMENTED (\w+)\.(\w+)")
  23. HEADER = """
  24. # Copyright Materialize, Inc. and contributors. All rights reserved.
  25. #
  26. # Use of this software is governed by the Business Source License
  27. # included in the LICENSE file at the root of this repository.
  28. #
  29. # As of the Change Date specified in that file, in accordance with
  30. # the Business Source License, use of this software will be governed
  31. # by the Apache License, Version 2.0.
  32. # This file is auto-generated by ci/test/lint-docs-catalog.py
  33. mode cockroach
  34. statement ok
  35. CREATE VIEW objects AS
  36. SELECT
  37. schema.name AS schema,
  38. objects.name AS object,
  39. columns.position,
  40. columns.name,
  41. columns.type
  42. FROM
  43. mz_catalog.mz_columns AS columns,
  44. mz_catalog.mz_objects AS objects,
  45. mz_catalog.mz_schemas AS schema
  46. WHERE columns.id = objects.id AND objects.schema_id = schema.id
  47. statement ok
  48. CREATE INDEX objects_idx ON objects(schema, object)
  49. """
  50. def main() -> None:
  51. print(HEADER)
  52. state = ParserState.NONE
  53. position = 1
  54. objects = []
  55. schemas = set()
  56. for line in fileinput.input(sys.argv):
  57. if state == ParserState.NONE:
  58. undocumented = UNDOCUMENTED_RELATION_MARKER.search(line)
  59. if undocumented:
  60. schema = undocumented.group(1)
  61. object_name = undocumented.group(2)
  62. objects.append(object_name)
  63. schemas.add(f"'{schema}'")
  64. continue
  65. marker_match = RELATION_MARKER_RE.search(line)
  66. if marker_match:
  67. schema = marker_match.group(1)
  68. object_name = marker_match.group(2)
  69. print("query ITT")
  70. print(
  71. f"SELECT position, name, type FROM objects WHERE schema = '{schema}' AND object = '{object_name}' ORDER BY position"
  72. )
  73. print("----")
  74. state = ParserState.HEADER
  75. objects.append(object_name)
  76. schemas.add(f"'{schema}'")
  77. elif state == ParserState.HEADER:
  78. if HEADER_SEPARATOR_RE.match(line):
  79. state = ParserState.FIELDS
  80. elif state == ParserState.FIELDS:
  81. table_match = TABLE_RE.findall(line)
  82. if table_match and len(table_match) >= 2:
  83. field = table_match[0]
  84. type_name = table_match[1]
  85. # We currently cannot determine the type of lists from the catalog.
  86. if type_name == "mz_aclitem array":
  87. type_name = "mz_aclitem[]"
  88. elif type_name == "text array":
  89. type_name = "text[]"
  90. elif "list" in type_name:
  91. type_name = "list"
  92. elif "array" in type_name:
  93. type_name = "array"
  94. type_name = type_name.replace(" ", "␠")
  95. print(" ".join([str(position), field, type_name]))
  96. position += 1
  97. else:
  98. print()
  99. state = ParserState.NONE
  100. position = 1
  101. if objects:
  102. print("query T")
  103. schemas_str = ",".join(schemas)
  104. print(
  105. f"SELECT DISTINCT object FROM objects WHERE schema IN ({schemas_str}) ORDER BY object"
  106. )
  107. print("----")
  108. for object_name in sorted(objects):
  109. print(object_name)
  110. if __name__ == "__main__":
  111. main()