#!/usr/bin/env python3

# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

import fileinput
import re
import sys
from enum import Enum


class ParserState(Enum):
    """Where the line scanner in ``main`` currently is relative to a table."""

    # Members are numbered 0-3 in declaration order:
    #   NONE      - outside any table; scanning for a relation marker.
    #   FIELDS    - inside a table body; every row describes one column.
    #   HEADER    - marker seen; skipping lines until the header separator row.
    #   SEPARATOR - declared but not referenced by the code in this file.
    NONE, FIELDS, HEADER, SEPARATOR = range(4)


# Separator row under a Markdown table header: an optional leading pipe, a run
# of dashes, then two more pipe-prefixed runs of dashes and an optional pipe.
# `main` applies it with `match`, so it is anchored at the start of the line
# only; rows with more than three columns therefore match as well.
HEADER_SEPARATOR_RE = re.compile(r"\|?(\s*-+\s*)(\|\s*-+\s*){2}\|?")
# One table cell: an optional pipe, any surrounding whitespace, backticks or
# square brackets, and a captured run of word characters and spaces. `main`
# uses `findall` and reads the first two captures of a row.
TABLE_RE = re.compile(r"(?:\|?[\s`\[\]]*([\w_ ]+)[\s`\[\]]*)")
# Marker of the form `RELATION_SPEC <schema>.<object>`; group 1 is the schema
# and group 2 the object name.
RELATION_MARKER_RE = re.compile(r"RELATION_SPEC (\w+)\.(\w+)")
# Marker of the form `RELATION_SPEC_UNDOCUMENTED <schema>.<object>`, with the
# same two groups. `main` records such an object but prints no column query.
UNDOCUMENTED_RELATION_MARKER = re.compile(r"RELATION_SPEC_UNDOCUMENTED (\w+)\.(\w+)")

# Preamble that `main` prints before any generated query: the license text, an
# auto-generation notice, and the statements creating the `objects` view (a
# join of mz_columns, mz_objects and mz_schemas) plus its index. The generated
# queries select from that view.
HEADER = """
# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

# This file is auto-generated by ci/test/lint-docs-catalog.py

mode cockroach

statement ok
CREATE VIEW objects AS
  SELECT
    schema.name AS schema,
    objects.name AS object,
    columns.position,
    columns.name,
    columns.type
  FROM
    mz_catalog.mz_columns AS columns,
    mz_catalog.mz_objects AS objects,
    mz_catalog.mz_schemas AS schema
  WHERE columns.id = objects.id AND objects.schema_id = schema.id

statement ok
CREATE INDEX objects_idx ON objects(schema, object)
"""


def _catalog_type_name(type_name: str) -> str:
    """Map a documented column type to the spelling printed in expected rows."""
    # We currently cannot determine the type of lists from the catalog.
    if type_name == "mz_aclitem array":
        type_name = "mz_aclitem[]"
    elif type_name == "text array":
        type_name = "text[]"
    elif "list" in type_name:
        type_name = "list"
    elif "array" in type_name:
        type_name = "array"
    # Spaces inside a value are written as a visible placeholder, presumably
    # because whitespace separates the columns of an expected row.
    return type_name.replace(" ", "␠")


def main() -> None:
    """Generate catalog queries from documentation files and print them.

    Reads every file named on the command line (or stdin when none is given).
    For each `RELATION_SPEC <schema>.<object>` marker, prints a query for the
    relation's columns followed by the expected rows taken from the first two
    cells of each row of the table that follows the marker. Objects tagged
    with the undocumented marker are only recorded. A final query lists every
    recorded object of the schemas that were seen.
    """
    print(HEADER)

    state = ParserState.NONE
    position = 1
    objects = []
    schemas = set()
    # Skip argv[0]: passing all of sys.argv would make the script parse its own
    # source as if it were a documentation file.
    with fileinput.input(files=sys.argv[1:]) as lines:
        for line in lines:
            if lines.isfirstline() and state != ParserState.NONE:
                # The previous file ended while a table was still open. Close
                # that query's expected rows and start the new file cleanly so
                # its prose is not mistaken for more table rows.
                print()
                state = ParserState.NONE
                position = 1

            if state == ParserState.NONE:
                undocumented = UNDOCUMENTED_RELATION_MARKER.search(line)
                if undocumented:
                    schema = undocumented.group(1)
                    object_name = undocumented.group(2)
                    objects.append(object_name)
                    schemas.add(f"'{schema}'")
                    continue
                marker_match = RELATION_MARKER_RE.search(line)
                if marker_match:
                    schema = marker_match.group(1)
                    object_name = marker_match.group(2)
                    print("query ITT")
                    print(
                        f"SELECT position, name, type FROM objects WHERE schema = '{schema}' AND object = '{object_name}' ORDER BY position"
                    )
                    print("----")
                    state = ParserState.HEADER
                    objects.append(object_name)
                    schemas.add(f"'{schema}'")
            elif state == ParserState.HEADER:
                # Ignore everything up to the separator row under the header.
                if HEADER_SEPARATOR_RE.match(line):
                    state = ParserState.FIELDS
            elif state == ParserState.FIELDS:
                table_match = TABLE_RE.findall(line)
                if len(table_match) >= 2:
                    field = table_match[0]
                    type_name = _catalog_type_name(table_match[1])
                    print("  ".join([str(position), field, type_name]))
                    position += 1
                else:
                    # First line that is not a table row ends the table; the
                    # blank line terminates the expected rows.
                    print()
                    state = ParserState.NONE
                    position = 1

    if state != ParserState.NONE:
        # Input ended inside a table: still terminate the expected rows so the
        # final query below is not glued onto them.
        print()

    if objects:
        print("query T")
        # Sort so the generated file does not depend on set iteration order.
        schemas_str = ",".join(sorted(schemas))
        print(
            f"SELECT DISTINCT object FROM objects WHERE schema IN ({schemas_str}) ORDER BY object"
        )
        print("----")
        # The query is DISTINCT, so each expected name may appear only once.
        for object_name in sorted(set(objects)):
            print(object_name)


# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()