gen-chroma-syntax.py 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. #!/usr/bin/env python3
  2. # Copyright Materialize, Inc. and contributors. All rights reserved.
  3. #
  4. # Use of this software is governed by the Business Source License
  5. # included in the LICENSE file at the root of this repository.
  6. #
  7. # As of the Change Date specified in that file, in accordance with
  8. # the Business Source License, use of this software will be governed
  9. # by the Apache License, Version 2.0.
  10. """Regenerates a Materialize-dialect Chroma syntax file using the local Materialize keywords"""
  11. import argparse
  12. import xml.etree.ElementTree as ET
  13. from pathlib import Path
  14. from materialize import MZ_ROOT
  15. CONFIG_FIELDS = {
  16. "name": "Materialize SQL dialect",
  17. "alias": ["materialize", "mzsql"],
  18. "mime_type": "text/x-materializesql",
  19. }
  20. def keyword_pattern():
  21. keywords_file = MZ_ROOT / "src/sql-lexer/src/keywords.txt"
  22. keywords = [
  23. line.upper()
  24. for line in keywords_file.read_text().splitlines()
  25. if not (line.startswith("#") or not line.strip())
  26. ]
  27. return f"({'|'.join(keywords)})\\b"
  28. def set_keywords(root: ET.Element):
  29. rule = root.find(".//rule/token[@type='Keyword']/..")
  30. if not rule:
  31. raise RuntimeError("No keyword rule found")
  32. rule.set("pattern", keyword_pattern())
  33. def set_config(root: ET.Element):
  34. config = root.find("config")
  35. if not config:
  36. raise RuntimeError("No config found")
  37. for field_name, field_value in CONFIG_FIELDS.items():
  38. if isinstance(field_value, list):
  39. for element in config.findall(field_name):
  40. config.remove(element)
  41. for item in field_value:
  42. field = ET.SubElement(config, field_name)
  43. field.text = item
  44. else:
  45. field = config.find(field_name)
  46. if field is None:
  47. raise RuntimeError(f"No such config field: '{field_name}'")
  48. field.text = field_value
  49. def main() -> None:
  50. parser = argparse.ArgumentParser()
  51. parser.add_argument(
  52. "--chroma-dir",
  53. default="../chroma",
  54. )
  55. args = parser.parse_args()
  56. lexer_dir = Path(f"{args.chroma_dir}/lexers/embedded/")
  57. tree = ET.parse(lexer_dir / "postgresql_sql_dialect.xml")
  58. root = tree.getroot()
  59. if not root:
  60. raise RuntimeError("Could not find root element")
  61. set_keywords(root)
  62. set_config(root)
  63. ET.indent(root, " ")
  64. tree.write(lexer_dir / "materialize_sql_dialect.xml", encoding="unicode")
  65. if __name__ == "__main__":
  66. main()