upsert_many_columns.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. from textwrap import dedent
  10. from materialize.checks.actions import Testdrive
  11. from materialize.checks.checks import Check
  12. MANY_KEYS = ", ".join(
  13. [f'{{"name": "key{i+1}", "type": "string"}}' for i in range(1000)]
  14. )
  15. MANY_VALUES = ", ".join(
  16. [f'{{"name": "f{i+1}", "type": "string"}}' for i in range(1000)]
  17. )
  18. MANY_KEYS_SCHEMA = dedent(
  19. f"""
  20. $ set keyschema={{
  21. "type" : "record",
  22. "name" : "Key",
  23. "fields" : [ {MANY_KEYS} ]
  24. }}
  25. $ set schema={{
  26. "type" : "record",
  27. "name" : "test",
  28. "fields" : [
  29. {{"name":"f1", "type":"string"}}
  30. ]
  31. }}
  32. """
  33. )
  34. MANY_VALUES_SCHEMA = dedent(
  35. f"""
  36. $ set keyschema={{
  37. "type": "record",
  38. "name": "Key",
  39. "fields": [
  40. {{"name": "key1", "type": "string"}}
  41. ]
  42. }}
  43. $ set schema={{
  44. "type" : "record",
  45. "name" : "test",
  46. "fields" : [ {MANY_VALUES} ]
  47. }}
  48. """
  49. )
  50. class UpsertManyValueColumns(Check):
  51. """Upsert 1K value columns"""
  52. DATA_A = ", ".join([f'"f{i+1}": "A{i+1}XYZ"' for i in range(1000)])
  53. DATA_B = ", ".join([f'"f{i+1}": "B{i+1}XYZ"' for i in range(1000)])
  54. DATA_C = ", ".join([f'"f{i+1}": "C{i+1}XYZ"' for i in range(1000)])
  55. def initialize(self) -> Testdrive:
  56. return Testdrive(
  57. MANY_VALUES_SCHEMA
  58. + dedent(
  59. f"""
  60. $ kafka-create-topic topic=upsert-many-value-columns
  61. $ kafka-ingest format=avro key-format=avro topic=upsert-many-value-columns key-schema=${{keyschema}} schema=${{schema}}
  62. {{"key1": "1"}} {{ {UpsertManyValueColumns.DATA_A} }}
  63. {{"key1": "2"}} {{ {UpsertManyValueColumns.DATA_A} }}
  64. {{"key1": "3"}} {{ {UpsertManyValueColumns.DATA_A} }}
  65. > CREATE SOURCE upsert_many_value_columns_source_src
  66. FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-upsert-many-value-columns-${{testdrive.seed}}')
  67. > CREATE TABLE upsert_many_value_columns FROM SOURCE upsert_many_value_columns_source_src (REFERENCE "testdrive-upsert-many-value-columns-${{testdrive.seed}}")
  68. FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  69. ENVELOPE UPSERT
  70. > CREATE MATERIALIZED VIEW upsert_many_value_columns_view AS
  71. SELECT key1, f1, f1000
  72. FROM upsert_many_value_columns
  73. """
  74. )
  75. )
  76. def manipulate(self) -> list[Testdrive]:
  77. return [
  78. Testdrive(MANY_VALUES_SCHEMA + dedent(s))
  79. for s in [
  80. f"""
  81. $ kafka-ingest format=avro key-format=avro topic=upsert-many-value-columns key-schema=${{keyschema}} schema=${{schema}}
  82. {{"key1": "1"}} {{ {UpsertManyValueColumns.DATA_B} }}
  83. {{"key1": "2"}}
  84. """,
  85. f"""
  86. $ kafka-ingest format=avro key-format=avro topic=upsert-many-value-columns key-schema=${{keyschema}} schema=${{schema}}
  87. {{"key1": "1"}} {{ {UpsertManyValueColumns.DATA_C} }}
  88. {{"key1": "3"}}
  89. """,
  90. ]
  91. ]
  92. def validate(self) -> Testdrive:
  93. return Testdrive(
  94. dedent(
  95. """
  96. > SELECT * FROM upsert_many_value_columns_view
  97. 1 C1XYZ C1000XYZ
  98. """
  99. )
  100. )
  101. class UpsertManyKeyColumns(Check):
  102. """Upsert 1K key columns"""
  103. KEYS_A = ", ".join([f'"key{i+1}": "A{i+1}XYZ"' for i in range(1000)])
  104. KEYS_B = ", ".join([f'"key{i+1}": "B{i+1}XYZ"' for i in range(1000)])
  105. KEYS_C = ", ".join([f'"key{i+1}": "C{i+1}XYZ"' for i in range(1000)])
  106. def initialize(self) -> Testdrive:
  107. return Testdrive(
  108. MANY_KEYS_SCHEMA
  109. + dedent(
  110. f"""
  111. $ kafka-create-topic topic=upsert-many-key-columns
  112. $ kafka-ingest format=avro key-format=avro topic=upsert-many-key-columns key-schema=${{keyschema}} schema=${{schema}}
  113. {{ {UpsertManyKeyColumns.KEYS_A} }} {{ "f1" : "X" }}
  114. {{ {UpsertManyKeyColumns.KEYS_B} }} {{ "f1" : "X" }}
  115. {{ {UpsertManyKeyColumns.KEYS_C} }} {{ "f1" : "X" }}
  116. > CREATE SOURCE upsert_many_key_columns_source_src
  117. FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-upsert-many-key-columns-${{testdrive.seed}}')
  118. > CREATE TABLE upsert_many_key_columns FROM SOURCE upsert_many_key_columns_source_src (REFERENCE "testdrive-upsert-many-key-columns-${{testdrive.seed}}")
  119. FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  120. ENVELOPE UPSERT
  121. > CREATE MATERIALIZED VIEW upsert_many_key_columns_view AS
  122. SELECT key1, key1000, f1
  123. FROM upsert_many_key_columns
  124. """
  125. )
  126. )
  127. def manipulate(self) -> list[Testdrive]:
  128. return [
  129. Testdrive(MANY_KEYS_SCHEMA + dedent(s))
  130. for s in [
  131. f"""
  132. $ kafka-ingest format=avro key-format=avro topic=upsert-many-key-columns key-schema=${{keyschema}} schema=${{schema}}
  133. {{ {UpsertManyKeyColumns.KEYS_A} }} {{ "f1" : "Y" }}
  134. {{ {UpsertManyKeyColumns.KEYS_B} }}
  135. """,
  136. f"""
  137. $ kafka-ingest format=avro key-format=avro topic=upsert-many-key-columns key-schema=${{keyschema}} schema=${{schema}}
  138. {{ {UpsertManyKeyColumns.KEYS_A} }} {{ "f1" : "Z" }}
  139. {{ {UpsertManyKeyColumns.KEYS_C} }}
  140. """,
  141. ]
  142. ]
  143. def validate(self) -> Testdrive:
  144. return Testdrive(
  145. dedent(
  146. """
  147. > SELECT * FROM upsert_many_key_columns_view
  148. A1XYZ A1000XYZ Z
  149. """
  150. )
  151. )