upsert_many_rows.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.
  9. from textwrap import dedent
  10. from materialize.checks.actions import Testdrive
  11. from materialize.checks.checks import Check
  12. from materialize.checks.common import KAFKA_SCHEMA_WITH_SINGLE_STRING_FIELD
  13. from materialize.checks.executors import Executor
  14. from materialize.mz_version import MzVersion
  15. class UpsertManyRows(Check):
  16. """Upsert 1M rows"""
  17. def _can_run(self, e: Executor) -> bool:
  18. # Was broken in v0.144, see https://github.com/MaterializeInc/database-issues/issues/8106#issuecomment-3013859893
  19. return self.base_version >= MzVersion.parse_mz("v0.146.0")
  20. def initialize(self) -> Testdrive:
  21. return Testdrive(
  22. dedent(KAFKA_SCHEMA_WITH_SINGLE_STRING_FIELD)
  23. + dedent(
  24. """
  25. $ kafka-create-topic topic=upsert-many-rows
  26. $ kafka-ingest format=avro key-format=avro topic=upsert-many-rows key-schema=${keyschema} schema=${schema} repeat=1000000
  27. {"key1": "A${kafka-ingest.iteration}"} {"f1": "X"}
  28. {"key1": "B${kafka-ingest.iteration}"} {"f1": "X"}
  29. {"key1": "C${kafka-ingest.iteration}"} {"f1": "X"}
  30. > CREATE SOURCE upsert_many_rows_src
  31. FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-upsert-many-rows-${testdrive.seed}')
  32. > CREATE TABLE upsert_many_rows FROM SOURCE upsert_many_rows_src (REFERENCE "testdrive-upsert-many-rows-${testdrive.seed}")
  33. FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  34. ENVELOPE UPSERT
  35. > CREATE MATERIALIZED VIEW upsert_many_rows_view AS
  36. SELECT f1, COUNT(*) AS count_rows, COUNT(DISTINCT key1) AS count_keys
  37. FROM upsert_many_rows
  38. GROUP BY f1
  39. """
  40. )
  41. )
  42. def manipulate(self) -> list[Testdrive]:
  43. return [
  44. Testdrive(dedent(KAFKA_SCHEMA_WITH_SINGLE_STRING_FIELD) + dedent(s))
  45. for s in [
  46. """
  47. # Update the As
  48. $ kafka-ingest format=avro key-format=avro topic=upsert-many-rows key-schema=${keyschema} schema=${schema} repeat=1000000
  49. {"key1": "A${kafka-ingest.iteration}"} {"f1": "Y"}
  50. # Delete the Bs
  51. $ kafka-ingest format=avro key-format=avro topic=upsert-many-rows key-schema=${keyschema} schema=${schema} repeat=1000000
  52. {"key1": "B${kafka-ingest.iteration}"}
  53. """,
  54. """
  55. # Update the As again
  56. $ kafka-ingest format=avro key-format=avro topic=upsert-many-rows key-schema=${keyschema} schema=${schema} repeat=1000000
  57. {"key1": "A${kafka-ingest.iteration}"} {"f1": "Z"}
  58. # Delete the Cs
  59. $ kafka-ingest format=avro key-format=avro topic=upsert-many-rows key-schema=${keyschema} schema=${schema} repeat=1000000
  60. {"key1": "C${kafka-ingest.iteration}"}
  61. # Insert some more
  62. $ kafka-ingest format=avro key-format=avro topic=upsert-many-rows key-schema=${keyschema} schema=${schema} repeat=1000000
  63. {"key1": "D${kafka-ingest.iteration}"} {"f1": "Z"}
  64. """,
  65. ]
  66. ]
  67. def validate(self) -> Testdrive:
  68. return Testdrive(
  69. dedent(
  70. """
  71. > SELECT * FROM upsert_many_rows_view
  72. Z 2000000 2000000
  73. """
  74. )
  75. )