multiple_partitions.py

# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

from textwrap import dedent

from materialize.checks.actions import Testdrive
from materialize.checks.checks import Check, externally_idempotent
from materialize.checks.common import KAFKA_SCHEMA_WITH_SINGLE_STRING_FIELD


def schemas() -> str:
    return dedent(KAFKA_SCHEMA_WITH_SINGLE_STRING_FIELD)


@externally_idempotent(False)
class MultiplePartitions(Check):
    """Test that adds new partitions to a Kafka source"""
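
    # The check runs in three phases: initialize() seeds the topic and creates the
    # source, each manipulate() script ingests more data while adding partitions,
    # and validate() checks the progress relation and the resulting record counts.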

    def initialize(self) -> Testdrive:
        return Testdrive(
            schemas()
            + dedent(
                """
                $ postgres-execute connection=postgres://mz_system:materialize@${testdrive.materialize-internal-sql-addr}
                $ kafka-create-topic topic=multiple-partitions-topic

                # ingest A-key entries
                $ kafka-ingest format=avro key-format=avro topic=multiple-partitions-topic key-schema=${keyschema} schema=${schema} repeat=100
                {"key1": "A${kafka-ingest.iteration}"} {"f1": "A${kafka-ingest.iteration}"}

                > CREATE SOURCE multiple_partitions_source_src FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-multiple-partitions-topic-${testdrive.seed}');

                > CREATE TABLE multiple_partitions_source FROM SOURCE multiple_partitions_source_src (REFERENCE "testdrive-multiple-partitions-topic-${testdrive.seed}")
                  FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn ENVELOPE UPSERT;
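
                # Grow the topic to two partitions while the source is already ingesting;
                # the manipulate() phases below grow it further to three and four.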
                $ kafka-add-partitions topic=multiple-partitions-topic total-partitions=2

                > CREATE MATERIALIZED VIEW mv_multiple_partitions AS SELECT * FROM multiple_partitions_source;
                """
            )
        )

    def manipulate(self) -> list[Testdrive]:
        return [
            Testdrive(schemas() + dedent(s))
            for s in [
                """
                # ingest B-key entries
                $ kafka-ingest format=avro key-format=avro topic=multiple-partitions-topic key-schema=${keyschema} schema=${schema} repeat=60
                {"key1": "B${kafka-ingest.iteration}"} {"f1": "B${kafka-ingest.iteration}"}

                # Make sure that the source is up and complete
                > SELECT LEFT(f1, 1), COUNT(*) FROM multiple_partitions_source GROUP BY LEFT(f1, 1);
                A 100
                B 60

                $ kafka-add-partitions topic=multiple-partitions-topic total-partitions=3

                # ingest some more B-key entries
                $ kafka-ingest format=avro key-format=avro topic=multiple-partitions-topic key-schema=${keyschema} schema=${schema} repeat=60
                {"key1": "B${kafka-ingest.iteration}"} {"f1": "B${kafka-ingest.iteration}"}

                # delete some A-key entries
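                # (key-only records become Kafka tombstones, which ENVELOPE UPSERT treats as deletions)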
                $ kafka-ingest format=avro key-format=avro topic=multiple-partitions-topic key-schema=${keyschema} schema=${schema} repeat=50
                {"key1": "A${kafka-ingest.iteration}"}
                """,
                """
                # ingest C-key entries
                $ kafka-ingest format=avro key-format=avro topic=multiple-partitions-topic key-schema=${keyschema} schema=${schema} repeat=60
                {"key1": "C${kafka-ingest.iteration}"} {"f1": "C${kafka-ingest.iteration}"}

                # Make sure that the source is up and complete
                > SELECT LEFT(f1, 1), COUNT(*) FROM multiple_partitions_source GROUP BY LEFT(f1, 1);
                A 50
                B 60
                C 60

                $ kafka-add-partitions topic=multiple-partitions-topic total-partitions=4

                # ingest some more C-key entries
                $ kafka-ingest format=avro key-format=avro topic=multiple-partitions-topic key-schema=${keyschema} schema=${schema} repeat=40
                {"key1": "C${kafka-ingest.iteration}"} {"f1": "C${kafka-ingest.iteration}"}

                # delete some A-key entries
                $ kafka-ingest format=avro key-format=avro topic=multiple-partitions-topic key-schema=${keyschema} schema=${schema} repeat=50
                {"key1": "A${kafka-ingest.iteration}"}
                """,
            ]
        ]

    def validate(self) -> Testdrive:
        return Testdrive(
            dedent(
                """
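                # The topic ended up with four partitions; each appears as a singleton
                # range [n,n], plus an open-ended range past the last known partition.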
                > SELECT partition FROM multiple_partitions_source_src_progress;
                (3,)
                [0,0]
                [1,1]
                [2,2]
                [3,3]
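
                # 420 matches the total number of records produced across all phases:
                # 100 + 60 + 60 + 50 + 60 + 40 + 50 = 420.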
                # an alias is needed because "offset" is a reserved keyword
                > SELECT SUM(p.offset) FROM multiple_partitions_source_src_progress p;
                420

                > SELECT status FROM mz_internal.mz_source_statuses WHERE name = 'multiple_partitions_source';
                running
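
                # ENVELOPE UPSERT keeps one row per key: repeated B and C ingests reuse
                # the same keys, and 50 of the 100 A keys were deleted via tombstones.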
                > SELECT LEFT(f1, 1), COUNT(*) FROM multiple_partitions_source GROUP BY LEFT(f1, 1);
                A 50
                B 60
                C 60

                > SELECT LEFT(f1, 1), COUNT(*) FROM mv_multiple_partitions GROUP BY LEFT(f1, 1);
                A 50
                B 60
                C 60
                """
            )
        )