# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

# Reading all that data after starting up can take longer than the default timeout.
$ set-sql-timeout duration=60s

$ set keyschema={
    "type": "record",
    "name": "Key",
    "fields": [
        {"name": "f1", "type": "string"}
    ]
  }

$ set schema={
    "type": "record",
    "name": "test",
    "fields": [
        {"name": "f2", "type": "string"}
    ]
  }

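# All 100000 rows ingested before the restart should be visible again, with
# f1 and f2 unique per row.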
> SELECT COUNT(*), COUNT(DISTINCT f1), COUNT(DISTINCT f2) FROM failpoint;
100000 100000 100000

# We expect that we have successfully read and persisted some messages before
# the failpoint was activated.
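# Note: the `statistics` column carries librdkafka's statistics JSON, where
# the per-partition `msgs` field counts messages consumed from that partition.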
> SELECT SUM(CAST(statistics->'topics'->'testdrive-failpoint-${testdrive.seed}'->'partitions'->'0'->'msgs' AS INT)) < 100000 FROM mz_kafka_source_statistics;
true

# Make sure that ingestion can continue.
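# 10000 new "k" keys plus 10000 new "l" keys on top of the original 100000
# rows should yield 120000 distinct keys.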
$ kafka-ingest format=avro topic=failpoint key-format=avro key-schema=${keyschema} schema=${schema} repeat=10000
{"f1": "k${kafka-ingest.iteration}"} {"f2": "k${kafka-ingest.iteration}"}

$ kafka-ingest format=avro topic=failpoint key-format=avro key-schema=${keyschema} schema=${schema} repeat=10000
{"f1": "l${kafka-ingest.iteration}"} {"f2": "l${kafka-ingest.iteration}"}

> SELECT COUNT(*), COUNT(DISTINCT f1), COUNT(DISTINCT f2) FROM failpoint;
120000 120000 120000

# Validate the output of the sink as well.
> CREATE CONNECTION IF NOT EXISTS csr_conn TO CONFLUENT SCHEMA REGISTRY (
    URL '${testdrive.schema-registry-url}'
  );

> CREATE CONNECTION kafka_conn
  TO KAFKA (BROKER '${testdrive.kafka-addr}', SECURITY PROTOCOL PLAINTEXT);

> CREATE CLUSTER single_replica_cluster SIZE '${arg.default-replica-size}';

> CREATE SOURCE failpoint_from_sink
  IN CLUSTER single_replica_cluster
  FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-failpoint-sink-${testdrive.seed}')

> CREATE TABLE failpoint_from_sink_tbl FROM SOURCE failpoint_from_sink (REFERENCE "testdrive-failpoint-sink-${testdrive.seed}")
  KEY FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  VALUE FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  INCLUDE KEY AS f1
  ENVELOPE UPSERT

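# Reading the sink's topic with ENVELOPE UPSERT keyed on f1 reconstructs the
# latest value per key, so the counts must match the upstream source.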
> SELECT COUNT(*), COUNT(DISTINCT f1), COUNT(DISTINCT after) FROM failpoint_from_sink_tbl
120000 120000 120000

# Delete some of the values inserted pre-restart.
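# A record with a key but no value produces a Kafka tombstone, which
# ENVELOPE UPSERT interprets as a deletion of that key.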
$ kafka-ingest format=avro topic=failpoint key-format=avro key-schema=${keyschema} schema=${schema} repeat=10000
{"f1": "a${kafka-ingest.iteration}"}

$ kafka-ingest format=avro topic=failpoint key-format=avro key-schema=${keyschema} schema=${schema} repeat=10000
{"f1": "b${kafka-ingest.iteration}"}

# And validate again.
> SELECT COUNT(*), COUNT(DISTINCT f1), COUNT(DISTINCT f2) FROM failpoint;
100000 100000 100000

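# The sink encodes deletions in the record payload rather than as Kafka
# tombstones (presumably a Debezium-style envelope, matching the `after`
# column read above), so the upsert view of the sink topic still holds one
# record per key.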
> SELECT COUNT(*) FROM failpoint_from_sink_tbl;
120000