# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

$ set-arg-default default-storage-size=1
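
# The ${arg.default-storage-size} references below pick up the default set
# here; it sizes the clusters that host the source and the sink.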

$ set keyschema={
    "type": "record",
    "name": "Key",
    "fields": [
        {"name": "key", "type": "string"}
    ]
  }

$ set schema={
    "type" : "record",
    "name" : "test",
    "fields" : [
        {"name":"f1", "type":"string"},
        {"name":"f2", "type":"long"}
    ]
  }

# Create a topic that is large enough to fill librdkafka's buffer, which forces
# some yielding to happen. Each message is at least 128 bytes long, so writing
# 1M of them produces at least 128MB of data. Each of the million records
# updates key "1", setting f2 to the current iteration index.
$ set count=1000000

$ kafka-create-topic topic=correctness-data

$ kafka-ingest format=avro topic=correctness-data key-format=avro key-schema=${keyschema} schema=${schema} repeat=${count} start-iteration=1
{"key": "1"} {"f1": "some value that is 128 bytes loooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooong", "f2": ${kafka-ingest.iteration} }

> CREATE CONNECTION kafka_conn
  TO KAFKA (BROKER '${testdrive.kafka-addr}', SECURITY PROTOCOL PLAINTEXT)

> CREATE CONNECTION IF NOT EXISTS csr_conn TO CONFLUENT SCHEMA REGISTRY (
    URL '${testdrive.schema-registry-url}'
  );

# Now create an UPSERT source and immediately after that a sink. The goal here
# is for the sink to get an AS_OF timestamp immediately, before the source has
# had the chance to produce data and compact. This means that the sink will
# observe all the state changes.

> CREATE CLUSTER correctness_data_cluster SIZE '${arg.default-storage-size}';

> CREATE SOURCE correctness_data
  IN CLUSTER correctness_data_cluster
  FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-correctness-data-${testdrive.seed}')

> CREATE TABLE correctness_data_tbl FROM SOURCE correctness_data (REFERENCE "testdrive-correctness-data-${testdrive.seed}")
  FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  ENVELOPE UPSERT

> CREATE CLUSTER correctness_sink_cluster SIZE '${arg.default-storage-size}';

> CREATE SINK correctness_sink
  IN CLUSTER correctness_sink_cluster
  FROM correctness_data_tbl
  INTO KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-correctness-sink-${testdrive.seed}')
  FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  ENVELOPE DEBEZIUM

# If correctness property 2 is upheld, the sink should produce exactly *one*
# record: the consolidated accumulation of the snapshot. Because every update
# hit the same upsert key, consolidation leaves a single insert, which the
# Debezium envelope renders with a null "before" field.
$ kafka-verify-data format=avro sink=materialize.public.correctness_sink sort-messages=true
{"before": null, "after": {"row": {"key": "1", "f1": "some value that is 128 bytes loooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooong", "f2": ${count} }}}