# kafka-upsert-debezium-sources.td
# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.
  9. $ set-arg-default single-replica-cluster=quickstart
  10. # must be a subset of the keys in the rows
  11. $ set keyschema={
  12. "type": "record",
  13. "name": "Key",
  14. "fields": [
  15. {"name": "id", "type": "long"}
  16. ]
  17. }
  18. $ set schema={
  19. "type" : "record",
  20. "name" : "envelope",
  21. "fields" : [
  22. {
  23. "name": "before",
  24. "type": [
  25. {
  26. "name": "row",
  27. "type": "record",
  28. "fields": [
  29. {
  30. "name": "id",
  31. "type": "long"
  32. },
  33. {
  34. "name": "creature",
  35. "type": "string"
  36. }]
  37. },
  38. "null"
  39. ]
  40. },
  41. { "name": "op", "type": "string" },
  42. {
  43. "name": "after",
  44. "type": ["row", "null"]
  45. },
  46. {
  47. "name": "source",
  48. "type": {
  49. "type": "record",
  50. "name": "Source",
  51. "namespace": "io.debezium.connector.mysql",
  52. "fields": [
  53. {
  54. "name": "file",
  55. "type": "string"
  56. },
  57. {
  58. "name": "pos",
  59. "type": "long"
  60. },
  61. {
  62. "name": "row",
  63. "type": "int"
  64. },
  65. {
  66. "name": "snapshot",
  67. "type": [
  68. {
  69. "type": "boolean",
  70. "connect.default": false
  71. },
  72. "null"
  73. ],
  74. "default": false
  75. }
  76. ],
  77. "connect.name": "io.debezium.connector.mysql.Source"
  78. }
  79. }
  80. ]
  81. }
  82. > CREATE CONNECTION kafka_conn
  83. TO KAFKA (BROKER '${testdrive.kafka-addr}', SECURITY PROTOCOL PLAINTEXT);
  84. > CREATE CONNECTION IF NOT EXISTS csr_conn TO CONFLUENT SCHEMA REGISTRY (
  85. URL '${testdrive.schema-registry-url}'
  86. );
  87. $ kafka-create-topic topic=dbzupsert partitions=1
  88. $ kafka-ingest format=avro topic=dbzupsert key-format=avro key-schema=${keyschema} schema=${schema} timestamp=1
  89. {"id": 1} {"before": {"row": {"id": 1, "creature": "fish"}}, "after": {"row": {"id": 1, "creature": "mudskipper"}}, "op": "u", "source": {"file": "binlog1", "pos": 1, "row": 1, "snapshot": {"boolean": false}}}
  90. {"id": 1} {"before": {"row": {"id": 1, "creature": "mudskipper"}}, "after": {"row": {"id": 1, "creature": "salamander"}}, "op": "u", "source": {"file": "binlog2", "pos": 1, "row": 1, "snapshot": {"boolean": false}}}
  91. {"id": 1} {"before": {"row": {"id": 1, "creature": "salamander"}}, "after": {"row": {"id": 1, "creature": "lizard"}}, "op": "u", "source": {"file": "binlog3", "pos": 1, "row": 1, "snapshot": {"boolean": false}}}
  92. ! CREATE SOURCE doin_upsert
  93. IN CLUSTER ${arg.single-replica-cluster}
  94. FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-dbzupsert-${testdrive.seed}')
  95. FORMAT AVRO USING SCHEMA '${schema}'
  96. ENVELOPE DEBEZIUM
  97. contains:ENVELOPE [DEBEZIUM] UPSERT requires that KEY FORMAT be specified
  98. ! CREATE SOURCE doin_upsert
  99. IN CLUSTER ${arg.single-replica-cluster}
  100. FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-dbzupsert-${testdrive.seed}')
  101. KEY FORMAT JSON VALUE FORMAT JSON
  102. ENVELOPE DEBEZIUM
  103. contains:ENVELOPE DEBEZIUM requires that VALUE FORMAT is set to AVRO
  104. > CREATE SOURCE doin_upsert
  105. IN CLUSTER ${arg.single-replica-cluster}
  106. FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-dbzupsert-${testdrive.seed}')
  107. FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  108. ENVELOPE DEBEZIUM
  109. > SELECT * FROM doin_upsert
  110. id creature
  111. -----------
  112. 1 lizard
  113. $ kafka-ingest format=avro topic=dbzupsert key-format=avro key-schema=${keyschema} schema=${schema} timestamp=2
  114. {"id": 1} {"before": {"row": {"id": 1, "creature": "lizard"}}, "after": {"row": {"id": 1, "creature": "dino"}}, "op": "u", "source": {"file": "binlog4", "pos": 1, "row": 1, "snapshot": {"boolean": false}}}
  115. > SELECT * FROM doin_upsert
  116. id creature
  117. -----------
  118. 1 dino
  119. $ kafka-ingest format=avro topic=dbzupsert key-format=avro key-schema=${keyschema} schema=${schema} timestamp=3
  120. {"id": 2} {"before": null, "after": {"row": {"id": 2, "creature": "archeopteryx"}}, "op": "c", "source": {"file": "binlog5", "pos": 1, "row": 1, "snapshot": {"boolean": false}}}
  121. {"id": 2} {"before": {"row": {"id": 2, "creature": "archeopteryx"}}, "after": {"row": {"id": 2, "creature": "velociraptor"}}, "op": "u", "source": {"file": "binlog6", "pos": 1, "row": 1, "snapshot": {"boolean": false}}}
  122. > SELECT * FROM doin_upsert ORDER BY creature
  123. id creature
  124. ------------
  125. 1 dino
  126. 2 velociraptor
  127. # test duplicates
  128. $ kafka-ingest format=avro topic=dbzupsert key-format=avro key-schema=${keyschema} schema=${schema} timestamp=4
  129. {"id": 3} {"before": {"row": {"id": 3, "creature": "protoceratops"}}, "after": {"row": {"id": 3, "creature": "triceratops"}}, "op": "u", "source": {"file": "binlog7", "pos": 1, "row": 1, "snapshot": {"boolean": false}}}
  130. {"id": 3} {"before": {"row": {"id": 3, "creature": "protoceratops"}}, "after": {"row": {"id": 3, "creature": "triceratops"}}, "op": "u", "source": {"file": "binlog8", "pos": 1, "row": 1, "snapshot": {"boolean": false}}}
  131. > SELECT * FROM doin_upsert WHERE id = 3
  132. id creature
  133. -----------
  134. 3 triceratops
  135. # test removal and reinsertion
  136. $ kafka-ingest format=avro topic=dbzupsert key-format=avro key-schema=${keyschema} schema=${schema} timestamp=5
  137. {"id": 4} {"before": null, "after": {"row": {"id": 4, "creature": "moros"}}, "op": "c", "source": {"file": "binlog9", "pos": 1, "row": 1, "snapshot": {"boolean": false}}}
  138. > SELECT creature FROM doin_upsert WHERE id = 4
  139. creature
  140. --------
  141. moros
  142. # [btv] uncomment if we bring back classic debezium mode
  143. # ! CREATE SOURCE doin_upsert_metadata
  144. # IN CLUSTER ${arg.single-replica-cluster}
  145. # FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-dbzupsert-${testdrive.seed}')
  146. # FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  147. # INCLUDE OFFSET
  148. # ENVELOPE DEBEZIUM
  149. # contains:INCLUDE OFFSET with Debezium requires UPSERT semantics
  150. > CREATE SOURCE doin_upsert_metadata
  151. IN CLUSTER ${arg.single-replica-cluster}
  152. FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-dbzupsert-${testdrive.seed}')
  153. FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  154. INCLUDE PARTITION, OFFSET AS test_kafka_offset
  155. ENVELOPE DEBEZIUM
  156. > SELECT * FROM doin_upsert_metadata WHERE id = 4
  157. id creature partition test_kafka_offset
  158. ---------------------------------------
  159. 4 moros 0 8
  160. $ kafka-ingest format=avro topic=dbzupsert key-format=avro key-schema=${keyschema} schema=${schema} timestamp=6
  161. {"id": 4} {"before": {"row": {"id": 4, "creature": "trex"}}, "after": null, "op": "d", "source": {"file": "binlog10", "pos": 1, "row": 1, "snapshot": {"boolean": false}}}
  162. > SELECT creature FROM doin_upsert WHERE id = 4
  163. creature
  164. --------
  165. $ kafka-ingest format=avro topic=dbzupsert key-format=avro key-schema=${keyschema} schema=${schema} timestamp=7
  166. {"id": 4} {"before": {"row": {"id": 4, "creature": "trex"}}, "after": {"row": {"id": 4, "creature": "chicken"}}, "op": "u", "source": {"file": "binlog11", "pos": 1, "row": 1, "snapshot": {"boolean": false}}}
  167. > SELECT creature FROM doin_upsert WHERE id = 4
  168. creature
  169. --------
  170. chicken
  171. > SELECT * FROM doin_upsert WHERE id = 3
  172. id creature
  173. -----------
  174. 3 triceratops
  175. # Test that `WITH (START OFFSET=<whatever>)` works
  176. > CREATE SOURCE upsert_fast_forward
  177. IN CLUSTER ${arg.single-replica-cluster}
  178. FROM KAFKA CONNECTION kafka_conn (START OFFSET = [6], TOPIC 'testdrive-dbzupsert-${testdrive.seed}')
  179. FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  180. ENVELOPE DEBEZIUM
  181. > SELECT * FROM upsert_fast_forward WHERE id = 3
  182. id creature
  183. -----------
  184. 3 triceratops
  185. # test include metadata
  186. > CREATE SOURCE upsert_metadata
  187. IN CLUSTER ${arg.single-replica-cluster}
  188. FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-dbzupsert-${testdrive.seed}')
  189. FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  190. INCLUDE OFFSET, PARTITION
  191. ENVELOPE DEBEZIUM
  192. > SELECT * FROM upsert_metadata
  193. id creature offset partition
  194. ------------------------------------
  195. 1 dino 3 0
  196. 2 velociraptor 5 0
  197. 3 triceratops 7 0
  198. 4 chicken 10 0
  199. # test include metadata respects metadata order
  200. > CREATE SOURCE upsert_metadata_reordered
  201. IN CLUSTER ${arg.single-replica-cluster}
  202. FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-dbzupsert-${testdrive.seed}')
  203. FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  204. INCLUDE PARTITION, OFFSET
  205. ENVELOPE DEBEZIUM
  206. > SELECT * FROM upsert_metadata_reordered
  207. id creature partition offset
  208. ------------------------------------
  209. 1 dino 0 3
  210. 2 velociraptor 0 5
  211. 3 triceratops 0 7
  212. 4 chicken 0 10