  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. $ set-arg-default default-storage-size=1
  10. $ set-arg-default single-replica-cluster=quickstart
  11. # Test support for compressed Kafka topics.
  12. $ postgres-execute connection=postgres://mz_system:materialize@${testdrive.materialize-internal-sql-addr}
  13. ALTER SYSTEM SET max_clusters = 20
  14. $ kafka-create-topic topic=gzip compression=gzip
  15. $ kafka-ingest format=bytes topic=gzip timestamp=1
  16. hello
  17. world
  18. > CREATE CONNECTION kafka_conn
  19. TO KAFKA (BROKER '${testdrive.kafka-addr}', SECURITY PROTOCOL PLAINTEXT);
  20. > CREATE CLUSTER gzip_cluster SIZE '${arg.default-storage-size}';
  21. > CREATE SOURCE gzip
  22. IN CLUSTER gzip_cluster
  23. FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-gzip-${testdrive.seed}')
  24. > CREATE TABLE gzip_tbl FROM SOURCE gzip (REFERENCE "testdrive-gzip-${testdrive.seed}")
  25. FORMAT TEXT
  26. > SELECT text FROM gzip_tbl
  27. hello
  28. world
  29. $ kafka-create-topic topic=snappy compression=snappy
  30. $ kafka-ingest format=bytes topic=snappy timestamp=1
  31. hello
  32. world
  33. > CREATE CLUSTER snappy_cluster SIZE '${arg.default-storage-size}';
  34. > CREATE SOURCE snappy
  35. IN CLUSTER snappy_cluster
  36. FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-snappy-${testdrive.seed}')
  37. > CREATE TABLE snappy_tbl FROM SOURCE snappy (REFERENCE "testdrive-snappy-${testdrive.seed}")
  38. FORMAT TEXT
  39. > SELECT text FROM snappy_tbl
  40. hello
  41. world
  42. $ kafka-create-topic topic=lz4 compression=lz4
  43. $ kafka-ingest format=bytes topic=lz4 timestamp=1
  44. hello
  45. world
  46. > CREATE CLUSTER lz4_cluster SIZE '${arg.default-storage-size}';
  47. > CREATE SOURCE lz4
  48. IN CLUSTER lz4_cluster
  49. FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-lz4-${testdrive.seed}')
  50. > CREATE TABLE lz4_tbl FROM SOURCE lz4 (REFERENCE "testdrive-lz4-${testdrive.seed}")
  51. FORMAT TEXT
  52. > SELECT text FROM lz4_tbl
  53. hello
  54. world
  55. $ kafka-create-topic topic=zstd compression=zstd partitions=1
  56. $ kafka-ingest format=bytes topic=zstd timestamp=1
  57. hello
  58. world
  59. > CREATE CLUSTER zstd_cluster SIZE '${arg.default-storage-size}';
  60. > CREATE SOURCE zstd
  61. IN CLUSTER zstd_cluster
  62. FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-zstd-${testdrive.seed}')
  63. > CREATE TABLE zstd_tbl FROM SOURCE zstd (REFERENCE "testdrive-zstd-${testdrive.seed}")
  64. FORMAT TEXT
  65. > SELECT text FROM zstd_tbl
  66. hello
  67. world
  68. > CREATE CLUSTER zstd_fast_forwarded_cluster SIZE '${arg.default-storage-size}';
  69. > CREATE SOURCE zstd_fast_forwarded
  70. IN CLUSTER zstd_fast_forwarded_cluster
  71. FROM KAFKA CONNECTION kafka_conn (START OFFSET=[1], TOPIC 'testdrive-zstd-${testdrive.seed}')
  72. > CREATE TABLE zstd_fast_forwarded_tbl FROM SOURCE zstd_fast_forwarded (REFERENCE "testdrive-zstd-${testdrive.seed}")
  73. FORMAT TEXT
  74. > SELECT text FROM zstd_fast_forwarded_tbl
  75. world
  76. # Test compression with sinks.
  77. > CREATE TABLE feed (a text)
  78. > INSERT INTO feed VALUES ('hello'), ('world')
  79. ! CREATE SINK invalid_sink
  80. IN CLUSTER ${arg.single-replica-cluster}
  81. FROM feed
  82. INTO KAFKA CONNECTION kafka_conn (TOPIC 'sink-compression', COMPRESSION TYPE 'pied-piper')
  83. KEY (a) NOT ENFORCED
  84. FORMAT JSON ENVELOPE UPSERT
  85. contains:invalid COMPRESSION TYPE: pied-piper
  86. > CREATE CLUSTER lz4_sink_implicit_cluster SIZE '${arg.default-storage-size}';
  87. > CREATE SINK lz4_sink_implicit
  88. IN CLUSTER lz4_sink_implicit_cluster
  89. FROM feed
  90. INTO KAFKA CONNECTION kafka_conn (TOPIC 'sink-compression', COMPRESSION TYPE 'none')
  91. KEY (a) NOT ENFORCED
  92. FORMAT JSON ENVELOPE UPSERT
  93. > CREATE CLUSTER none_sink_explicit_cluster SIZE '${arg.default-storage-size}';
  94. > CREATE SINK none_sink_explicit
  95. IN CLUSTER none_sink_explicit_cluster
  96. FROM feed
  97. INTO KAFKA CONNECTION kafka_conn (TOPIC 'sink-compression')
  98. KEY (a) NOT ENFORCED
  99. FORMAT JSON ENVELOPE UPSERT
  100. > CREATE CLUSTER gzip_sink_cluster SIZE '${arg.default-storage-size}';
  101. > CREATE SINK gzip_sink
  102. IN CLUSTER gzip_sink_cluster
  103. FROM feed
  104. INTO KAFKA CONNECTION kafka_conn (TOPIC 'sink-compression', COMPRESSION TYPE 'gzip')
  105. KEY (a) NOT ENFORCED
  106. FORMAT JSON ENVELOPE UPSERT
  107. > CREATE CLUSTER gzip_sink_spongebob_case_cluster SIZE '${arg.default-storage-size}';
  108. > CREATE SINK gzip_sink_spongebob_case
  109. IN CLUSTER gzip_sink_spongebob_case_cluster
  110. FROM feed
  111. INTO KAFKA CONNECTION kafka_conn (TOPIC 'sink-compression', COMPRESSION TYPE 'gZiP')
  112. KEY (a) NOT ENFORCED
  113. FORMAT JSON ENVELOPE UPSERT
  114. > CREATE CLUSTER lz4_sink_cluster SIZE '${arg.default-storage-size}';
  115. > CREATE SINK lz4_sink
  116. IN CLUSTER lz4_sink_cluster
  117. FROM feed
  118. INTO KAFKA CONNECTION kafka_conn (TOPIC 'sink-compression', COMPRESSION TYPE 'lz4')
  119. KEY (a) NOT ENFORCED
  120. FORMAT JSON ENVELOPE UPSERT
  121. > CREATE CLUSTER zstd_sink_cluster SIZE '${arg.default-storage-size}';
  122. > CREATE SINK zstd_sink
  123. IN CLUSTER zstd_sink_cluster
  124. FROM feed
  125. INTO KAFKA CONNECTION kafka_conn (TOPIC 'sink-compression', COMPRESSION TYPE 'zstd')
  126. KEY (a) NOT ENFORCED
  127. FORMAT JSON ENVELOPE UPSERT
  128. # The Kafka APIs do not make it possible to assess whether the compression
  129. # actually took place, so we settle for just validating that the data is
  130. # readable.
  131. $ kafka-verify-data format=json key=false sink=materialize.public.lz4_sink_implicit
  132. {"a": "hello"}
  133. {"a": "world"}
  134. {"a": "hello"}
  135. {"a": "world"}
  136. {"a": "hello"}
  137. {"a": "world"}
  138. {"a": "hello"}
  139. {"a": "world"}
  140. {"a": "hello"}
  141. {"a": "world"}
  142. {"a": "hello"}
  143. {"a": "world"}