wide-data-before.td 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. #
  10. # Ingest wide data
  11. # keyschema/schema: Avro key and value schemas passed to kafka-ingest for the wide-data-ten topic; one long field each (f1 in the key, f2 in the value).
  12. $ set keyschema={
  13. "type": "record",
  14. "name": "Key",
  15. "fields": [
  16. {"name": "f1", "type": "long"}
  17. ]
  18. }
  19. $ set schema={
  20. "type" : "record",
  21. "name" : "test",
  22. "fields" : [
  23. {"name":"f2", "type":"long"}
  24. ]
  25. }
  26. #
  27. # Generate the wide data via a convoluted mechanism so that we do not have to commit a multi-MB file to
  28. # the repository.
  29. # 1. Create a topic (wide-data-ten) + source/table (wide_data_ten, wide_data_ten_tbl) that outputs numbers 0 to 9
  30. # 2. Create a materialized view (wide_data_view) that outputs 10 rows' worth of wide data: each value is 'x' repeated 512 * 1024 times
  31. # 3. Have that view produce a new Kafka topic (through wide_data_sink) that has the final data we want to ingest
  32. # 4. Ingest that topic as wide_data_source_tbl (ENVELOPE UPSERT, key included as key2) and check that it holds 10 rows
  33. $ kafka-create-topic topic=wide-data-ten
  34. $ kafka-ingest format=avro topic=wide-data-ten key-format=avro key-schema=${keyschema} schema=${schema} repeat=10
  35. {"f1": ${kafka-ingest.iteration}} {"f2": ${kafka-ingest.iteration}}
  36. > CREATE CONNECTION IF NOT EXISTS kafka_conn TO KAFKA (BROKER '${testdrive.kafka-addr}', SECURITY PROTOCOL PLAINTEXT);
  37. > CREATE CONNECTION IF NOT EXISTS csr_conn TO CONFLUENT SCHEMA REGISTRY (
  38. URL '${testdrive.schema-registry-url}'
  39. );
  40. > CREATE SOURCE wide_data_ten
  41. FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-wide-data-ten-${testdrive.seed}');
  42. > CREATE TABLE wide_data_ten_tbl FROM SOURCE wide_data_ten (REFERENCE "testdrive-wide-data-ten-${testdrive.seed}")
  43. FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  44. ENVELOPE NONE;
  45. > CREATE MATERIALIZED VIEW wide_data_view AS SELECT wide_data_ten_tbl.f2 AS key, REPEAT('x', 512 * 1024) AS value FROM wide_data_ten_tbl;
  46. > CREATE SINK wide_data_sink FROM wide_data_view
  47. INTO KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-wide-data-${testdrive.seed}')
  48. KEY (key) NOT ENFORCED
  49. FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  50. ENVELOPE UPSERT;
  51. $ kafka-verify-topic sink=materialize.public.wide_data_sink await-value-schema=true await-key-schema=true
  52. > CREATE SOURCE wide_data_source
  53. FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-wide-data-${testdrive.seed}');
  54. > CREATE TABLE wide_data_source_tbl FROM SOURCE wide_data_source (REFERENCE "testdrive-wide-data-${testdrive.seed}")
  55. FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  56. INCLUDE KEY AS key2
  57. ENVELOPE UPSERT;
  58. > SELECT COUNT(*) FROM wide_data_source_tbl;
  59. 10