snapshot-source-statistics.td 4.0 KB

# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.
  9. $ set-arg-default default-replica-size=1
  10. $ postgres-execute connection=postgres://mz_system:materialize@${testdrive.materialize-internal-sql-addr}
  11. ALTER SYSTEM SET storage_statistics_collection_interval = 1000
  12. ALTER SYSTEM SET storage_statistics_interval = 2000
  13. $ set keyschema={
  14. "type": "record",
  15. "name": "Key",
  16. "fields": [
  17. {"name": "key", "type": "string"}
  18. ]
  19. }
  20. $ set schema={
  21. "type" : "record",
  22. "name" : "test",
  23. "fields" : [
  24. {"name":"f1", "type":"string"},
  25. {"name":"f2", "type":"long"}
  26. ]
  27. }
  28. $ kafka-create-topic topic=upsert partitions=2
  29. $ kafka-ingest format=avro topic=upsert key-format=avro key-schema=${keyschema} schema=${schema}
  30. {"key": "fish"} {"f1": "fish", "f2": 1000}
  31. {"key": "bird1"} {"f1":"goose", "f2": 1}
  32. {"key": "birdmore"} {"f1":"geese", "f2": 2}
  33. {"key": "mammal1"} {"f1": "moose", "f2": 1}
  34. {"key": "bird1"}
  35. {"key": "birdmore"} {"f1":"geese", "f2": 56}
  36. {"key": "mammalmore"} {"f1": "moose", "f2": 42}
  37. {"key": "mammal1"}
  38. {"key": "mammalmore"} {"f1":"moose", "f2": 2}
  39. $ kafka-create-topic topic=metrics-test partitions=1
  40. $ kafka-ingest topic=metrics-test format=bytes
  41. jack,jill
  42. goofus,gallant
  43. > CREATE CONNECTION kafka_conn
  44. TO KAFKA (BROKER '${testdrive.kafka-addr}', SECURITY PROTOCOL PLAINTEXT);
  45. > CREATE CONNECTION IF NOT EXISTS csr_conn TO CONFLUENT SCHEMA REGISTRY (
  46. URL '${testdrive.schema-registry-url}'
  47. );
  48. > CREATE CLUSTER stats_cluster SIZE '${arg.default-replica-size}'
  49. > CREATE SOURCE upsert
  50. IN CLUSTER stats_cluster
  51. FROM KAFKA CONNECTION kafka_conn (TOPIC
  52. 'testdrive-upsert-${testdrive.seed}'
  53. )
  54. FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  55. INCLUDE OFFSET
  56. ENVELOPE UPSERT
  57. # Adding a select here so that the ingests after this
  58. # triggers lookup from the upsert state
  59. > SELECT key, f1, f2 FROM upsert
  60. key f1 f2
  61. ------------------------
  62. fish fish 1000
  63. birdmore geese 56
  64. mammalmore moose 2
  65. > SELECT
  66. s.name,
  67. SUM(u.snapshot_records_known),
  68. SUM(u.snapshot_records_staged)
  69. FROM mz_sources s
  70. JOIN mz_internal.mz_source_statistics_raw u ON s.id = u.id
  71. WHERE s.name IN ('upsert')
  72. GROUP BY s.name
  73. ORDER BY s.name
  74. upsert 9 9
  75. $ set-from-sql var=previous-offset-known
  76. SELECT
  77. (SUM(u.offset_known))::text
  78. FROM mz_sources s
  79. JOIN mz_internal.mz_source_statistics_raw u ON s.id = u.id
  80. WHERE s.name IN ('upsert')
  81. $ kafka-ingest format=avro topic=upsert key-format=avro key-schema=${keyschema} schema=${schema}
  82. {"key": "mammalmore"}
  83. # Snapshot counts don't move...
  84. > SELECT
  85. s.name,
  86. SUM(u.offset_known) > ${previous-offset-known},
  87. SUM(u.snapshot_records_known),
  88. SUM(u.snapshot_records_staged)
  89. FROM mz_sources s
  90. JOIN mz_internal.mz_source_statistics_raw u ON s.id = u.id
  91. WHERE s.name IN ('upsert')
  92. GROUP BY s.name
  93. ORDER BY s.name
  94. upsert true 9 9
  95. # ...even if we restart.
  96. $ set-from-sql var=pre-restart-offset-committed
  97. SELECT
  98. (SUM(u.offset_committed))::text
  99. FROM mz_sources s
  100. JOIN mz_internal.mz_source_statistics_raw u ON s.id = u.id
  101. WHERE s.name IN ('upsert')
  102. > ALTER CLUSTER stats_cluster SET (REPLICATION FACTOR 0)
  103. $ kafka-ingest format=avro topic=upsert key-format=avro key-schema=${keyschema} schema=${schema}
  104. {"key": "mammalmore"} {"f1":"moose", "f2": 100}
  105. > ALTER CLUSTER stats_cluster SET (REPLICATION FACTOR 1)
  106. > SELECT
  107. s.name,
  108. SUM(u.offset_committed) > ${pre-restart-offset-committed},
  109. SUM(u.snapshot_records_known),
  110. SUM(u.snapshot_records_staged)
  111. FROM mz_sources s
  112. JOIN mz_internal.mz_source_statistics_raw u ON s.id = u.id
  113. WHERE s.name IN ('upsert')
  114. GROUP BY s.name
  115. ORDER BY s.name
  116. upsert true 9 9
  117. > DROP SOURCE upsert CASCADE