  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. $ set-arg-default default-replica-size=1
  10. $ postgres-execute connection=postgres://mz_system:materialize@${testdrive.materialize-internal-sql-addr}
  11. ALTER SYSTEM SET storage_statistics_collection_interval = 1000
  12. ALTER SYSTEM SET storage_statistics_interval = 2000
  13. $ set keyschema={
  14. "type": "record",
  15. "name": "Key",
  16. "fields": [
  17. {"name": "key", "type": "string"}
  18. ]
  19. }
  20. $ set schema={
  21. "type" : "record",
  22. "name" : "test",
  23. "fields" : [
  24. {"name":"f1", "type":"string"},
  25. {"name":"f2", "type":"long"}
  26. ]
  27. }
  28. $ kafka-create-topic topic=upsert partitions=2
  29. $ kafka-ingest format=avro topic=upsert key-format=avro key-schema=${keyschema} schema=${schema}
  30. {"key": "fish"} {"f1": "fish", "f2": 1000}
  31. {"key": "bird1"} {"f1":"goose", "f2": 1}
  32. {"key": "birdmore"} {"f1":"geese", "f2": 2}
  33. {"key": "mammal1"} {"f1": "moose", "f2": 1}
  34. {"key": "bird1"}
  35. {"key": "birdmore"} {"f1":"geese", "f2": 56}
  36. {"key": "mammalmore"} {"f1": "moose", "f2": 42}
  37. {"key": "mammal1"}
  38. {"key": "mammalmore"} {"f1":"moose", "f2": 2}
  39. $ kafka-create-topic topic=metrics-test partitions=1
  40. $ kafka-ingest topic=metrics-test format=bytes
  41. jack,jill
  42. goofus,gallant
  43. > CREATE CONNECTION kafka_conn
  44. TO KAFKA (BROKER '${testdrive.kafka-addr}', SECURITY PROTOCOL PLAINTEXT);
  45. > CREATE CONNECTION IF NOT EXISTS csr_conn TO CONFLUENT SCHEMA REGISTRY (
  46. URL '${testdrive.schema-registry-url}'
  47. );
  48. > CREATE CLUSTER stats_cluster SIZE '${arg.default-replica-size}'
  49. > CREATE SOURCE upsert
  50. IN CLUSTER stats_cluster
  51. FROM KAFKA CONNECTION kafka_conn (TOPIC
  52. 'testdrive-upsert-${testdrive.seed}'
  53. )
  54. > CREATE TABLE upsert_tbl FROM SOURCE upsert (REFERENCE "testdrive-upsert-${testdrive.seed}")
  55. FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  56. INCLUDE OFFSET
  57. ENVELOPE UPSERT
  58. # Adding a select here so that the ingests after this
  59. # triggers lookup from the upsert state
  60. > SELECT key, f1, f2 FROM upsert_tbl
  61. key f1 f2
  62. ------------------------
  63. fish fish 1000
  64. birdmore geese 56
  65. mammalmore moose 2
  66. > SELECT
  67. s.name,
  68. SUM(u.snapshot_records_known),
  69. SUM(u.snapshot_records_staged)
  70. FROM mz_sources s
  71. JOIN mz_internal.mz_source_statistics_raw u ON s.id = u.id
  72. WHERE s.name IN ('upsert')
  73. GROUP BY s.name
  74. ORDER BY s.name
  75. upsert 9 9
  76. $ set-from-sql var=previous-offset-known
  77. SELECT
  78. (SUM(u.offset_known))::text
  79. FROM mz_sources s
  80. JOIN mz_internal.mz_source_statistics_raw u ON s.id = u.id
  81. WHERE s.name IN ('upsert')
  82. $ kafka-ingest format=avro topic=upsert key-format=avro key-schema=${keyschema} schema=${schema}
  83. {"key": "mammalmore"}
  84. # Snapshot counts don't move...
  85. > SELECT
  86. s.name,
  87. SUM(u.offset_known) > ${previous-offset-known},
  88. SUM(u.snapshot_records_known),
  89. SUM(u.snapshot_records_staged)
  90. FROM mz_sources s
  91. JOIN mz_internal.mz_source_statistics_raw u ON s.id = u.id
  92. WHERE s.name IN ('upsert')
  93. GROUP BY s.name
  94. ORDER BY s.name
  95. upsert true 9 9
  96. # ...even if we restart.
  97. $ set-from-sql var=pre-restart-offset-committed
  98. SELECT
  99. (SUM(u.offset_committed))::text
  100. FROM mz_sources s
  101. JOIN mz_internal.mz_source_statistics_raw u ON s.id = u.id
  102. WHERE s.name IN ('upsert')
  103. > ALTER CLUSTER stats_cluster SET (REPLICATION FACTOR 0)
  104. $ kafka-ingest format=avro topic=upsert key-format=avro key-schema=${keyschema} schema=${schema}
  105. {"key": "mammalmore"} {"f1":"moose", "f2": 100}
  106. > ALTER CLUSTER stats_cluster SET (REPLICATION FACTOR 1)
  107. > SELECT
  108. s.name,
  109. SUM(u.offset_committed) > ${pre-restart-offset-committed},
  110. SUM(u.snapshot_records_known),
  111. SUM(u.snapshot_records_staged)
  112. FROM mz_sources s
  113. JOIN mz_internal.mz_source_statistics_raw u ON s.id = u.id
  114. WHERE s.name IN ('upsert')
  115. GROUP BY s.name
  116. ORDER BY s.name
  117. upsert true 9 9
  118. > DROP SOURCE upsert CASCADE