statistics.td 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. $ set-sql-timeout duration=60s
  10. #
  11. # Test progress statistics
  12. #
  13. $ postgres-execute connection=postgres://mz_system:materialize@${testdrive.materialize-internal-sql-addr}
  14. ALTER SYSTEM SET storage_statistics_collection_interval = 1000
  15. ALTER SYSTEM SET storage_statistics_interval = 2000
  16. > CREATE SECRET pgpass AS 'postgres'
  17. > CREATE CONNECTION pg TO POSTGRES (
  18. HOST postgres,
  19. DATABASE postgres,
  20. USER postgres,
  21. PASSWORD SECRET pgpass
  22. )
  23. $ postgres-execute connection=postgres://postgres:postgres@postgres
  24. ALTER USER postgres WITH replication;
  25. DROP SCHEMA IF EXISTS public CASCADE;
  26. DROP PUBLICATION IF EXISTS mz_source;
  27. CREATE SCHEMA public;
  28. CREATE TABLE t1 (f1 TEXT);
  29. ALTER TABLE t1 REPLICA IDENTITY FULL;
  30. INSERT INTO t1 VALUES ('one');
  31. INSERT INTO t1 VALUES ('two');
  32. CREATE PUBLICATION mz_source FOR ALL TABLES;
  33. > CREATE CLUSTER stats_cluster SIZE '${arg.default-replica-size}'
  34. > CREATE SOURCE mz_source
  35. IN CLUSTER stats_cluster
  36. FROM POSTGRES CONNECTION pg (PUBLICATION 'mz_source')
  37. FOR TABLES ("t1")
  38. > SELECT COUNT(*) > 0 FROM t1;
  39. true
  40. # NOTE: we make sure that we have stats for a replica, otherwise the
  41. # set-from-sql below might fail because it doesn't do retries when a row is
  42. # missing.
  43. $ set-regex match=u\d+ replacement="<REPLICAID>"
  44. > SELECT cr.id
  45. FROM
  46. mz_clusters c,
  47. mz_cluster_replicas cr,
  48. mz_internal.mz_source_statistics_raw u,
  49. mz_sources s
  50. WHERE
  51. c.name = 'stats_cluster' AND c.id = cr.cluster_id AND cr.id = u.replica_id
  52. AND s.name IN ('mz_source') AND u.id = s.id
  53. ORDER BY cr.id
  54. LIMIT 1
  55. <REPLICAID>
  56. # Find the replica that is running the source, so that the stats query can be
  57. # very specific and not be confused by querying stats from multiple or older
  58. # replicas.
  59. $ set-from-sql var=replica_id
  60. SELECT cr.id
  61. FROM
  62. mz_clusters c,
  63. mz_cluster_replicas cr,
  64. mz_internal.mz_source_statistics_raw u,
  65. mz_sources s
  66. WHERE
  67. c.name = 'stats_cluster' AND c.id = cr.cluster_id AND cr.id = u.replica_id
  68. AND s.name IN ('mz_source') AND u.id = s.id
  69. ORDER BY cr.id
  70. LIMIT 1
  71. > SELECT
  72. s.name,
  73. u.offset_committed > 0,
  74. u.offset_known >= u.offset_committed,
  75. u.snapshot_records_known,
  76. u.snapshot_records_staged
  77. FROM mz_sources s
  78. JOIN mz_internal.mz_source_statistics_raw u ON s.id = u.id
  79. WHERE s.name IN ('mz_source') AND u.replica_id = '${replica_id}'
  80. mz_source true true 2 2
  81. $ set-from-sql var=previous-offset-committed
  82. SELECT
  83. (u.offset_committed)::text
  84. FROM mz_sources s
  85. JOIN mz_internal.mz_source_statistics_raw u ON s.id = u.id
  86. WHERE s.name IN ('mz_source') AND u.replica_id = '${replica_id}'
  87. $ postgres-execute connection=postgres://postgres:postgres@postgres
  88. INSERT INTO t1 VALUES ('three');
  89. > SELECT
  90. s.name,
  91. u.offset_committed > ${previous-offset-committed},
  92. u.offset_known >= u.offset_committed,
  93. u.snapshot_records_known,
  94. u.snapshot_records_staged
  95. FROM mz_sources s
  96. JOIN mz_internal.mz_source_statistics_raw u ON s.id = u.id
  97. WHERE s.name IN ('mz_source') AND u.replica_id = '${replica_id}'
  98. mz_source true true 2 2
  99. $ set-from-sql var=pre-restart-offset-committed
  100. SELECT
  101. (u.offset_committed)::text
  102. FROM mz_sources s
  103. JOIN mz_internal.mz_source_statistics_raw u ON s.id = u.id
  104. WHERE s.name IN ('mz_source') AND u.replica_id = '${replica_id}'
  105. > ALTER CLUSTER stats_cluster SET (REPLICATION FACTOR 0)
  106. $ postgres-execute connection=postgres://postgres:postgres@postgres
  107. INSERT INTO t1 VALUES ('four');
  108. > ALTER CLUSTER stats_cluster SET (REPLICATION FACTOR 1)
  109. # Ensure the snapshot stats stay there for the old replica, and don't change.
  110. > SELECT
  111. s.name,
  112. u.offset_committed >= ${pre-restart-offset-committed},
  113. u.offset_known >= u.offset_committed,
  114. u.snapshot_records_known,
  115. u.snapshot_records_staged
  116. FROM mz_sources s
  117. JOIN mz_internal.mz_source_statistics_raw u ON s.id = u.id
  118. WHERE s.name IN ('mz_source') AND u.replica_id = '${replica_id}'
  119. mz_source true true 2 2
  120. $ set-regex match=u\d+ replacement="<REPLICAID>"
  121. > SELECT cr.id
  122. FROM
  123. mz_clusters c,
  124. mz_cluster_replicas cr,
  125. mz_internal.mz_source_statistics_raw u,
  126. mz_sources s
  127. WHERE
  128. c.name = 'stats_cluster' AND c.id = cr.cluster_id AND cr.id = u.replica_id
  129. AND s.name IN ('mz_source') AND u.id = s.id
  130. ORDER BY cr.id
  131. LIMIT 1
  132. <REPLICAID>
  133. $ set-from-sql var=replica_id
  134. SELECT cr.id
  135. FROM
  136. mz_clusters c,
  137. mz_cluster_replicas cr,
  138. mz_internal.mz_source_statistics_raw u,
  139. mz_sources s
  140. WHERE
  141. c.name = 'stats_cluster' AND c.id = cr.cluster_id AND cr.id = u.replica_id
  142. AND s.name IN ('mz_source') AND u.id = s.id
  143. ORDER BY cr.id
  144. LIMIT 1
  145. # The new replica will have different stats, because it never did a snapshot
  146. # and didn't read messages.
  147. > SELECT
  148. s.name,
  149. u.offset_committed > ${pre-restart-offset-committed},
  150. u.offset_known >= u.offset_committed,
  151. u.snapshot_records_known,
  152. u.snapshot_records_staged
  153. FROM mz_sources s
  154. JOIN mz_internal.mz_source_statistics_raw u ON s.id = u.id
  155. WHERE s.name IN ('mz_source') AND u.replica_id = '${replica_id}'
  156. mz_source true true <null> <null>
  157. $ postgres-execute connection=postgres://postgres:postgres@postgres
  158. CREATE TABLE alt (f1 TEXT);
  159. ALTER TABLE alt REPLICA IDENTITY FULL;
  160. INSERT INTO alt VALUES ('one');
  161. > ALTER SOURCE mz_source ADD SUBSOURCE alt;
  162. > SELECT COUNT(*) > 0 FROM alt;
  163. true
  164. # When we add a table we snapshot that table, so now we will have snapshot stats.
  165. >[version>=14900] SELECT
  166. s.name,
  167. u.snapshot_records_known,
  168. u.snapshot_records_staged
  169. FROM mz_sources s
  170. JOIN mz_internal.mz_source_statistics_raw u ON s.id = u.id
  171. WHERE s.name IN ('mz_source') AND u.replica_id = '${replica_id}'
  172. mz_source 1 1
  173. >[version<14900] SELECT
  174. s.name,
  175. u.snapshot_records_known,
  176. u.snapshot_records_staged
  177. FROM mz_sources s
  178. JOIN mz_internal.mz_source_statistics_raw u ON s.id = u.id
  179. WHERE s.name IN ('mz_source') AND u.replica_id = '${replica_id}'
  180. mz_source 1 1
  181. # Ensure subsource stats show up, and then are removed when we drop subsources.
  182. > SELECT
  183. s.name,
  184. u.updates_committed > 0
  185. FROM mz_sources s
  186. JOIN mz_internal.mz_source_statistics_raw u ON s.id = u.id
  187. WHERE s.name IN ('alt') AND u.replica_id = '${replica_id}'
  188. alt true
  189. > DROP SOURCE alt;
  190. > SELECT
  191. count(*)
  192. FROM mz_sources s
  193. JOIN mz_internal.mz_source_statistics_raw u ON s.id = u.id
  194. WHERE s.name IN ('alt')
  195. 0
  196. # TODO(guswynn): consider adding an envd restart test for pg statistics, not just kafka ones like in test/cluster.