introspection-sources.td 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. $ set-sql-timeout duration=300s
  10. $ set-arg-default default-replica-size=1
  11. # Test for a subset of the information returned by introspection sources.
  12. # The test focuses on computing answers that are independent of particular
  13. # timing measurement data, even if these stable answers may take some time
  14. # to appear in the results of queries to introspection sources.
  15. # Note that we count on the retry behavior of testdrive in this test
  16. # since introspection sources may take some time to catch up.
  17. $ set-sql-timeout duration=60s
  18. # Introspection subscribes add noise to the introspection sources, so disable them.
  19. $ postgres-execute connection=postgres://mz_system:materialize@${testdrive.materialize-internal-sql-addr}
  20. ALTER SYSTEM SET enable_introspection_subscribes = false
  21. # In case the environment has other replicas
  22. > CREATE CLUSTER test SIZE '4-4'
  23. > SET cluster = test
  24. > CREATE TABLE t (a int)
  25. > CREATE MATERIALIZED VIEW mv AS SELECT * FROM t
  26. > SELECT COUNT(*)
  27. FROM
  28. mz_materialized_views AS views,
  29. mz_introspection.mz_compute_exports AS compute_exports,
  30. mz_introspection.mz_compute_import_frontiers_per_worker AS import_frontiers
  31. WHERE
  32. views.name = 'mv' AND
  33. views.id = compute_exports.export_id AND
  34. compute_exports.export_id = import_frontiers.export_id AND
  35. time > 0
  36. 16
  37. > CREATE VIEW vv AS SELECT * FROM t
  38. > SELECT COUNT(*) FROM (
  39. SELECT import_frontiers.export_id, import_frontiers.import_id
  40. FROM mz_introspection.mz_compute_import_frontiers AS import_frontiers
  41. WHERE export_id LIKE 'u%'
  42. )
  43. 1
  44. > CREATE DEFAULT INDEX ON vv
  45. > SELECT COUNT(*) FROM (
  46. SELECT import_frontiers.export_id, import_frontiers.import_id
  47. FROM mz_introspection.mz_compute_import_frontiers AS import_frontiers
  48. WHERE export_id LIKE 'u%'
  49. )
  50. 2
  51. > SELECT COUNT(*)
  52. FROM
  53. mz_views AS views,
  54. mz_indexes AS indexes,
  55. mz_introspection.mz_compute_exports compute_exports,
  56. mz_introspection.mz_compute_import_frontiers_per_worker AS import_frontiers
  57. WHERE
  58. views.name = 'vv' AND
  59. views.id = indexes.on_id AND
  60. indexes.id = compute_exports.export_id AND
  61. compute_exports.export_id = import_frontiers.export_id AND
  62. time > 0
  63. 16
  64. > CREATE MATERIALIZED VIEW mvv AS SELECT * FROM vv
  65. > SELECT COUNT(*) FROM (
  66. SELECT import_frontiers.export_id, import_frontiers.import_id
  67. FROM mz_introspection.mz_compute_import_frontiers AS import_frontiers
  68. WHERE export_id LIKE 'u%'
  69. )
  70. 3
  71. > DROP MATERIALIZED VIEW mvv
  72. > SELECT COUNT(*) FROM (
  73. SELECT import_frontiers.export_id, import_frontiers.import_id
  74. FROM mz_introspection.mz_compute_import_frontiers AS import_frontiers
  75. WHERE export_id LIKE 'u%'
  76. )
  77. 2
  78. > DROP INDEX vv_primary_idx
  79. > SELECT COUNT(*) FROM (
  80. SELECT import_frontiers.export_id, import_frontiers.import_id
  81. FROM mz_introspection.mz_compute_import_frontiers AS import_frontiers
  82. WHERE export_id LIKE 'u%'
  83. )
  84. 1
  85. > DROP MATERIALIZED VIEW mv
  86. > SELECT COUNT(*) FROM (
  87. SELECT import_frontiers.export_id, import_frontiers.import_id
  88. FROM mz_introspection.mz_compute_import_frontiers AS import_frontiers
  89. WHERE export_id LIKE 'u%'
  90. )
  91. 0
  92. # Test that frontiers of introspection sources advance at all.
  93. ! SELECT * FROM mz_introspection.mz_active_peeks AS OF 0
  94. contains: Timestamp (0) is not valid for all inputs
  95. # Test that logged subscribe frontiers advance beyond 0.
  96. $ set-regex match=\d{13} replacement=<TIMESTAMP>
  97. > BEGIN
  98. > DECLARE c CURSOR FOR SUBSCRIBE (
  99. SELECT true
  100. FROM mz_introspection.mz_compute_frontiers f, mz_internal.mz_subscriptions s
  101. WHERE f.export_id = s.id AND time > 0)
  102. > FETCH 1 c WITH (timeout='20s')
  103. <TIMESTAMP> 1 true
  104. > COMMIT
  105. # Test that mz_compute_exports contains correct dataflow IDs.
  106. > CREATE MATERIALIZED VIEW my_unique_mv_name AS SELECT * FROM t
  107. > SELECT count(*)
  108. FROM
  109. mz_materialized_views mv,
  110. mz_introspection.mz_compute_exports exp,
  111. mz_introspection.mz_dataflows df
  112. WHERE
  113. mv.name = 'my_unique_mv_name' AND
  114. mv.id = exp.export_id AND
  115. exp.dataflow_id = df.id AND
  116. df.name LIKE '%my_unique_mv_name%'
  117. 1
  118. # Test that each operator has at most one parent
  119. > SELECT max(count) FROM (
  120. SELECT count(*)
  121. FROM mz_introspection.mz_dataflow_operator_parents
  122. GROUP BY id) counts
  123. 1
  124. # Test that certain `*_per_worker` relations include the same data for each
  125. # worker. The corresponding global relations rely on this to justify showing
  126. # only the data from worker 0.
  127. > SELECT DISTINCT count(*)
  128. FROM mz_introspection.mz_dataflows_per_worker
  129. GROUP BY id, name
  130. 16
  131. > SELECT DISTINCT count(*)
  132. FROM mz_introspection.mz_dataflow_addresses_per_worker
  133. GROUP BY id, address
  134. 16
  135. > SELECT DISTINCT count(*)
  136. FROM mz_introspection.mz_dataflow_channels_per_worker
  137. GROUP BY id, from_index, from_port, to_index, to_port
  138. 16
  139. > SELECT DISTINCT count(*)
  140. FROM mz_introspection.mz_dataflow_operators_per_worker
  141. GROUP BY id, name
  142. 16
  143. > SELECT DISTINCT count(*)
  144. FROM mz_introspection.mz_dataflow_operator_dataflows_per_worker
  145. GROUP BY id, name, dataflow_id, dataflow_name
  146. 16
  147. > SELECT DISTINCT count(*)
  148. FROM mz_introspection.mz_dataflow_channel_operators_per_worker
  149. GROUP BY id, from_operator_id, to_operator_id
  150. 16
  151. > SELECT DISTINCT count(*)
  152. FROM mz_introspection.mz_compute_exports_per_worker
  153. GROUP BY export_id, dataflow_id
  154. 16
  155. > CREATE DEFAULT INDEX ON t
  156. > SELECT DISTINCT count(*)
  157. FROM mz_introspection.mz_arrangement_sharing_per_worker
  158. GROUP BY operator_id, count
  159. 16
  160. > DROP INDEX t_primary_idx
  161. # Test that the roll-up of arrangement sizes is correct
  162. > INSERT INTO t SELECT * FROM generate_series(1, 100)
  163. > CREATE TABLE t2 (b int)
  164. > INSERT INTO t2 SELECT * FROM generate_series(1, 200)
  165. > CREATE VIEW vv_arr AS SELECT sum(a) FROM t JOIN t2 ON t.a = t2.b
  166. > CREATE MATERIALIZED VIEW mv_arr AS SELECT * FROM vv_arr
  167. > CREATE DEFAULT INDEX ii_arr ON vv_arr
  168. # It's hard to come up with precise bounds because we might de-duplicate some data in arrangements.
  169. > SELECT records >= 300, size > 0 FROM mz_introspection.mz_dataflow_arrangement_sizes WHERE name LIKE '%ii_arr' OR name LIKE '%mv_arr'
  170. true true
  171. true true
  172. # Test that non-arranging dataflows show up in `mz_dataflow_arrangement_sizes`
  173. > CREATE TABLE t3 (c int)
  174. > CREATE DEFAULT INDEX ii_empty ON t3
  175. > SELECT records, size < 16 * 1024, allocations < 512 FROM mz_introspection.mz_dataflow_arrangement_sizes WHERE name LIKE '%ii_empty'
  176. <null> true true
  177. # Tests that arrangement sizes are approximate
  178. > CREATE TABLE t4 (c int8)
  179. > CREATE INDEX ii_t4 ON t4(c)
  180. # We have 16 workers, and only want to ensure that the sizes are not egregious.
  181. > SELECT records, size < 16 * 1024, allocations < 512 FROM mz_introspection.mz_dataflow_arrangement_sizes WHERE name LIKE '%ii_t4'
  182. <null> true true
  183. > INSERT INTO t4 SELECT 1
  184. > SELECT records, size < 16 * 1024, allocations > 0 FROM mz_introspection.mz_dataflow_arrangement_sizes WHERE name LIKE '%ii_t4'
  185. 1 true true
  186. > INSERT INTO t4 SELECT generate_series(1, 1000)
  187. # Determining exact sizes is difficult because of deduplication in arrangements, so we just use safe values.
  188. > SELECT records >= 1000 AND records <= 1001, batches > 0, size > 0 AND size < 4*30000, capacity > 0, allocations > 0 FROM mz_introspection.mz_dataflow_arrangement_sizes WHERE name LIKE '%ii_t4'
  189. true true true true true
  190. > DROP INDEX ii_t4
  191. > SELECT records, batches, size, capacity, allocations FROM mz_introspection.mz_dataflow_arrangement_sizes WHERE name LIKE '%ii_t4'
  192. # Test arrangement size logging for error dataflows
  193. > CREATE TABLE t5(a int)
  194. > INSERT INTO t5 SELECT -a FROM generate_series(1, 10000) g(a)
  195. > CREATE VIEW vt5 AS SELECT a::uint2 FROM t5
  196. > CREATE INDEX vt5_idx ON vt5(a)
  197. # Cast to numeric to get its rounding behavior
  198. > SELECT records, (size::numeric/1024/1024)::int FROM mz_introspection.mz_dataflow_arrangement_sizes WHERE name LIKE '%vt5_idx'
  199. 10000 1
  200. > DROP TABLE t5 CASCADE
  201. # Test that `mz_dataflow_arrangement_sizes` shows dataflows not contained in the catalog.
  202. > CREATE TABLE t6 (a int)
  203. > INSERT INTO t6 SELECT generate_series(1, 1000)
  204. > CREATE INDEX ii_t6 ON t6 (a)
  205. > CREATE VIEW t6_plus_1 AS SELECT a + 1 AS b FROM t6
  206. > CREATE INDEX ii_t6_plus_1 ON t6_plus_1 (b)
  207. > DROP INDEX ii_t6
  208. > SELECT name, records > 0 FROM mz_introspection.mz_dataflow_arrangement_sizes WHERE name LIKE '%ii_t6%'
  209. "Dataflow: materialize.public.ii_t6" true
  210. "Dataflow: materialize.public.ii_t6_plus_1" true
  211. # Test that `mz_dataflow_arrangement_sizes` produces sensible results with accumulable reductions.
  212. > CREATE TABLE ten (f1 integer);
  213. > INSERT INTO ten VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
  214. > CREATE MATERIALIZED VIEW c1 AS
  215. SELECT COUNT(DISTINCT 100 * a1.f1 + 10 * a2.f1 + a3.f1)
  216. FROM ten AS a1, ten AS a2, ten AS a3;
  217. > CREATE MATERIALIZED VIEW c2 AS
  218. SELECT 100 * a1.f1 + 10 * a2.f1 + a3.f1, COUNT(*)
  219. FROM ten AS a1, ten AS a2, ten AS a3
  220. GROUP BY 1
  221. HAVING COUNT(*) > 1;
  222. > SELECT
  223. records > 2 * 1000,
  224. records < 2 * 2 * 1000,
  225. size > 0,
  226. size < 4 * 130 * 1000,
  227. allocations < 2 * 2 * 1000
  228. FROM mz_introspection.mz_dataflow_arrangement_sizes
  229. WHERE name LIKE '%c1%';
  230. true true true true true
  231. > SELECT
  232. records > 1000,
  233. records < 2 * 1000,
  234. size > 0,
  235. size < 4 * 100 * 1024,
  236. allocations < 2 * 1000
  237. FROM mz_introspection.mz_dataflow_arrangement_sizes
  238. WHERE name LIKE '%c2%';
  239. true true true true true
  240. # For coverage, we also include a recursive materialized view to account for dynamic timestamps.
  241. > CREATE MATERIALIZED VIEW rec AS
  242. WITH MUTUALLY RECURSIVE (ERROR AT RECURSION LIMIT 3) term (key int, iter int) AS (
  243. SELECT 100 * a1.f1 + 10 * a2.f1 + a3.f1 AS key, 0 AS iter
  244. FROM ten AS a1, ten AS a2, ten AS a3
  245. UNION
  246. SELECT key, iter
  247. FROM term
  248. UNION
  249. SELECT key, MAX(iter) + 1 AS iter
  250. FROM term
  251. GROUP BY key
  252. HAVING MAX(iter) < 1
  253. OPTIONS (AGGREGATE INPUT GROUP SIZE = 1)
  254. )
  255. SELECT * FROM term;
  256. > SELECT
  257. records > 0,
  258. records < 2 * 12 * 1000,
  259. size > 0,
  260. size < 4 * 1000 * 1000,
  261. allocations < 2 * 12 * 1000
  262. FROM mz_introspection.mz_dataflow_arrangement_sizes
  263. WHERE name LIKE '%rec%';
  264. true true true true true
  265. > DROP TABLE ten CASCADE;
  266. # Test mz_dataflow_arrangement_sizes with hierarchical reductions and top-k over
  267. # monotonic inputs. The latter is when there is a possibility for memory misestimation
  268. # since we include monoids in the diff field.
  269. > CREATE CLUSTER counter_cluster SIZE = '1';
  270. > CREATE SOURCE counter IN CLUSTER counter_cluster FROM LOAD GENERATOR COUNTER (TICK INTERVAL '2ms');
  271. > CREATE VIEW input AS
  272. SELECT counter % 1000 + 1 AS f1, counter % 10 + 1 AS f2
  273. FROM counter;
  274. > CREATE DEFAULT INDEX ON input;
  275. > CREATE VIEW m_minmax AS
  276. SELECT f1, MIN(f2), MAX(f2)
  277. FROM input
  278. GROUP BY f1;
  279. > CREATE DEFAULT INDEX ON m_minmax;
  280. > CREATE VIEW m_top1 AS
  281. SELECT DISTINCT ON (f1) f1, f2
  282. FROM input
  283. ORDER BY f1, f2 DESC;
  284. > CREATE DEFAULT INDEX ON m_top1;
  285. > SELECT
  286. records >= 2 * 1000,
  287. records < 1.1 * 2 * 1000,
  288. size > 0,
  289. size < 4 * 200 * 1000,
  290. allocations < 2 * 2 * 1000
  291. FROM mz_introspection.mz_dataflow_arrangement_sizes
  292. WHERE name LIKE '%m_minmax%';
  293. true true true true true
  294. > SELECT
  295. records >= 2 * 1000,
  296. records < 2 * 2 * 1000,
  297. size > 0,
  298. size < 4 * 172 * 1000,
  299. allocations < 2 * 2 * 1000
  300. FROM mz_introspection.mz_dataflow_arrangement_sizes
  301. WHERE name LIKE '%m_top1%';
  302. true true true true true
  303. > DROP SOURCE counter CASCADE;
  304. # Test dataflow error introspection.
  305. > CREATE TABLE zeros (a int)
  306. > CREATE VIEW v_div_by_zero AS SELECT 1 / a AS x FROM zeros
  307. > CREATE INDEX idx1_div_by_zero ON v_div_by_zero (x)
  308. > CREATE MATERIALIZED VIEW mv_div_by_zero AS SELECT 1 / a AS x FROM zeros
  309. > CREATE INDEX idx2_div_by_zero ON mv_div_by_zero (x)
  310. > SELECT name, count
  311. FROM mz_introspection.mz_compute_error_counts c
  312. JOIN mz_objects o ON (c.export_id = o.id)
  313. ORDER BY name
  314. > INSERT INTO zeros VALUES (0)
  315. > SELECT name, count
  316. FROM mz_introspection.mz_compute_error_counts c
  317. JOIN mz_objects o ON (c.export_id = o.id)
  318. ORDER BY name
  319. idx1_div_by_zero 1
  320. idx2_div_by_zero 1
  321. mv_div_by_zero 1
  322. > INSERT INTO zeros VALUES (0), (0)
  323. > SELECT name, count
  324. FROM mz_introspection.mz_compute_error_counts c
  325. JOIN mz_objects o ON (c.export_id = o.id)
  326. ORDER BY name
  327. idx1_div_by_zero 3
  328. idx2_div_by_zero 3
  329. mv_div_by_zero 3
  330. > DELETE FROM zeros
  331. > SELECT name, count
  332. FROM mz_introspection.mz_compute_error_counts c
  333. JOIN mz_objects o ON (c.export_id = o.id)
  334. ORDER BY name
  335. # Test that error logging is retracted when objects are dropped.
  336. > INSERT INTO zeros VALUES (0), (0)
  337. > SELECT name, count
  338. FROM mz_introspection.mz_compute_error_counts c
  339. JOIN mz_objects o ON (c.export_id = o.id)
  340. ORDER BY name
  341. idx1_div_by_zero 2
  342. idx2_div_by_zero 2
  343. mv_div_by_zero 2
  344. > DROP INDEX idx1_div_by_zero
  345. > SELECT name, count
  346. FROM mz_introspection.mz_compute_error_counts c
  347. JOIN mz_objects o ON (c.export_id = o.id)
  348. ORDER BY name
  349. idx2_div_by_zero 2
  350. mv_div_by_zero 2
  351. > DROP MATERIALIZED VIEW mv_div_by_zero
  352. > SELECT name, count
  353. FROM mz_introspection.mz_compute_error_counts c
  354. JOIN mz_objects o ON (c.export_id = o.id)
  355. ORDER BY name
  356. # Test logging of errors in indexes that advance to the empty frontier.
  357. #
  358. # Note that the same is not expected to work for MVs that advance to the empty
  359. # frontier. There is no reason to keep completed MV dataflows around, so we
  360. # drop them immediately, removing all their associated logging in the process.
  361. > CREATE MATERIALIZED VIEW mv_zero AS SELECT 0 AS x
  362. > CREATE VIEW div_by_zero AS SELECT 1 / x FROM mv_zero
  363. > CREATE INDEX idx_div_by_zero ON div_by_zero ()
  364. > SELECT name, count
  365. FROM mz_introspection.mz_compute_error_counts c
  366. JOIN mz_objects o ON (c.export_id = o.id)
  367. ORDER BY name
  368. idx_div_by_zero 1
  369. > DROP MATERIALIZED VIEW mv_zero CASCADE
  370. # Test logging of errors in reused indexes.
  371. > CREATE INDEX idx1_div_by_zero ON v_div_by_zero (x)
  372. > CREATE INDEX idx2_div_by_zero ON v_div_by_zero (x)
  373. > CREATE INDEX idx3_div_by_zero ON v_div_by_zero (x)
  374. > SELECT name, count
  375. FROM mz_introspection.mz_compute_error_counts c
  376. JOIN mz_objects o ON (c.export_id = o.id)
  377. ORDER BY name
  378. idx1_div_by_zero 2
  379. idx2_div_by_zero 2
  380. idx3_div_by_zero 2
  381. > INSERT INTO zeros VALUES (0)
  382. > SELECT name, count
  383. FROM mz_introspection.mz_compute_error_counts c
  384. JOIN mz_objects o ON (c.export_id = o.id)
  385. ORDER BY name
  386. idx1_div_by_zero 3
  387. idx2_div_by_zero 3
  388. idx3_div_by_zero 3
  389. > DROP TABLE zeros CASCADE
  390. > SELECT name, count
  391. FROM mz_introspection.mz_compute_error_counts c
  392. JOIN mz_objects o ON (c.export_id = o.id)
  393. ORDER BY name
  394. # Cleanup.
  395. > DROP CLUSTER test CASCADE