# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

$ set-arg-default default-replica-size=1
$ set-arg-default single-replica-cluster=quickstart

# Test creating and dropping various views and sources that depend upon
# one another, and indices on those views and sources.

$ set schema={
    "type": "record",
    "name": "row",
    "fields": [
      {"name": "a", "type": "long"},
      {"name": "b", "type": "long"}
    ]
  }

$ kafka-create-topic topic=data

> CREATE CONNECTION kafka_conn
  TO KAFKA (BROKER '${testdrive.kafka-addr}', SECURITY PROTOCOL PLAINTEXT);

> CREATE CONNECTION IF NOT EXISTS csr_conn TO CONFLUENT SCHEMA REGISTRY (
    URL '${testdrive.schema-registry-url}'
  );

> CREATE SOURCE data
  IN CLUSTER ${arg.single-replica-cluster}
  FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-data-${testdrive.seed}')
  FORMAT AVRO USING SCHEMA '${schema}'

> SELECT * FROM data

> CREATE VIEW data_view as SELECT * from data

> SELECT * FROM data_view

> CREATE MATERIALIZED VIEW test1 AS
  SELECT b, sum(a) FROM data GROUP BY b

> SHOW VIEWS
name       comment
------------------
data_view  ""

> SHOW MATERIALIZED VIEWS
name   cluster     comment
---------------------------
test1  quickstart  ""

> SELECT * FROM test1
b sum
------
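# No rows are expected yet: nothing has been ingested into the topic at this point.
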
$ kafka-ingest format=avro topic=data schema=${schema} timestamp=1
{"a": 1, "b": 1}
{"a": 2, "b": 1}
{"a": 3, "b": 1}
{"a": 1, "b": 2}

> SELECT * FROM test1
b sum
------
1 6
2 1

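# Sanity check on the expected sums: for b = 1 the ingested a values are
# 1, 2 and 3, so sum(a) = 6; for b = 2 the only ingested a value is 1.
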
> SHOW COLUMNS FROM test1
name  nullable  type     comment
------------------------------
b     false     bigint   ""
sum   false     numeric  ""

> SHOW VIEWS LIKE '%data%'
data_view ""

# Materialized view can be built on an unmaterialized view.
> CREATE MATERIALIZED VIEW test2 AS
  SELECT b, 1 + sum(a + 1) FROM data_view GROUP BY b

> SELECT * FROM test2
b ?column?
-----------
1 10
2 3

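# Sanity check on the expected values: for b = 1, sum(a + 1) = 2 + 3 + 4 = 9,
# so 1 + sum(a + 1) = 10; for b = 2, 1 + (1 + 1) = 3.
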
# Materialize data_view.
> CREATE DEFAULT INDEX ON data_view

> SELECT * FROM data_view
a b
----
1 1
2 1
3 1
1 2

> CREATE VIEW test3 AS
  SELECT b, min(a) FROM data_view GROUP BY b

> SELECT * FROM test3
b min
------
1 1
2 1

> CREATE MATERIALIZED VIEW test4 AS
  SELECT b, max(a) FROM data_view GROUP BY b

> SELECT * FROM test4
b max
------
1 3
2 1

# Unmaterialize data_view.
> DROP INDEX data_view_primary_idx

# Can continue to select from a view that depends on the unmaterialized view.
> SELECT * FROM test4
b max
------
1 3
2 1

> SELECT * FROM test4 where b = 2
b max
------
2 1

> SELECT * from data_view
a b
----
1 1
2 1
3 1
1 2

# Cannot create sink from unmaterialized view.
! CREATE SINK not_mat_sink2
  IN CLUSTER ${arg.single-replica-cluster}
  FROM data_view
  INTO KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-data-view2-sink-${testdrive.seed}')
  FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  ENVELOPE DEBEZIUM
contains:data_view is a view, which cannot be exported as a sink

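# (Presumably a sink over a source or a materialized view would be accepted;
# the error above is specific to bare views such as data_view.)
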
# Can create indexed view from unmaterialized view.
> CREATE VIEW test5 AS
  SELECT b, max(a) AS c FROM data_view GROUP BY b

> CREATE DEFAULT INDEX ON test5

# Nor can a sink be created from an indexed view.
! CREATE SINK not_mat_sink2
  IN CLUSTER ${arg.single-replica-cluster}
  FROM test5
  INTO KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-data-view2-sink-${testdrive.seed}')
  FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  ENVELOPE DEBEZIUM
contains:test5 is a view, which cannot be exported as a sink

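# The regex below strips " (uNNN)" id suffixes from subsequent output, so the
# EXPLAIN plans that follow do not depend on catalog ids assigned at runtime.
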
$ set-regex match=(\s\(u\d+\)) replacement=

?[version>=13500] EXPLAIN OPTIMIZED PLAN AS VERBOSE TEXT FOR SELECT * FROM test5;
Explained Query (fast path):
  ReadIndex on=materialize.public.test5 test5_primary_idx=[*** full scan ***]

Used Indexes:
  - materialize.public.test5_primary_idx (*** full scan ***)

Target cluster: quickstart

?[version<13500] EXPLAIN OPTIMIZED PLAN FOR SELECT * FROM test5;
Explained Query (fast path):
  ReadIndex on=materialize.public.test5 test5_primary_idx=[*** full scan ***]

Used Indexes:
  - materialize.public.test5_primary_idx (*** full scan ***)

Target cluster: quickstart

> SELECT * FROM test5
b c
------
1 3
2 1

> SELECT c+b from test5
4
3

> CREATE INDEX idx1 ON test5(c)

! SELECT * FROM idx1
contains:catalog item 'materialize.public.idx1' is an index and so cannot be depended upon

# If there exists a second index, dropping the primary index will not
# unmaterialize the view.
> DROP INDEX test5_primary_idx

?[version>=13500] EXPLAIN OPTIMIZED PLAN AS VERBOSE TEXT FOR SELECT * FROM test5
Explained Query (fast path):
  Project (#1, #0)
    ReadIndex on=materialize.public.test5 idx1=[*** full scan ***]

Used Indexes:
  - materialize.public.idx1 (*** full scan ***)

Target cluster: quickstart

?[version<13500] EXPLAIN OPTIMIZED PLAN FOR SELECT * FROM test5
Explained Query (fast path):
  Project (#1, #0)
    ReadIndex on=materialize.public.test5 idx1=[*** full scan ***]

Used Indexes:
  - materialize.public.idx1 (*** full scan ***)

Target cluster: quickstart

> SELECT * from test5
b c
------
1 3
2 1

> SELECT c-b from test5
2
-1

# Unmaterialize test5.
> DROP INDEX idx1

# Still works.
> SELECT * from test5
b c
------
1 3
2 1

# Test that materialized views can be created even if it requires multiple
# layers of recursing through the AST to find a source.
> CREATE MATERIALIZED VIEW test6 AS SELECT (-c + 2*b) AS d FROM test5

> SELECT * from test6
d
----
-1
3

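# Sanity check on the expected values: d = -c + 2*b, so the row (b=1, c=3)
# yields -3 + 2 = -1 and the row (b=2, c=1) yields -1 + 4 = 3.
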
# Dependencies are still queryable after creating a dependent
# materialized view.
> SELECT * from test5
b c
------
1 3
2 1

> SELECT * from data_view
a b
----
1 1
2 1
3 1
1 2

# Rematerialize data_view by creating an index on it.
> CREATE INDEX data_view_idx on data_view(a)

> SELECT * from data_view
a b
---
1 1
2 1
3 1
1 2

# Existing materialized dependencies can be selected from as normal.
> SELECT * from test6
d
----
-1
3

# Dependencies can be selected from again if they do not depend on any other raw
# source.
> SELECT * from test5
b c
------
1 3
2 1

# Create a suboptimal second index on the same column in data_view.
> CREATE INDEX data_view_idx2 on data_view(a)

> SELECT * from data_view
a b
---
1 1
2 1
3 1
1 2

> SELECT * from test6
d
----
-1
3

> SELECT * from test5
b c
------
1 3
2 1

# Delete the first copy of the same index and ensure everything selects as
# normal.
> DROP INDEX data_view_idx

> SELECT * from data_view
a b
---
1 1
2 1
3 1
1 2

> SELECT * from test6
d
----
-1
3

> SELECT * from test5
b c
------
1 3
2 1

# Materialized sources tests
$ kafka-create-topic topic=mat

> CREATE SOURCE mat_data
  IN CLUSTER ${arg.single-replica-cluster}
  FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-mat-${testdrive.seed}')
  FORMAT AVRO USING SCHEMA '${schema}'

> CREATE DEFAULT INDEX ON mat_data

> SELECT * from mat_data

$ kafka-ingest format=avro topic=mat schema=${schema} timestamp=1
{"a": -1, "b": 0}
{"a": -1, "b": 1}
{"a": 3, "b": 4}
{"a": 1, "b": 2}

> SELECT * from mat_data
a b
----
-1 0
-1 1
3 4
1 2

> SHOW SOURCES
name               type      cluster                          comment
-------------------------------------------------------------------
data               kafka     ${arg.single-replica-cluster}    ""
data_progress      progress  <null>                           ""
mat_data           kafka     ${arg.single-replica-cluster}    ""
mat_data_progress  progress  <null>                           ""

# If there exists another index, dropping the primary index will not
# unmaterialize the source. This also tests creating a default index when the
# default index name is already taken.
> CREATE DEFAULT INDEX ON mat_data

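# The default index name mat_data_primary_idx is already taken, so this second
# default index is expected to be created as mat_data_primary_idx1 (it is
# dropped under that name further below).
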
> DROP INDEX mat_data_primary_idx

> SELECT a+b from mat_data
-1
0
7
3

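# Sanity check: a + b over the four ingested rows is -1 + 0, -1 + 1, 3 + 4
# and 1 + 2, i.e. -1, 0, 7 and 3.
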
# Can create both materialized and unmaterialized views from a materialized
# source.
> CREATE MATERIALIZED VIEW test7 as SELECT count(*) from mat_data

> SELECT * from test7
count
-----
4

> CREATE VIEW test8 as SELECT -b as c, -a as d from mat_data

> SELECT * from test8
c d
-----
0 1
-1 1
-4 -3
-2 -1

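# Sanity check: test8 returns (c, d) = (-b, -a) for each of the four rows
# currently in mat_data.
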
# Unmaterialize source.
> DROP INDEX mat_data_primary_idx1

# Still works.
> SELECT * from mat_data
a b
----
-1 0
-1 1
3 4
1 2

> SELECT * from test7
count
-----
4

> SELECT * from test8
c d
-----
0 1
-1 1
-4 -3
-2 -1

$ kafka-ingest format=avro topic=mat schema=${schema} timestamp=2
{"a": -3, "b": 0}
{"a": -1, "b": 0}
{"a": 0, "b": 4}
{"a": 1, "b": 2}

# Rematerialize source.
> CREATE INDEX mat_data_idx3 on mat_data(b)

> SELECT * from mat_data
a b
----
-1 0
-1 1
3 4
1 2
-3 0
-1 0
0 4
1 2

> SELECT * from test7
count
-----
8

> SELECT * from test8
c d
------
0 1
-1 1
-4 -3
-2 -1
0 3
0 1
-4 0
-2 -1

# Check arrangements; seeing new arrangements can mean a significant increase
# in memory consumption and should be understood before adapting the values.
> SET cluster_replica = r1

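# mz_introspection data is maintained per replica, so the session is pinned to
# replica r1 above before querying the arrangement introspection below.
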
>[version>=15000] SELECT mdod.dataflow_name, mdod.name
  FROM mz_introspection.mz_arrangement_sharing mash
  JOIN mz_introspection.mz_dataflow_operator_dataflows mdod ON mash.operator_id = mdod.id
  JOIN mz_introspection.mz_compute_exports USING (dataflow_id)
  WHERE export_id LIKE 'u%'
"Dataflow: materialize.public.data_view_idx" "ArrangeBy[[Column(0, \"a\")]]"
"Dataflow: materialize.public.data_view_idx" "ArrangeBy[[Column(0, \"a\")]]-errors"
"Dataflow: materialize.public.data_view_primary_idx" "ArrangeBy[[Column(0, \"a\"), Column(1, \"b\")]]"
"Dataflow: materialize.public.data_view_primary_idx" "ArrangeBy[[Column(0, \"a\"), Column(1, \"b\")]]-errors"
"Dataflow: materialize.public.mat_data_idx3" "ArrangeBy[[Column(1, \"b\")]]"
"Dataflow: materialize.public.mat_data_idx3" "ArrangeBy[[Column(1, \"b\")]]-errors"
"Dataflow: materialize.public.mat_data_primary_idx" "ArrangeBy[[Column(0, \"a\"), Column(1, \"b\")]]"
"Dataflow: materialize.public.mat_data_primary_idx" "ArrangeBy[[Column(0, \"a\"), Column(1, \"b\")]]-errors"
"Dataflow: materialize.public.test1" AccumulableErrorCheck
"Dataflow: materialize.public.test1" "ArrangeAccumulable [val: empty]"
"Dataflow: materialize.public.test1" ReduceAccumulable
"Dataflow: materialize.public.test2" AccumulableErrorCheck
"Dataflow: materialize.public.test2" "ArrangeAccumulable [val: empty]"
"Dataflow: materialize.public.test2" ReduceAccumulable
"Dataflow: materialize.public.test4" "ArrangeMonotonic [val: empty]"
"Dataflow: materialize.public.test4" ReduceMonotonic
"Dataflow: materialize.public.test6" "ArrangeMonotonic [val: empty]"
"Dataflow: materialize.public.test6" ReduceMonotonic
"Dataflow: materialize.public.test7" AccumulableErrorCheck
"Dataflow: materialize.public.test7" "ArrangeAccumulable [val: empty]"
"Dataflow: materialize.public.test7" ReduceAccumulable