# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

$ set-arg-default single-replica-cluster=quickstart

#
# Tests in support of https://github.com/MaterializeInc/materialize/pull/6471
# "optimize topk when limit=1 and input is monotonic (append-only)"
#

$ postgres-execute connection=postgres://mz_system:materialize@${testdrive.materialize-internal-sql-addr}
ALTER SYSTEM SET min_timestamp_interval = '100ms'

#
# Make sure that the general pattern of all queries in this file will use TopK and not some other future operator or optimization
#

# Remove references to internal table identifiers and "materialize.public" strings
$ set-regex match=(\s\(u\d+\)|materialize\.public\.) replacement=

? EXPLAIN OPTIMIZED PLAN AS VERBOSE TEXT FOR SELECT (SELECT 'a' LIMIT 1);
Explained Query (fast path):
  Constant
    - ("a")

Target cluster: mz_catalog_server

$ set schema={"type": "record", "name": "schema", "fields": [ {"name": "f1", "type": ["int", "null"]} , {"name": "f2", "type": ["int", "null"]}] }

$ kafka-create-topic topic=top1

$ kafka-ingest format=avro topic=top1 schema=${schema} timestamp=1

> CREATE CONNECTION IF NOT EXISTS csr_conn TO CONFLUENT SCHEMA REGISTRY (
    URL '${testdrive.schema-registry-url}'
  );

> CREATE CONNECTION kafka_conn
  TO KAFKA (BROKER '${testdrive.kafka-addr}', SECURITY PROTOCOL PLAINTEXT);

> CREATE SOURCE t1
  IN CLUSTER ${arg.single-replica-cluster}
  FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-top1-${testdrive.seed}')
  WITH (TIMESTAMP INTERVAL '100ms')

> CREATE TABLE t1_tbl FROM SOURCE t1 (REFERENCE "testdrive-top1-${testdrive.seed}")
  FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  ENVELOPE NONE

#
# Over constants
#

> SELECT (SELECT 'a' LIMIT 1);
a

> SELECT (SELECT 'a' ORDER BY 1 LIMIT 1);
a

> SELECT (SELECT 'a' GROUP BY 1 LIMIT 1);
a

> SELECT (SELECT 'a' ORDER BY 'a' LIMIT 1);
a

> SELECT (SELECT 'a' GROUP BY 'a' LIMIT 1);
a

#
# And now some actual materialized views
#

> CREATE MATERIALIZED VIEW limit_only AS SELECT (SELECT f1 FROM t1_tbl LIMIT 1);

> CREATE MATERIALIZED VIEW group_by_limit AS SELECT (SELECT f1 FROM t1_tbl GROUP BY f1 LIMIT 1);

> CREATE MATERIALIZED VIEW order_by_limit AS SELECT (SELECT f1 FROM t1_tbl ORDER BY f1 LIMIT 1);

> CREATE MATERIALIZED VIEW order_by_desc_limit AS SELECT (SELECT f1 FROM t1_tbl ORDER BY f1 DESC LIMIT 1);

> CREATE MATERIALIZED VIEW group_by_in_top_1 AS SELECT (select f2 FROM t1_tbl AS inner WHERE inner.f1 = outer.f1 GROUP BY f2 LIMIT 1) FROM t1_tbl AS outer;

> CREATE MATERIALIZED VIEW group_by_order_by_in_top_1 AS SELECT (select f2 FROM t1_tbl AS inner WHERE inner.f1 = outer.f1 ORDER BY f2 DESC LIMIT 1) FROM t1_tbl AS outer;

#
# Over an empty source
#

> SELECT * from limit_only;
<null>

> SELECT * from group_by_limit;
<null>

> SELECT * FROM order_by_limit;
<null>

> SELECT * from order_by_desc_limit;
<null>

> SELECT * FROM group_by_in_top_1;

> SELECT * FROM group_by_order_by_in_top_1;

#
# Over a source with a single record
#

$ kafka-ingest format=avro topic=top1 schema=${schema} timestamp=1
{"f1": {"int": 123}, "f2": {"int": -123} }

> SELECT * from limit_only;
123

> SELECT * from group_by_limit;
123

> SELECT * FROM order_by_limit;
123

> SELECT * from order_by_desc_limit;
123

> SELECT * FROM group_by_in_top_1;
-123

> SELECT * FROM group_by_order_by_in_top_1;
-123

#
# A second record arrives, causes the ORDER BY DESC view to change output
#

$ kafka-ingest format=avro topic=top1 schema=${schema} timestamp=2
{"f1": {"int": 234}, "f2": {"int" : -234} }

> SELECT * from limit_only;
123

> SELECT * from group_by_limit;
123

> SELECT * FROM order_by_limit;
123

> SELECT * from order_by_desc_limit;
234

> SELECT * FROM group_by_in_top_1;
-123
-234

> SELECT * FROM group_by_order_by_in_top_1;
-123
-234

#
# The third record causes all other views to change outputs
#

$ kafka-ingest format=avro topic=top1 schema=${schema} timestamp=3
{"f1": {"int": 0}, "f2": {"int": 0} }

> SELECT * from limit_only;
0

> SELECT * from group_by_limit;
0

> SELECT * FROM order_by_limit;
0

> SELECT * from order_by_desc_limit;
234

> SELECT * FROM group_by_in_top_1;
0
-123
-234

> SELECT * FROM group_by_order_by_in_top_1;
0
-123
-234

#
# Insert some more rows, mostly for the benefit of the "in_top_1" views
#

$ kafka-ingest format=avro topic=top1 schema=${schema} timestamp=4
{"f1": {"int": 0}, "f2": {"int": 0}}
{"f1": {"int": 234}, "f2": {"int": 0}}
{"f1": {"int": 123}, "f2": {"int": -234} }

> SELECT * from limit_only;
0

> SELECT * from group_by_limit;
0

> SELECT * FROM order_by_limit;
0

> SELECT * from order_by_desc_limit;
234

> SELECT * FROM group_by_in_top_1;
0
0
-234
-234
-234
-234

> SELECT * FROM group_by_order_by_in_top_1;
-123
-123
0
0
0
0

#
# And finally, insert some NULL values
#

$ kafka-ingest format=avro topic=top1 schema=${schema} timestamp=5
{"f1": null, "f2": null}
{"f1": {"int":0}, "f2": null}
{"f1": null, "f2": {"int": 0}}
{"f1": null, "f2": {"int": -234}}

> SELECT * from limit_only;
0

> SELECT * from group_by_limit;
0

> SELECT * FROM order_by_limit;
0

> SELECT * from order_by_desc_limit;
<null>

> SELECT * FROM group_by_in_top_1;
-234
-234
0
0
0
-234
-234
<null>
<null>
<null>

> SELECT * FROM group_by_order_by_in_top_1;
-123
-123
0
0
<null>
<null>
<null>
<null>
<null>
<null>

# Check arrangements, seeing new arrangements can mean a significant increase
# in memory consumptions and should be understood before adapting the values.

> SET cluster_replica = r1

> SELECT
    mdod.dataflow_name,
    -- Simplify Column descriptions to avoid unnecessary test breakage when the
    -- optimizer evolves.
    regexp_replace(mdod.name, 'Column\((\d+), .+\)', 'Column($1)')
  FROM mz_introspection.mz_arrangement_sharing mash
  JOIN mz_introspection.mz_dataflow_operator_dataflows mdod ON mash.operator_id = mdod.id
  JOIN mz_introspection.mz_compute_exports USING (dataflow_id)
  WHERE export_id LIKE 'u%'
"Dataflow: group_by_in_top_1" "Arranged TopK input"
"Dataflow: group_by_in_top_1" "Arranged TopK input"
"Dataflow: group_by_in_top_1" "Arranged TopK input"
"Dataflow: group_by_in_top_1" "Arranged TopK input"
"Dataflow: group_by_in_top_1" "Arranged TopK input"
"Dataflow: group_by_in_top_1" "Arranged TopK input"
"Dataflow: group_by_in_top_1" "Arranged TopK input"
"Dataflow: group_by_in_top_1" "Arranged TopK input"
"Dataflow: group_by_in_top_1" "Reduced TopK input"
"Dataflow: group_by_in_top_1" "Reduced TopK input"
"Dataflow: group_by_in_top_1" "Reduced TopK input"
"Dataflow: group_by_in_top_1" "Reduced TopK input"
"Dataflow: group_by_in_top_1" "Reduced TopK input"
"Dataflow: group_by_in_top_1" "Reduced TopK input"
"Dataflow: group_by_in_top_1" "Reduced TopK input"
"Dataflow: group_by_in_top_1" "Reduced TopK input"
"Dataflow: group_by_limit" "Arranged TopK input"
"Dataflow: group_by_limit" "Arranged TopK input"
"Dataflow: group_by_limit" "Arranged TopK input"
"Dataflow: group_by_limit" "Arranged TopK input"
"Dataflow: group_by_limit" "Arranged TopK input"
"Dataflow: group_by_limit" "Arranged TopK input"
"Dataflow: group_by_limit" "Arranged TopK input"
"Dataflow: group_by_limit" "Arranged TopK input"
"Dataflow: group_by_limit" "Reduced TopK input"
"Dataflow: group_by_limit" "Reduced TopK input"
"Dataflow: group_by_limit" "Reduced TopK input"
"Dataflow: group_by_limit" "Reduced TopK input"
"Dataflow: group_by_limit" "Reduced TopK input"
"Dataflow: group_by_limit" "Reduced TopK input"
"Dataflow: group_by_limit" "Reduced TopK input"
"Dataflow: group_by_limit" "Reduced TopK input"
"Dataflow: group_by_order_by_in_top_1" "Arranged TopK input"
"Dataflow: group_by_order_by_in_top_1" "Arranged TopK input"
"Dataflow: group_by_order_by_in_top_1" "Arranged TopK input"
"Dataflow: group_by_order_by_in_top_1" "Arranged TopK input"
"Dataflow: group_by_order_by_in_top_1" "Arranged TopK input"
"Dataflow: group_by_order_by_in_top_1" "Arranged TopK input"
"Dataflow: group_by_order_by_in_top_1" "Arranged TopK input"
"Dataflow: group_by_order_by_in_top_1" "Arranged TopK input"
"Dataflow: group_by_order_by_in_top_1" "Reduced TopK input"
"Dataflow: group_by_order_by_in_top_1" "Reduced TopK input"
"Dataflow: group_by_order_by_in_top_1" "Reduced TopK input"
"Dataflow: group_by_order_by_in_top_1" "Reduced TopK input"
"Dataflow: group_by_order_by_in_top_1" "Reduced TopK input"
"Dataflow: group_by_order_by_in_top_1" "Reduced TopK input"
"Dataflow: group_by_order_by_in_top_1" "Reduced TopK input"
"Dataflow: group_by_order_by_in_top_1" "Reduced TopK input"
"Dataflow: limit_only" "Arranged TopK input"
"Dataflow: limit_only" "Arranged TopK input"
"Dataflow: limit_only" "Arranged TopK input"
"Dataflow: limit_only" "Arranged TopK input"
"Dataflow: limit_only" "Arranged TopK input"
"Dataflow: limit_only" "Arranged TopK input"
"Dataflow: limit_only" "Arranged TopK input"
"Dataflow: limit_only" "Arranged TopK input"
"Dataflow: limit_only" "Reduced TopK input"
"Dataflow: limit_only" "Reduced TopK input"
"Dataflow: limit_only" "Reduced TopK input"
"Dataflow: limit_only" "Reduced TopK input"
"Dataflow: limit_only" "Reduced TopK input"
"Dataflow: limit_only" "Reduced TopK input"
"Dataflow: limit_only" "Reduced TopK input"
"Dataflow: limit_only" "Reduced TopK input"
"Dataflow: order_by_desc_limit" "Arranged TopK input"
"Dataflow: order_by_desc_limit" "Arranged TopK input"
"Dataflow: order_by_desc_limit" "Arranged TopK input"
"Dataflow: order_by_desc_limit" "Arranged TopK input"
"Dataflow: order_by_desc_limit" "Arranged TopK input"
"Dataflow: order_by_desc_limit" "Arranged TopK input"
"Dataflow: order_by_desc_limit" "Arranged TopK input"
"Dataflow: order_by_desc_limit" "Arranged TopK input"
"Dataflow: order_by_desc_limit" "Reduced TopK input"
"Dataflow: order_by_desc_limit" "Reduced TopK input"
"Dataflow: order_by_desc_limit" "Reduced TopK input"
"Dataflow: order_by_desc_limit" "Reduced TopK input"
"Dataflow: order_by_desc_limit" "Reduced TopK input"
"Dataflow: order_by_desc_limit" "Reduced TopK input"
"Dataflow: order_by_desc_limit" "Reduced TopK input"
"Dataflow: order_by_desc_limit" "Reduced TopK input"
"Dataflow: order_by_limit" "Arranged TopK input"
"Dataflow: order_by_limit" "Arranged TopK input"
"Dataflow: order_by_limit" "Arranged TopK input"
"Dataflow: order_by_limit" "Arranged TopK input"
"Dataflow: order_by_limit" "Arranged TopK input"
"Dataflow: order_by_limit" "Arranged TopK input"
"Dataflow: order_by_limit" "Arranged TopK input"
"Dataflow: order_by_limit" "Arranged TopK input"
"Dataflow: order_by_limit" "Reduced TopK input"
"Dataflow: order_by_limit" "Reduced TopK input"
"Dataflow: order_by_limit" "Reduced TopK input"
"Dataflow: order_by_limit" "Reduced TopK input"
"Dataflow: order_by_limit" "Reduced TopK input"
"Dataflow: order_by_limit" "Reduced TopK input"
"Dataflow: order_by_limit" "Reduced TopK input"
"Dataflow: order_by_limit" "Reduced TopK input"
"Dataflow: group_by_in_top_1" "ArrangeBy[[Column(0)]]"
"Dataflow: group_by_in_top_1" "ArrangeBy[[Column(0)]]"
"Dataflow: group_by_in_top_1" "ArrangeBy[[Column(0)]]"
"Dataflow: group_by_in_top_1" "Arranged DistinctBy"
"Dataflow: group_by_in_top_1" "Arranged DistinctBy"
"Dataflow: group_by_in_top_1" DistinctBy
"Dataflow: group_by_in_top_1" DistinctBy
"Dataflow: group_by_in_top_1" DistinctByErrorCheck
"Dataflow: group_by_in_top_1" DistinctByErrorCheck
"Dataflow: group_by_limit" "Arranged DistinctBy"
"Dataflow: group_by_limit" DistinctBy
"Dataflow: group_by_limit" DistinctByErrorCheck
"Dataflow: group_by_order_by_in_top_1" "ArrangeBy[[Column(0)]]"
"Dataflow: group_by_order_by_in_top_1" "ArrangeBy[[Column(0)]]"
"Dataflow: group_by_order_by_in_top_1" "ArrangeBy[[Column(0)]]"
"Dataflow: group_by_order_by_in_top_1" "Arranged DistinctBy"
"Dataflow: group_by_order_by_in_top_1" DistinctBy
"Dataflow: group_by_order_by_in_top_1" DistinctByErrorCheck