# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

$ set-arg-default single-replica-cluster=quickstart

#
# Tests in support of https://github.com/MaterializeInc/materialize/pull/6471
# "optimize topk when limit=1 and input is monotonic (append-only)"
#

$ postgres-execute connection=postgres://mz_system:materialize@${testdrive.materialize-internal-sql-addr}
ALTER SYSTEM SET min_timestamp_interval = '100ms'

#
# Make sure that the general pattern of all queries in this file will use TopK
# and not some other future operator or optimization
#

# Remove references to internal table identifiers and "materialize.public" strings
$ set-regex match=(\s\(u\d+\)|materialize\.public\.) replacement=

?[version>=13500] EXPLAIN OPTIMIZED PLAN AS VERBOSE TEXT FOR SELECT (SELECT 'a' LIMIT 1);
Explained Query (fast path):
  Constant
    - ("a")

Target cluster: mz_catalog_server

?[version<13500] EXPLAIN OPTIMIZED PLAN FOR SELECT (SELECT 'a' LIMIT 1);
Explained Query (fast path):
  Constant
    - ("a")

Target cluster: mz_catalog_server

$ set schema={"type": "record", "name": "schema", "fields": [ {"name": "f1", "type": ["int", "null"]} , {"name": "f2", "type": ["int", "null"]}] }

$ kafka-create-topic topic=top1

# Empty ingest -- NOTE(review): presumably publishes the Avro schema to the
# registry before the source is created; confirm against testdrive semantics.
$ kafka-ingest format=avro topic=top1 schema=${schema} timestamp=1

> CREATE CONNECTION IF NOT EXISTS csr_conn TO CONFLUENT SCHEMA REGISTRY (
    URL '${testdrive.schema-registry-url}'
  );

> CREATE CONNECTION kafka_conn
  TO KAFKA (BROKER '${testdrive.kafka-addr}', SECURITY PROTOCOL PLAINTEXT);

> CREATE SOURCE t1
  IN CLUSTER ${arg.single-replica-cluster}
  FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-top1-${testdrive.seed}')
  FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  ENVELOPE NONE
  WITH (TIMESTAMP INTERVAL '100ms')

#
# Over constants
#

> SELECT (SELECT 'a' LIMIT 1);
a

> SELECT (SELECT 'a' ORDER BY 1 LIMIT 1);
a

> SELECT (SELECT 'a' GROUP BY 1 LIMIT 1);
a

> SELECT (SELECT 'a' ORDER BY 'a' LIMIT 1);
a

> SELECT (SELECT 'a' GROUP BY 'a' LIMIT 1);
a

#
# And now some actual materialized views
#

> CREATE MATERIALIZED VIEW limit_only AS SELECT (SELECT f1 FROM t1 LIMIT 1);

> CREATE MATERIALIZED VIEW group_by_limit AS SELECT (SELECT f1 FROM t1 GROUP BY f1 LIMIT 1);

> CREATE MATERIALIZED VIEW order_by_limit AS SELECT (SELECT f1 FROM t1 ORDER BY f1 LIMIT 1);

> CREATE MATERIALIZED VIEW order_by_desc_limit AS SELECT (SELECT f1 FROM t1 ORDER BY f1 DESC LIMIT 1);

> CREATE MATERIALIZED VIEW group_by_in_top_1 AS SELECT (select f2 FROM t1 AS inner WHERE inner.f1 = outer.f1 GROUP BY f2 LIMIT 1) FROM t1 AS outer;

> CREATE MATERIALIZED VIEW group_by_order_by_in_top_1 AS SELECT (select f2 FROM t1 AS inner WHERE inner.f1 = outer.f1 ORDER BY f2 DESC LIMIT 1) FROM t1 AS outer;

#
# Over an empty source
#

> SELECT * from limit_only;
<null>

> SELECT * from group_by_limit;
<null>

> SELECT * FROM order_by_limit;
<null>

> SELECT * from order_by_desc_limit;
<null>

> SELECT * FROM group_by_in_top_1;

> SELECT * FROM group_by_order_by_in_top_1;

#
# Over a source with a single record
#

$ kafka-ingest format=avro topic=top1 schema=${schema} timestamp=1
{"f1": {"int": 123}, "f2": {"int": -123} }

> SELECT * from limit_only;
123

> SELECT * from group_by_limit;
123

> SELECT * FROM order_by_limit;
123

> SELECT * from order_by_desc_limit;
123

> SELECT * FROM group_by_in_top_1;
-123

> SELECT * FROM group_by_order_by_in_top_1;
-123

#
# A second record arrives, causes the ORDER BY DESC view to change output
#

$ kafka-ingest format=avro topic=top1 schema=${schema} timestamp=2
{"f1": {"int": 234}, "f2": {"int" : -234} }

> SELECT * from limit_only;
123

> SELECT * from group_by_limit;
123

> SELECT * FROM order_by_limit;
123

> SELECT * from order_by_desc_limit;
234

> SELECT * FROM group_by_in_top_1;
-123
-234

> SELECT * FROM group_by_order_by_in_top_1;
-123
-234

#
# The third record causes all other views to change outputs
#

$ kafka-ingest format=avro topic=top1 schema=${schema} timestamp=3
{"f1": {"int": 0}, "f2": {"int": 0} }

> SELECT * from limit_only;
0

> SELECT * from group_by_limit;
0

> SELECT * FROM order_by_limit;
0

> SELECT * from order_by_desc_limit;
234

> SELECT * FROM group_by_in_top_1;
0
-123
-234

> SELECT * FROM group_by_order_by_in_top_1;
0
-123
-234

#
# Insert some more rows, mostly for the benefit of the "in_top_1" views
#

$ kafka-ingest format=avro topic=top1 schema=${schema} timestamp=4
{"f1": {"int": 0}, "f2": {"int": 0}}
{"f1": {"int": 234}, "f2": {"int": 0}}
{"f1": {"int": 123}, "f2": {"int": -234} }

> SELECT * from limit_only;
0

> SELECT * from group_by_limit;
0

> SELECT * FROM order_by_limit;
0

> SELECT * from order_by_desc_limit;
234

> SELECT * FROM group_by_in_top_1;
0
0
-234
-234
-234
-234

> SELECT * FROM group_by_order_by_in_top_1;
-123
-123
0
0
0
0

#
# And finally, insert some NULL values
#

$ kafka-ingest format=avro topic=top1 schema=${schema} timestamp=5
{"f1": null, "f2": null}
{"f1": {"int":0}, "f2": null}
{"f1": null, "f2": {"int": 0}}
{"f1": null, "f2": {"int": -234}}

> SELECT * from limit_only;
0

> SELECT * from group_by_limit;
0

> SELECT * FROM order_by_limit;
0

> SELECT * from order_by_desc_limit;
<null>

> SELECT * FROM group_by_in_top_1;
-234
-234
0
0
0
-234
-234
<null>
<null>
<null>

> SELECT * FROM group_by_order_by_in_top_1;
-123
-123
0
0
<null>
<null>
<null>
<null>
<null>
<null>

# Check arrangements, seeing new arrangements can mean a significant increase
# in memory consumptions and should be understood before adapting the values.
> SET cluster_replica = r1

>[version>=15000] SELECT
    mdod.dataflow_name,
    -- Simplify Column descriptions to avoid unnecessary test breakage when the
    -- optimizer evolves.
    regexp_replace(mdod.name, 'Column\((\d+), .+\)', 'Column($1)')
  FROM mz_introspection.mz_arrangement_sharing mash
  JOIN mz_introspection.mz_dataflow_operator_dataflows mdod ON mash.operator_id = mdod.id
  JOIN mz_introspection.mz_compute_exports USING (dataflow_id)
  WHERE export_id LIKE 'u%'
"Dataflow: group_by_in_top_1" "ArrangeBy[[Column(0)]]"
"Dataflow: group_by_in_top_1" "ArrangeBy[[Column(0)]]"
"Dataflow: group_by_in_top_1" "ArrangeBy[[Column(0)]]"
"Dataflow: group_by_in_top_1" "Arranged DistinctBy"
"Dataflow: group_by_in_top_1" "Arranged DistinctBy"
"Dataflow: group_by_in_top_1" "Arranged MonotonicTop1 partial [val: empty]"
"Dataflow: group_by_in_top_1" DistinctBy
"Dataflow: group_by_in_top_1" DistinctBy
"Dataflow: group_by_in_top_1" DistinctByErrorCheck
"Dataflow: group_by_in_top_1" DistinctByErrorCheck
"Dataflow: group_by_in_top_1" MonotonicTop1
"Dataflow: group_by_limit" "Arranged DistinctBy"
"Dataflow: group_by_limit" "Arranged MonotonicTop1 partial [val: empty]"
"Dataflow: group_by_limit" DistinctBy
"Dataflow: group_by_limit" DistinctByErrorCheck
"Dataflow: group_by_limit" MonotonicTop1
"Dataflow: group_by_order_by_in_top_1" "ArrangeBy[[Column(0)]]"
"Dataflow: group_by_order_by_in_top_1" "ArrangeBy[[Column(0)]]"
"Dataflow: group_by_order_by_in_top_1" "ArrangeBy[[Column(0)]]"
"Dataflow: group_by_order_by_in_top_1" "Arranged DistinctBy"
"Dataflow: group_by_order_by_in_top_1" "Arranged MonotonicTop1 partial [val: empty]"
"Dataflow: group_by_order_by_in_top_1" DistinctBy
"Dataflow: group_by_order_by_in_top_1" DistinctByErrorCheck
"Dataflow: group_by_order_by_in_top_1" MonotonicTop1
"Dataflow: limit_only" "Arranged MonotonicTop1 partial [val: empty]"
"Dataflow: limit_only" MonotonicTop1
"Dataflow: order_by_desc_limit" "Arranged MonotonicTop1 partial [val: empty]"
"Dataflow: order_by_desc_limit" MonotonicTop1
"Dataflow: order_by_limit" "Arranged MonotonicTop1 partial [val: empty]"
"Dataflow: order_by_limit" MonotonicTop1