# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

$ set-arg-default single-replica-cluster=quickstart

# LIMIT expressions (rather than literals) are gated behind a feature flag.
$ postgres-execute connection=postgres://mz_system:materialize@${testdrive.materialize-internal-sql-addr}
ALTER SYSTEM SET enable_expressions_in_limit_syntax TO true;

# Test monotonic top-k processing with k > 1.

$ set non-dbz-schema={
    "type": "record",
    "name": "cpx",
    "fields": [
      {"name": "a", "type": [ "null", "long" ]},
      {"name": "b", "type": "long"}
    ]
  }

$ kafka-create-topic topic=non-dbz-data

$ kafka-ingest format=avro topic=non-dbz-data schema=${non-dbz-schema} timestamp=1
{"a": {"long": 1}, "b": 1}
{"a": {"long": 1}, "b": 2}
{"a": {"long": 1}, "b": 3}
{"a": {"long": 1}, "b": 4}
{"a": {"long": 1}, "b": 5}
{"a": {"long": 2}, "b": 1000}
{"a": {"long": 2}, "b": 1001}
{"a": {"long": 2}, "b": 1002}
{"a": {"long": 2}, "b": 1003}
{"a": {"long": 2}, "b": 1004}
{"a": {"long": 3}, "b": 2000}
{"a": {"long": 3}, "b": 2000}
{"a": {"long": 4}, "b": 3001}

> CREATE CONNECTION kafka_conn
  TO KAFKA (BROKER '${testdrive.kafka-addr}', SECURITY PROTOCOL PLAINTEXT);

> CREATE SOURCE non_dbz_data
  IN CLUSTER ${arg.single-replica-cluster}
  FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-non-dbz-data-${testdrive.seed}')

> CREATE TABLE non_dbz_data_tbl FROM SOURCE non_dbz_data (REFERENCE "testdrive-non-dbz-data-${testdrive.seed}")
  FORMAT AVRO USING SCHEMA '${non-dbz-schema}'
  ENVELOPE NONE

# Create a monotonic topk plan that has both a limit and a group to test that thinning works as expected
> SELECT * FROM (SELECT DISTINCT a FROM non_dbz_data_tbl) grp, LATERAL (SELECT b FROM non_dbz_data_tbl WHERE a = grp.a ORDER BY b LIMIT 2);
a b
---------
1 1
1 2
2 1000
2 1001
3 2000
3 2000
4 3001

# Create a topk plan that has a limit expression that will initially not fail, but
# then eventually fail after the addition of offending data to a monotonic input.
# We observe that a non-monotonic top-k plan can recover from this condition after
# the addition of even more data, but a monotonic top-k plan cannot.
> CREATE VIEW v_basic AS
  SELECT *
  FROM
    (SELECT sum(a) AS s FROM non_dbz_data_tbl GROUP BY a) grp,
    LATERAL (
      SELECT b FROM non_dbz_data_tbl
      WHERE (grp.s IS NULL AND a IS NULL) OR a = grp.s
      ORDER BY b LIMIT 6 / abs(grp.s+2)
    );

> CREATE VIEW v_monotonic AS
  SELECT *
  FROM
    (SELECT DISTINCT a::numeric FROM non_dbz_data_tbl) grp,
    LATERAL (
      SELECT b FROM non_dbz_data_tbl
      WHERE (grp.a IS NULL AND a IS NULL) OR a = grp.a
      ORDER BY b LIMIT 6 / abs(grp.a+2)
    );

> CREATE DEFAULT INDEX ON v_basic;

> CREATE DEFAULT INDEX ON v_monotonic;

> SELECT * FROM v_basic ORDER BY s;
s b
----
4 3001

> SELECT * FROM v_monotonic ORDER BY a;
a b
----
1 1
1 2
2 1000
2 1001
3 2000
4 3001

$ kafka-ingest format=avro topic=non-dbz-data schema=${non-dbz-schema} timestamp=2
{"a": {"long": 5}, "b": 4001}
{"a": {"long": 5}, "b": 4002}
{"a": null, "b": 0}
{"a": null, "b": 1}

> SELECT * FROM v_basic ORDER BY s;
s b
----
4 3001
5 4001
<null> 0
<null> 1

> SELECT * FROM v_monotonic ORDER BY a;
a b
----
1 1
1 2
2 1000
2 1001
3 2000
4 3001
5 4001
<null> 0
<null> 1

# a = -2 makes the LIMIT expression 6 / abs(a+2) divide by zero.
$ kafka-ingest format=avro topic=non-dbz-data schema=${non-dbz-schema} timestamp=3
{"a": {"long": -1}, "b": -1}
{"a": {"long": -1}, "b": -2}
{"a": {"long": -2}, "b": -1001}

! SELECT * FROM v_basic ORDER BY s;
contains:division by zero

! SELECT * FROM v_monotonic ORDER BY a;
contains:division by zero

$ kafka-ingest format=avro topic=non-dbz-data schema=${non-dbz-schema} timestamp=4
{"a": {"long": -1}, "b": -3}
{"a": {"long": -2}, "b": -1002}
{"a": {"long": -3}, "b": -2001}

# v_basic groups by sum(a), so the extra rows shift the offending group away
# from -2 and the error retracts; v_monotonic keeps a = -2 and stays errored.
> SELECT * FROM v_basic ORDER BY s;
s b
----
-3 -2001
-3 -2001
4 3001
5 4001
<null> 0
<null> 1

! SELECT * FROM v_monotonic ORDER BY a;
contains:division by zero

> DROP VIEW v_basic;

> DROP VIEW v_monotonic;

# The following tests repeat the scenario in database-issues#5442.

$ set other-non-dbz-schema={
    "type": "record",
    "name": "cpx",
    "fields": [
      {"name": "a", "type": "long" },
      {"name": "b", "type": "long"}
    ]
  }

$ kafka-create-topic topic=other-non-dbz-data

$ kafka-ingest format=avro topic=other-non-dbz-data schema=${other-non-dbz-schema} timestamp=1
{"a": 1, "b": 42}
{"a": 2, "b": 42}
{"a": 3, "b": 42}
{"a": 4, "b": 42}
{"a": 5, "b": 42}
{"a": 6, "b": 42}
{"a": 7, "b": 42}
{"a": 8, "b": 42}
{"a": 9, "b": 42}
{"a": 10, "b": 42}
{"a": 11, "b": 42}
{"a": 12, "b": 42}
{"a": 13, "b": 42}
{"a": 14, "b": 42}
{"a": 15, "b": 42}
{"a": 16, "b": 42}
{"a": 17, "b": 42}
{"a": 18, "b": 42}
{"a": 19, "b": 42}
{"a": 20, "b": 42}
{"a": 21, "b": 42}
{"a": 22, "b": 42}
{"a": 23, "b": 42}
{"a": 24, "b": 42}
{"a": 25, "b": 42}
{"a": 26, "b": 42}
{"a": 27, "b": 42}
{"a": 28, "b": 42}
{"a": 29, "b": 42}
{"a": 30, "b": 42}
{"a": 31, "b": 42}
{"a": 32, "b": 42}
{"a": 33, "b": 42}
{"a": 34, "b": 42}
{"a": 35, "b": 42}
{"a": 36, "b": 42}
{"a": 37, "b": 42}
{"a": 38, "b": 42}
{"a": 39, "b": 42}

> CREATE SOURCE other_non_dbz_data
  IN CLUSTER ${arg.single-replica-cluster}
  FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-other-non-dbz-data-${testdrive.seed}')

> CREATE TABLE other_non_dbz_data_tbl FROM SOURCE other_non_dbz_data (REFERENCE "testdrive-other-non-dbz-data-${testdrive.seed}")
  FORMAT AVRO USING SCHEMA '${other-non-dbz-schema}'
  ENVELOPE NONE

# sum(1..37) = 703.
> SELECT sum(a) FROM (SELECT a FROM other_non_dbz_data_tbl ORDER BY b LIMIT 37);
sum
----
703

> CREATE VIEW v_other AS
  SELECT a FROM other_non_dbz_data_tbl ORDER BY b LIMIT 37;

> CREATE DEFAULT INDEX ON v_other;

> SELECT * FROM v_other;
a
----
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37

# Check arrangements, seeing new arrangements can mean a significant increase
# in memory consumptions and should be understood before adapting the values.
> SET cluster_replica = r1

> SELECT mdod.dataflow_name, mdod.name
  FROM mz_introspection.mz_arrangement_sharing mash
  JOIN mz_introspection.mz_dataflow_operator_dataflows mdod ON mash.operator_id = mdod.id
  JOIN mz_introspection.mz_compute_exports USING (dataflow_id)
  WHERE export_id LIKE 'u%'
"Dataflow: materialize.public.v_other_primary_idx" "ArrangeBy[[Column(0, \"a\")]]"
"Dataflow: materialize.public.v_other_primary_idx" "ArrangeBy[[Column(0, \"a\")]]-errors"
"Dataflow: materialize.public.v_other_primary_idx" "Arranged TopK input"
"Dataflow: materialize.public.v_other_primary_idx" "Reduced TopK input"
"Dataflow: materialize.public.v_other_primary_idx" "Arranged TopK input"
"Dataflow: materialize.public.v_other_primary_idx" "Arranged TopK input"
"Dataflow: materialize.public.v_other_primary_idx" "Arranged TopK input"
"Dataflow: materialize.public.v_other_primary_idx" "Arranged TopK input"
"Dataflow: materialize.public.v_other_primary_idx" "Arranged TopK input"
"Dataflow: materialize.public.v_other_primary_idx" "Arranged TopK input"
"Dataflow: materialize.public.v_other_primary_idx" "Arranged TopK input"
"Dataflow: materialize.public.v_other_primary_idx" "Reduced TopK input"
"Dataflow: materialize.public.v_other_primary_idx" "Reduced TopK input"
"Dataflow: materialize.public.v_other_primary_idx" "Reduced TopK input"
"Dataflow: materialize.public.v_other_primary_idx" "Reduced TopK input"
"Dataflow: materialize.public.v_other_primary_idx" "Reduced TopK input"
"Dataflow: materialize.public.v_other_primary_idx" "Reduced TopK input"
"Dataflow: materialize.public.v_other_primary_idx" "Reduced TopK input"