group_size_hints.slt 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. # Tests the new syntax for GROUP SIZE query hints proposed in:
  10. # https://github.com/MaterializeInc/materialize/blob/main/doc/developer/design/20230829_topk_size_hint.md
  11. # Additionally, the tests below include scenarios that validate backwards compability in the
  12. # hint syntax according to what is described in the design doc.
  13. statement ok
  14. CREATE TABLE teachers (id INT, name TEXT);
  15. statement ok
  16. CREATE TABLE sections (id INT, teacher_id INT, course_id INT, schedule TEXT);
  17. # Illustrates that the old hint applies to multiple operators in a single query block.
  18. statement ok
  19. CREATE MATERIALIZED VIEW distinct_on_group_by_limit AS
  20. SELECT DISTINCT ON(teacher_id) id, teacher_id, MAX(course_id)
  21. FROM sections
  22. GROUP BY id, teacher_id
  23. OPTIONS (EXPECTED GROUP SIZE = 1000)
  24. ORDER BY teacher_id, id
  25. LIMIT 2;
  26. query T multiline
  27. EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR MATERIALIZED VIEW distinct_on_group_by_limit;
  28. ----
  29. materialize.public.distinct_on_group_by_limit:
  30. TopK order_by=[#1{teacher_id} asc nulls_last, #0{id} asc nulls_last] limit=2 exp_group_size=1000
  31. TopK group_by=[#1{teacher_id}] order_by=[#0{id} asc nulls_last] limit=1 exp_group_size=1000
  32. Reduce group_by=[#0{id}, #1{teacher_id}] aggregates=[max(#2{course_id})] exp_group_size=1000
  33. Project (#0{id}..=#2{course_id})
  34. ReadStorage materialize.public.sections
  35. Source materialize.public.sections
  36. Target cluster: quickstart
  37. EOF
  38. statement ok
  39. DROP MATERIALIZED VIEW distinct_on_group_by_limit;
  40. # Illustrates a workaround with the old hint to apply different values to different operators in
  41. # a single query block.
  42. statement ok
  43. CREATE MATERIALIZED VIEW distinct_on_group_by_limit AS
  44. SELECT id, teacher_id, max_course_id
  45. FROM (
  46. SELECT DISTINCT ON(teacher_id) id, teacher_id, max_course_id
  47. FROM (
  48. SELECT id, teacher_id, MAX(course_id) AS max_course_id
  49. FROM sections
  50. GROUP BY id, teacher_id
  51. OPTIONS (EXPECTED GROUP SIZE = 1000)
  52. )
  53. OPTIONS (EXPECTED GROUP SIZE = 60)
  54. ORDER BY teacher_id, id
  55. )
  56. OPTIONS (EXPECTED GROUP SIZE = 50)
  57. ORDER BY teacher_id, id
  58. LIMIT 2;
  59. query T multiline
  60. EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR MATERIALIZED VIEW distinct_on_group_by_limit;
  61. ----
  62. materialize.public.distinct_on_group_by_limit:
  63. TopK order_by=[#1{teacher_id} asc nulls_last, #0{id} asc nulls_last] limit=2 exp_group_size=50
  64. TopK group_by=[#1{teacher_id}] order_by=[#0{id} asc nulls_last] limit=1 exp_group_size=60
  65. Reduce group_by=[#0{id}, #1{teacher_id}] aggregates=[max(#2{course_id})] exp_group_size=1000
  66. Project (#0{id}..=#2{course_id})
  67. ReadStorage materialize.public.sections
  68. Source materialize.public.sections
  69. Target cluster: quickstart
  70. EOF
  71. statement ok
  72. DROP MATERIALIZED VIEW distinct_on_group_by_limit;
  73. # Illustrates that new hints apply cleanly without ambiguity to different query blocks.
  74. statement ok
  75. CREATE MATERIALIZED VIEW distinct_on_group_by_limit AS
  76. SELECT id, teacher_id, max_course_id
  77. FROM (
  78. SELECT DISTINCT ON(teacher_id) id, teacher_id, max_course_id
  79. FROM (
  80. SELECT id, teacher_id, MAX(course_id) AS max_course_id
  81. FROM sections
  82. GROUP BY id, teacher_id
  83. OPTIONS (AGGREGATE INPUT GROUP SIZE = 1000)
  84. )
  85. OPTIONS (DISTINCT ON INPUT GROUP SIZE = 60)
  86. ORDER BY teacher_id, id
  87. )
  88. OPTIONS (LIMIT INPUT GROUP SIZE = 50)
  89. ORDER BY teacher_id, id
  90. LIMIT 2;
  91. query T multiline
  92. EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR MATERIALIZED VIEW distinct_on_group_by_limit;
  93. ----
  94. materialize.public.distinct_on_group_by_limit:
  95. TopK order_by=[#1{teacher_id} asc nulls_last, #0{id} asc nulls_last] limit=2 exp_group_size=50
  96. TopK group_by=[#1{teacher_id}] order_by=[#0{id} asc nulls_last] limit=1 exp_group_size=60
  97. Reduce group_by=[#0{id}, #1{teacher_id}] aggregates=[max(#2{course_id})] exp_group_size=1000
  98. Project (#0{id}..=#2{course_id})
  99. ReadStorage materialize.public.sections
  100. Source materialize.public.sections
  101. Target cluster: quickstart
  102. EOF
  103. statement ok
  104. DROP MATERIALIZED VIEW distinct_on_group_by_limit;
  105. # Illustrates that new hints apply without ambiguity in a single query block.
  106. statement ok
  107. CREATE MATERIALIZED VIEW distinct_on_group_by_limit AS
  108. SELECT DISTINCT ON(teacher_id) id, teacher_id, MAX(course_id)
  109. FROM sections
  110. GROUP BY id, teacher_id
  111. OPTIONS (
  112. AGGREGATE INPUT GROUP SIZE = 1000,
  113. DISTINCT ON INPUT GROUP SIZE = 60,
  114. LIMIT INPUT GROUP SIZE = 50)
  115. ORDER BY teacher_id, id
  116. LIMIT 2;
  117. query T multiline
  118. EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR MATERIALIZED VIEW distinct_on_group_by_limit;
  119. ----
  120. materialize.public.distinct_on_group_by_limit:
  121. TopK order_by=[#1{teacher_id} asc nulls_last, #0{id} asc nulls_last] limit=2 exp_group_size=50
  122. TopK group_by=[#1{teacher_id}] order_by=[#0{id} asc nulls_last] limit=1 exp_group_size=60
  123. Reduce group_by=[#0{id}, #1{teacher_id}] aggregates=[max(#2{course_id})] exp_group_size=1000
  124. Project (#0{id}..=#2{course_id})
  125. ReadStorage materialize.public.sections
  126. Source materialize.public.sections
  127. Target cluster: quickstart
  128. EOF
  129. statement ok
  130. DROP MATERIALIZED VIEW distinct_on_group_by_limit;
  131. # Illustrates that partial combinations of the new hints in a single query block,
  132. # namely AGGREGATE and LIMIT INPUT GROUP SIZE.
  133. statement ok
  134. CREATE MATERIALIZED VIEW distinct_on_group_by_limit AS
  135. SELECT DISTINCT ON(teacher_id) id, teacher_id, MAX(course_id)
  136. FROM sections
  137. GROUP BY id, teacher_id
  138. OPTIONS (
  139. AGGREGATE INPUT GROUP SIZE = 1000,
  140. LIMIT INPUT GROUP SIZE = 50)
  141. ORDER BY teacher_id, id
  142. LIMIT 2;
  143. query T multiline
  144. EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR MATERIALIZED VIEW distinct_on_group_by_limit;
  145. ----
  146. materialize.public.distinct_on_group_by_limit:
  147. TopK order_by=[#1{teacher_id} asc nulls_last, #0{id} asc nulls_last] limit=2 exp_group_size=50
  148. TopK group_by=[#1{teacher_id}] order_by=[#0{id} asc nulls_last] limit=1
  149. Reduce group_by=[#0{id}, #1{teacher_id}] aggregates=[max(#2{course_id})] exp_group_size=1000
  150. Project (#0{id}..=#2{course_id})
  151. ReadStorage materialize.public.sections
  152. Source materialize.public.sections
  153. Target cluster: quickstart
  154. EOF
  155. statement ok
  156. DROP MATERIALIZED VIEW distinct_on_group_by_limit;
  157. # Illustrates that partial combinations of the new hints in a single query block,
  158. # namely LIMIT and DISTINCT ON INPUT GROUP SIZE.
  159. statement ok
  160. CREATE MATERIALIZED VIEW distinct_on_group_by_limit AS
  161. SELECT DISTINCT ON(teacher_id) id, teacher_id, MAX(course_id)
  162. FROM sections
  163. GROUP BY id, teacher_id
  164. OPTIONS (
  165. LIMIT INPUT GROUP SIZE = 50,
  166. DISTINCT ON INPUT GROUP SIZE = 60)
  167. ORDER BY teacher_id, id
  168. LIMIT 2;
  169. query T multiline
  170. EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR MATERIALIZED VIEW distinct_on_group_by_limit;
  171. ----
  172. materialize.public.distinct_on_group_by_limit:
  173. TopK order_by=[#1{teacher_id} asc nulls_last, #0{id} asc nulls_last] limit=2 exp_group_size=50
  174. TopK group_by=[#1{teacher_id}] order_by=[#0{id} asc nulls_last] limit=1 exp_group_size=60
  175. Reduce group_by=[#0{id}, #1{teacher_id}] aggregates=[max(#2{course_id})]
  176. Project (#0{id}..=#2{course_id})
  177. ReadStorage materialize.public.sections
  178. Source materialize.public.sections
  179. Target cluster: quickstart
  180. EOF
  181. statement ok
  182. DROP MATERIALIZED VIEW distinct_on_group_by_limit;
  183. # Illustrates that mixing of the old and new syntax for hints raises an error.
  184. statement error EXPECTED GROUP SIZE cannot be provided simultaneously with any of AGGREGATE INPUT GROUP SIZE, DISTINCT ON INPUT GROUP SIZE, or LIMIT INPUT GROUP SIZE
  185. CREATE MATERIALIZED VIEW distinct_on_group_by_limit AS
  186. SELECT DISTINCT ON(teacher_id) id, teacher_id, MAX(course_id)
  187. FROM sections
  188. GROUP BY id, teacher_id
  189. OPTIONS (
  190. LIMIT INPUT GROUP SIZE = 50,
  191. EXPECTED GROUP SIZE = 1000)
  192. ORDER BY teacher_id, id
  193. LIMIT 2;
  194. # Illustrates that the new syntax for hints can be used with a LATERAL top-k pattern.
  195. statement ok
  196. CREATE MATERIALIZED VIEW sections_of_top_3_courses_per_teacher AS
  197. SELECT id AS teacher_id, section_id
  198. FROM teachers grp,
  199. LATERAL (SELECT id AS section_id
  200. FROM sections
  201. WHERE teacher_id = grp.id
  202. OPTIONS (LIMIT INPUT GROUP SIZE = 1000)
  203. ORDER BY course_id DESC
  204. LIMIT 3);
  205. query T multiline
  206. EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR MATERIALIZED VIEW sections_of_top_3_courses_per_teacher;
  207. ----
  208. materialize.public.sections_of_top_3_courses_per_teacher:
  209. Project (#0{id}, #2{id})
  210. Join on=(#0{id} = #1{id}) type=differential
  211. ArrangeBy keys=[[#0{id}]]
  212. Project (#0{id})
  213. ReadStorage materialize.public.teachers
  214. ArrangeBy keys=[[#0{id}]]
  215. Project (#0{id}, #1{id})
  216. TopK group_by=[#0{id}] order_by=[#2{course_id} desc nulls_first] limit=3 exp_group_size=1000
  217. Project (#0{id}, #1{id}, #3{course_id})
  218. Join on=(#0{id} = #2{teacher_id}) type=differential
  219. ArrangeBy keys=[[#0{id}]]
  220. Distinct project=[#0{id}]
  221. Project (#0{id})
  222. Filter (#0{id}) IS NOT NULL
  223. ReadStorage materialize.public.teachers
  224. ArrangeBy keys=[[#1{teacher_id}]]
  225. Project (#0{id}..=#2{course_id})
  226. Filter (#1{teacher_id}) IS NOT NULL
  227. ReadStorage materialize.public.sections
  228. Source materialize.public.teachers
  229. Source materialize.public.sections
  230. filter=((#1{teacher_id}) IS NOT NULL)
  231. Target cluster: quickstart
  232. EOF
  233. statement ok
  234. DROP MATERIALIZED VIEW sections_of_top_3_courses_per_teacher;
  235. # Illustrates that the new syntax for hints can be used with a LATERAL top-k pattern
  236. # and in conjunction with a min/max aggregation in the same query block.
  237. statement ok
  238. CREATE MATERIALIZED VIEW max_sections_of_top_3_courses_per_teacher AS
  239. SELECT id AS teacher_id, max_section_id
  240. FROM teachers grp,
  241. LATERAL (SELECT course_id, MAX(id) AS max_section_id
  242. FROM sections
  243. WHERE teacher_id = grp.id
  244. GROUP BY course_id
  245. OPTIONS (AGGREGATE INPUT GROUP SIZE = 1000, LIMIT INPUT GROUP SIZE = 20)
  246. ORDER BY course_id DESC
  247. LIMIT 3);
  248. query T multiline
  249. EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR MATERIALIZED VIEW max_sections_of_top_3_courses_per_teacher;
  250. ----
  251. materialize.public.max_sections_of_top_3_courses_per_teacher:
  252. Project (#0{id}, #2{max_id})
  253. Join on=(#0{id} = #1{id}) type=differential
  254. ArrangeBy keys=[[#0{id}]]
  255. Project (#0{id})
  256. ReadStorage materialize.public.teachers
  257. ArrangeBy keys=[[#0{id}]]
  258. Project (#0{id}, #2{max_id})
  259. TopK group_by=[#0{id}] order_by=[#1{course_id} desc nulls_first] limit=3 exp_group_size=20
  260. Reduce group_by=[#0{id}, #2{course_id}] aggregates=[max(#1{id})] exp_group_size=1000
  261. Project (#0{id}, #1{id}, #3{course_id})
  262. Join on=(#0{id} = #2{teacher_id}) type=differential
  263. ArrangeBy keys=[[#0{id}]]
  264. Distinct project=[#0{id}]
  265. Project (#0{id})
  266. Filter (#0{id}) IS NOT NULL
  267. ReadStorage materialize.public.teachers
  268. ArrangeBy keys=[[#1{teacher_id}]]
  269. Project (#0{id}..=#2{course_id})
  270. Filter (#1{teacher_id}) IS NOT NULL
  271. ReadStorage materialize.public.sections
  272. Source materialize.public.teachers
  273. Source materialize.public.sections
  274. filter=((#1{teacher_id}) IS NOT NULL)
  275. Target cluster: quickstart
  276. EOF
  277. statement ok
  278. DROP MATERIALIZED VIEW max_sections_of_top_3_courses_per_teacher;