123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330 |
- # Copyright Materialize, Inc. and contributors. All rights reserved.
- #
- # Use of this software is governed by the Business Source License
- # included in the LICENSE file at the root of this repository.
- #
- # As of the Change Date specified in that file, in accordance with
- # the Business Source License, use of this software will be governed
- # by the Apache License, Version 2.0.
- # Tests the new syntax for GROUP SIZE query hints proposed in:
- # https://github.com/MaterializeInc/materialize/blob/main/doc/developer/design/20230829_topk_size_hint.md
- # Additionally, the tests below include scenarios that validate backwards compability in the
- # hint syntax according to what is described in the design doc.
- statement ok
- CREATE TABLE teachers (id INT, name TEXT);
- statement ok
- CREATE TABLE sections (id INT, teacher_id INT, course_id INT, schedule TEXT);
- # Illustrates that the old hint applies to multiple operators in a single query block.
- statement ok
- CREATE MATERIALIZED VIEW distinct_on_group_by_limit AS
- SELECT DISTINCT ON(teacher_id) id, teacher_id, MAX(course_id)
- FROM sections
- GROUP BY id, teacher_id
- OPTIONS (EXPECTED GROUP SIZE = 1000)
- ORDER BY teacher_id, id
- LIMIT 2;
- query T multiline
- EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR MATERIALIZED VIEW distinct_on_group_by_limit;
- ----
- materialize.public.distinct_on_group_by_limit:
- TopK order_by=[#1{teacher_id} asc nulls_last, #0{id} asc nulls_last] limit=2 exp_group_size=1000
- TopK group_by=[#1{teacher_id}] order_by=[#0{id} asc nulls_last] limit=1 exp_group_size=1000
- Reduce group_by=[#0{id}, #1{teacher_id}] aggregates=[max(#2{course_id})] exp_group_size=1000
- Project (#0{id}..=#2{course_id})
- ReadStorage materialize.public.sections
- Source materialize.public.sections
- Target cluster: quickstart
- EOF
- statement ok
- DROP MATERIALIZED VIEW distinct_on_group_by_limit;
- # Illustrates a workaround with the old hint to apply different values to different operators in
- # a single query block.
- statement ok
- CREATE MATERIALIZED VIEW distinct_on_group_by_limit AS
- SELECT id, teacher_id, max_course_id
- FROM (
- SELECT DISTINCT ON(teacher_id) id, teacher_id, max_course_id
- FROM (
- SELECT id, teacher_id, MAX(course_id) AS max_course_id
- FROM sections
- GROUP BY id, teacher_id
- OPTIONS (EXPECTED GROUP SIZE = 1000)
- )
- OPTIONS (EXPECTED GROUP SIZE = 60)
- ORDER BY teacher_id, id
- )
- OPTIONS (EXPECTED GROUP SIZE = 50)
- ORDER BY teacher_id, id
- LIMIT 2;
- query T multiline
- EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR MATERIALIZED VIEW distinct_on_group_by_limit;
- ----
- materialize.public.distinct_on_group_by_limit:
- TopK order_by=[#1{teacher_id} asc nulls_last, #0{id} asc nulls_last] limit=2 exp_group_size=50
- TopK group_by=[#1{teacher_id}] order_by=[#0{id} asc nulls_last] limit=1 exp_group_size=60
- Reduce group_by=[#0{id}, #1{teacher_id}] aggregates=[max(#2{course_id})] exp_group_size=1000
- Project (#0{id}..=#2{course_id})
- ReadStorage materialize.public.sections
- Source materialize.public.sections
- Target cluster: quickstart
- EOF
- statement ok
- DROP MATERIALIZED VIEW distinct_on_group_by_limit;
- # Illustrates that new hints apply cleanly without ambiguity to different query blocks.
- statement ok
- CREATE MATERIALIZED VIEW distinct_on_group_by_limit AS
- SELECT id, teacher_id, max_course_id
- FROM (
- SELECT DISTINCT ON(teacher_id) id, teacher_id, max_course_id
- FROM (
- SELECT id, teacher_id, MAX(course_id) AS max_course_id
- FROM sections
- GROUP BY id, teacher_id
- OPTIONS (AGGREGATE INPUT GROUP SIZE = 1000)
- )
- OPTIONS (DISTINCT ON INPUT GROUP SIZE = 60)
- ORDER BY teacher_id, id
- )
- OPTIONS (LIMIT INPUT GROUP SIZE = 50)
- ORDER BY teacher_id, id
- LIMIT 2;
- query T multiline
- EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR MATERIALIZED VIEW distinct_on_group_by_limit;
- ----
- materialize.public.distinct_on_group_by_limit:
- TopK order_by=[#1{teacher_id} asc nulls_last, #0{id} asc nulls_last] limit=2 exp_group_size=50
- TopK group_by=[#1{teacher_id}] order_by=[#0{id} asc nulls_last] limit=1 exp_group_size=60
- Reduce group_by=[#0{id}, #1{teacher_id}] aggregates=[max(#2{course_id})] exp_group_size=1000
- Project (#0{id}..=#2{course_id})
- ReadStorage materialize.public.sections
- Source materialize.public.sections
- Target cluster: quickstart
- EOF
- statement ok
- DROP MATERIALIZED VIEW distinct_on_group_by_limit;
- # Illustrates that new hints apply without ambiguity in a single query block.
- statement ok
- CREATE MATERIALIZED VIEW distinct_on_group_by_limit AS
- SELECT DISTINCT ON(teacher_id) id, teacher_id, MAX(course_id)
- FROM sections
- GROUP BY id, teacher_id
- OPTIONS (
- AGGREGATE INPUT GROUP SIZE = 1000,
- DISTINCT ON INPUT GROUP SIZE = 60,
- LIMIT INPUT GROUP SIZE = 50)
- ORDER BY teacher_id, id
- LIMIT 2;
- query T multiline
- EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR MATERIALIZED VIEW distinct_on_group_by_limit;
- ----
- materialize.public.distinct_on_group_by_limit:
- TopK order_by=[#1{teacher_id} asc nulls_last, #0{id} asc nulls_last] limit=2 exp_group_size=50
- TopK group_by=[#1{teacher_id}] order_by=[#0{id} asc nulls_last] limit=1 exp_group_size=60
- Reduce group_by=[#0{id}, #1{teacher_id}] aggregates=[max(#2{course_id})] exp_group_size=1000
- Project (#0{id}..=#2{course_id})
- ReadStorage materialize.public.sections
- Source materialize.public.sections
- Target cluster: quickstart
- EOF
- statement ok
- DROP MATERIALIZED VIEW distinct_on_group_by_limit;
- # Illustrates that partial combinations of the new hints in a single query block,
- # namely AGGREGATE and LIMIT INPUT GROUP SIZE.
- statement ok
- CREATE MATERIALIZED VIEW distinct_on_group_by_limit AS
- SELECT DISTINCT ON(teacher_id) id, teacher_id, MAX(course_id)
- FROM sections
- GROUP BY id, teacher_id
- OPTIONS (
- AGGREGATE INPUT GROUP SIZE = 1000,
- LIMIT INPUT GROUP SIZE = 50)
- ORDER BY teacher_id, id
- LIMIT 2;
- query T multiline
- EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR MATERIALIZED VIEW distinct_on_group_by_limit;
- ----
- materialize.public.distinct_on_group_by_limit:
- TopK order_by=[#1{teacher_id} asc nulls_last, #0{id} asc nulls_last] limit=2 exp_group_size=50
- TopK group_by=[#1{teacher_id}] order_by=[#0{id} asc nulls_last] limit=1
- Reduce group_by=[#0{id}, #1{teacher_id}] aggregates=[max(#2{course_id})] exp_group_size=1000
- Project (#0{id}..=#2{course_id})
- ReadStorage materialize.public.sections
- Source materialize.public.sections
- Target cluster: quickstart
- EOF
- statement ok
- DROP MATERIALIZED VIEW distinct_on_group_by_limit;
- # Illustrates that partial combinations of the new hints in a single query block,
- # namely LIMIT and DISTINCT ON INPUT GROUP SIZE.
- statement ok
- CREATE MATERIALIZED VIEW distinct_on_group_by_limit AS
- SELECT DISTINCT ON(teacher_id) id, teacher_id, MAX(course_id)
- FROM sections
- GROUP BY id, teacher_id
- OPTIONS (
- LIMIT INPUT GROUP SIZE = 50,
- DISTINCT ON INPUT GROUP SIZE = 60)
- ORDER BY teacher_id, id
- LIMIT 2;
- query T multiline
- EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR MATERIALIZED VIEW distinct_on_group_by_limit;
- ----
- materialize.public.distinct_on_group_by_limit:
- TopK order_by=[#1{teacher_id} asc nulls_last, #0{id} asc nulls_last] limit=2 exp_group_size=50
- TopK group_by=[#1{teacher_id}] order_by=[#0{id} asc nulls_last] limit=1 exp_group_size=60
- Reduce group_by=[#0{id}, #1{teacher_id}] aggregates=[max(#2{course_id})]
- Project (#0{id}..=#2{course_id})
- ReadStorage materialize.public.sections
- Source materialize.public.sections
- Target cluster: quickstart
- EOF
- statement ok
- DROP MATERIALIZED VIEW distinct_on_group_by_limit;
- # Illustrates that mixing of the old and new syntax for hints raises an error.
- statement error EXPECTED GROUP SIZE cannot be provided simultaneously with any of AGGREGATE INPUT GROUP SIZE, DISTINCT ON INPUT GROUP SIZE, or LIMIT INPUT GROUP SIZE
- CREATE MATERIALIZED VIEW distinct_on_group_by_limit AS
- SELECT DISTINCT ON(teacher_id) id, teacher_id, MAX(course_id)
- FROM sections
- GROUP BY id, teacher_id
- OPTIONS (
- LIMIT INPUT GROUP SIZE = 50,
- EXPECTED GROUP SIZE = 1000)
- ORDER BY teacher_id, id
- LIMIT 2;
- # Illustrates that the new syntax for hints can be used with a LATERAL top-k pattern.
- statement ok
- CREATE MATERIALIZED VIEW sections_of_top_3_courses_per_teacher AS
- SELECT id AS teacher_id, section_id
- FROM teachers grp,
- LATERAL (SELECT id AS section_id
- FROM sections
- WHERE teacher_id = grp.id
- OPTIONS (LIMIT INPUT GROUP SIZE = 1000)
- ORDER BY course_id DESC
- LIMIT 3);
- query T multiline
- EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR MATERIALIZED VIEW sections_of_top_3_courses_per_teacher;
- ----
- materialize.public.sections_of_top_3_courses_per_teacher:
- Project (#0{id}, #2{id})
- Join on=(#0{id} = #1{id}) type=differential
- ArrangeBy keys=[[#0{id}]]
- Project (#0{id})
- ReadStorage materialize.public.teachers
- ArrangeBy keys=[[#0{id}]]
- Project (#0{id}, #1{id})
- TopK group_by=[#0{id}] order_by=[#2{course_id} desc nulls_first] limit=3 exp_group_size=1000
- Project (#0{id}, #1{id}, #3{course_id})
- Join on=(#0{id} = #2{teacher_id}) type=differential
- ArrangeBy keys=[[#0{id}]]
- Distinct project=[#0{id}]
- Project (#0{id})
- Filter (#0{id}) IS NOT NULL
- ReadStorage materialize.public.teachers
- ArrangeBy keys=[[#1{teacher_id}]]
- Project (#0{id}..=#2{course_id})
- Filter (#1{teacher_id}) IS NOT NULL
- ReadStorage materialize.public.sections
- Source materialize.public.teachers
- Source materialize.public.sections
- filter=((#1{teacher_id}) IS NOT NULL)
- Target cluster: quickstart
- EOF
- statement ok
- DROP MATERIALIZED VIEW sections_of_top_3_courses_per_teacher;
- # Illustrates that the new syntax for hints can be used with a LATERAL top-k pattern
- # and in conjunction with a min/max aggregation in the same query block.
- statement ok
- CREATE MATERIALIZED VIEW max_sections_of_top_3_courses_per_teacher AS
- SELECT id AS teacher_id, max_section_id
- FROM teachers grp,
- LATERAL (SELECT course_id, MAX(id) AS max_section_id
- FROM sections
- WHERE teacher_id = grp.id
- GROUP BY course_id
- OPTIONS (AGGREGATE INPUT GROUP SIZE = 1000, LIMIT INPUT GROUP SIZE = 20)
- ORDER BY course_id DESC
- LIMIT 3);
- query T multiline
- EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR MATERIALIZED VIEW max_sections_of_top_3_courses_per_teacher;
- ----
- materialize.public.max_sections_of_top_3_courses_per_teacher:
- Project (#0{id}, #2{max_id})
- Join on=(#0{id} = #1{id}) type=differential
- ArrangeBy keys=[[#0{id}]]
- Project (#0{id})
- ReadStorage materialize.public.teachers
- ArrangeBy keys=[[#0{id}]]
- Project (#0{id}, #2{max_id})
- TopK group_by=[#0{id}] order_by=[#1{course_id} desc nulls_first] limit=3 exp_group_size=20
- Reduce group_by=[#0{id}, #2{course_id}] aggregates=[max(#1{id})] exp_group_size=1000
- Project (#0{id}, #1{id}, #3{course_id})
- Join on=(#0{id} = #2{teacher_id}) type=differential
- ArrangeBy keys=[[#0{id}]]
- Distinct project=[#0{id}]
- Project (#0{id})
- Filter (#0{id}) IS NOT NULL
- ReadStorage materialize.public.teachers
- ArrangeBy keys=[[#1{teacher_id}]]
- Project (#0{id}..=#2{course_id})
- Filter (#1{teacher_id}) IS NOT NULL
- ReadStorage materialize.public.sections
- Source materialize.public.teachers
- Source materialize.public.sections
- filter=((#1{teacher_id}) IS NOT NULL)
- Target cluster: quickstart
- EOF
- statement ok
- DROP MATERIALIZED VIEW max_sections_of_top_3_courses_per_teacher;
|