# Copyright Materialize, Inc. and contributors. All rights reserved. # # Use of this software is governed by the Business Source License # included in the LICENSE file at the root of this repository. # # As of the Change Date specified in that file, in accordance with # the Business Source License, use of this software will be governed # by the Apache License, Version 2.0. # --------------------------------------------------------- # Non-literal expressions in LIMIT. # (For literals, see `order_by.slt`) # --------------------------------------------------------- mode cockroach simple conn=mz_system,user=mz_system ALTER SYSTEM SET enable_expressions_in_limit_syntax TO true; ---- COMPLETE 0 # Schema # ------ statement ok CREATE TABLE people (id int, first_name text, allowance int); statement ok CREATE TABLE preferred_fruits (person_id int, fruit text, preference int); statement ok INSERT INTO people VALUES (1, 'frank', 4), (2, 'rj' , 2), (3, 'nick' , 1); statement ok INSERT INTO preferred_fruits VALUES (1, 'apple' , 1), (1, 'banana', 2), (1, 'orange', 3), (1, 'kiwi' , 4), (1, 'mango' , 5), (2, 'apple' , 1), (2, 'orange', 2), (2, 'banana', 3), (2, 'kiwi' , 4), (3, 'mango' , 1), (3, 'pickle', 2); statement ok CREATE TABLE cities ( name text NOT NULL, state text NOT NULL, pop int NOT NULL ); statement ok INSERT INTO cities VALUES ('Los_Angeles', 'CA', 3979576), ('Phoenix', 'AZ', 1680992), ('Houston', 'TX', 2320268), ('San_Diego', 'CA', 1423851), ('San_Francisco', 'CA', 881549), ('New_York', 'NY', 8336817), ('Dallas', 'TX', 1343573), ('San_Antonio', 'TX', 1547253), ('San_Jose', 'CA', 1021795), ('Chicago', 'IL', 2695598), ('Austin', 'TX', 978908); statement ok CREATE TABLE limits (sl text, l int); statement ok INSERT INTO limits VALUES ('C', 2), ('T', NULL); # Happy cases # ----------- # Correlated inner join. # Result: query TT SELECT first_name, fruit FROM people, LATERAL (SELECT fruit, preference FROM preferred_fruits WHERE person_id = id ORDER BY preference ASC LIMIT allowance OFFSET 1) ORDER BY first_name, preference ---- frank banana frank orange frank kiwi frank mango nick pickle rj orange rj banana # Correlated inner join. # Plan: query T multiline EXPLAIN OPTIMIZED PLAN WITH(arity, humanized expressions) AS VERBOSE TEXT FOR SELECT first_name, fruit FROM people, LATERAL (SELECT fruit, preference FROM preferred_fruits WHERE person_id = id ORDER BY preference ASC LIMIT allowance OFFSET 1) ORDER BY first_name, preference ---- Explained Query: Finish order_by=[#1{first_name} asc nulls_last, #4{preference} asc nulls_last] output=[#1, #3] Project (#0{id}..=#2{allowance}, #5{fruit}, #6{preference}) // { arity: 5 } Join on=(#0{id} = #3{id} AND #2{allowance} = #4{allowance}) type=differential // { arity: 7 } ArrangeBy keys=[[#0{id}, #2{allowance}]] // { arity: 3 } ReadStorage materialize.public.people // { arity: 3 } ArrangeBy keys=[[#0{id}, #1{allowance}]] // { arity: 4 } TopK group_by=[#0{id}, #1{allowance}] order_by=[#3{preference} asc nulls_last] limit=integer_to_bigint(#1{allowance}) offset=1 // { arity: 4 } Project (#0{id}, #1{allowance}, #3{fruit}, #4{preference}) // { arity: 4 } Join on=(#0{id} = #2{person_id}) type=differential // { arity: 5 } ArrangeBy keys=[[#0{id}]] // { arity: 2 } Distinct project=[#0{id}, #1{allowance}] // { arity: 2 } Project (#0{id}, #2{allowance}) // { arity: 2 } Filter (#0{id}) IS NOT NULL // { arity: 3 } ReadStorage materialize.public.people // { arity: 3 } ArrangeBy keys=[[#0{person_id}]] // { arity: 3 } Filter (#0{person_id}) IS NOT NULL // { arity: 3 } ReadStorage materialize.public.preferred_fruits // { arity: 3 } Source materialize.public.people Source materialize.public.preferred_fruits filter=((#0{person_id}) IS NOT NULL) Target cluster: quickstart EOF # Outer context is a singleton literal collection. # Result: query TT SELECT first_name, fruit FROM (VALUES (1, 'frank', 4)) AS people(id, first_name, allowance), LATERAL (SELECT fruit, preference FROM preferred_fruits WHERE person_id = id ORDER BY preference ASC LIMIT allowance OFFSET 1) ORDER BY first_name, preference ---- frank banana frank orange frank kiwi frank mango # Outer context is a singleton literal collection. # Plan: query T multiline EXPLAIN OPTIMIZED PLAN WITH(arity, humanized expressions) AS VERBOSE TEXT FOR SELECT first_name, fruit FROM (VALUES (1, 'frank', -4)) AS people(id, first_name, allowance), LATERAL (SELECT fruit, preference FROM preferred_fruits WHERE person_id = id ORDER BY preference ASC LIMIT allowance OFFSET 1) ORDER BY first_name, preference ---- Explained Query: Finish order_by=[#1 asc nulls_last, #4{preference} asc nulls_last] output=[#1, #3] Project (#2..=#4, #0{fruit}, #1{preference}) // { arity: 5 } Map (1, "frank", -4) // { arity: 5 } TopK order_by=[#1{preference} asc nulls_last] limit=-4 offset=1 // { arity: 2 } Project (#1{fruit}, #2{preference}) // { arity: 2 } Filter (#0{person_id} = 1) // { arity: 3 } ReadStorage materialize.public.preferred_fruits // { arity: 3 } Source materialize.public.preferred_fruits filter=((#0{person_id} = 1)) Target cluster: quickstart EOF # More complex limit expression. # Result: query TT SELECT s.state, c.name FROM (SELECT DISTINCT state FROM cities) s, LATERAL (SELECT name FROM cities WHERE state = s.state LIMIT ascii(substring(state, 1, 1)) - 64) c ORDER BY s.state, c.name LIMIT 3; ---- AZ Phoenix CA Los_Angeles CA San_Diego # More complex limit expression. # Plan: query T multiline EXPLAIN OPTIMIZED PLAN WITH(arity, humanized expressions) AS VERBOSE TEXT FOR SELECT s.state, c.name FROM (SELECT DISTINCT state FROM cities) s, LATERAL (SELECT name FROM cities WHERE state = s.state LIMIT ascii(substring(state, 1, 1)) - 64) c ORDER BY s.state, c.name LIMIT 3; ---- Explained Query: Finish order_by=[#0{state} asc nulls_last, #1{name} asc nulls_last] limit=3 output=[#0, #1] TopK group_by=[#0{state}] limit=integer_to_bigint((ascii(substr(#0{state}, 1, 1)) - 64)) // { arity: 2 } Project (#1{state}, #0{name}) // { arity: 2 } ReadStorage materialize.public.cities // { arity: 3 } Source materialize.public.cities Target cluster: quickstart EOF # A correlated subquery in the limit clause. # Result: query TT SELECT s.state, c.name FROM (SELECT DISTINCT state FROM cities) s, LATERAL ( SELECT name FROM cities c WHERE state = s.state ORDER BY c.name LIMIT (SELECT l FROM limits WHERE sl = substring(s.state, 1, 1))) c ORDER BY s.state, c.name; ---- AZ Phoenix CA Los_Angeles CA San_Diego IL Chicago NY New_York TX Austin TX Dallas TX Houston TX San_Antonio # A correlated subquery in the limit clause. # Plan: query T multiline EXPLAIN OPTIMIZED PLAN WITH(arity, humanized expressions) AS VERBOSE TEXT FOR SELECT s.state, c.name FROM (SELECT DISTINCT state FROM cities) s, LATERAL ( SELECT name FROM cities c WHERE state = s.state ORDER BY c.name LIMIT (SELECT l FROM limits WHERE sl = substring(s.state, 1, 1))) c ORDER BY s.state, c.name; ---- Explained Query: Finish order_by=[#0{state} asc nulls_last, #1{name} asc nulls_last] output=[#0, #1] With cte l0 = Distinct project=[#0{state}] // { arity: 1 } Project (#1{state}) // { arity: 1 } ReadStorage materialize.public.cities // { arity: 3 } cte l1 = Project (#0{state}, #2{l}) // { arity: 2 } Join on=(#1{sl} = substr(#0{state}, 1, 1)) type=differential // { arity: 3 } ArrangeBy keys=[[substr(#0{state}, 1, 1)]] // { arity: 1 } Get l0 // { arity: 1 } ArrangeBy keys=[[#0{sl}]] // { arity: 2 } Filter (#0{sl}) IS NOT NULL // { arity: 2 } ReadStorage materialize.public.limits // { arity: 2 } cte l2 = Union // { arity: 2 } Get l1 // { arity: 2 } Project (#0{state}, #2) // { arity: 2 } FlatMap guard_subquery_size(#1{count}) // { arity: 3 } Reduce group_by=[#0{state}] aggregates=[count(*)] // { arity: 2 } Project (#0{state}) // { arity: 1 } Get l1 // { arity: 2 } Return // { arity: 2 } Project (#1{state}, #0{name}) // { arity: 2 } TopK group_by=[#1{state}, #2{l}] order_by=[#0{name} asc nulls_last] limit=integer_to_bigint(#2{l}) // { arity: 3 } Project (#0{name}, #1{state}, #3{l}) // { arity: 3 } Join on=(#1{state} = #2{state}) type=differential // { arity: 4 } ArrangeBy keys=[[#1{state}]] // { arity: 2 } Project (#0{name}, #1{state}) // { arity: 2 } ReadStorage materialize.public.cities // { arity: 3 } ArrangeBy keys=[[#0{state}]] // { arity: 2 } Union // { arity: 2 } Get l2 // { arity: 2 } Map (null) // { arity: 2 } Union // { arity: 1 } Negate // { arity: 1 } Distinct project=[#0{state}] // { arity: 1 } Project (#0{state}) // { arity: 1 } Get l2 // { arity: 2 } Get l0 // { arity: 1 } Source materialize.public.cities Source materialize.public.limits filter=((#0{sl}) IS NOT NULL) Target cluster: quickstart EOF # Limit is a cast. # Result: query TT SELECT s.state, c.name FROM (SELECT DISTINCT state FROM cities) s, LATERAL (SELECT name FROM cities c LIMIT '1'::INT) c; ---- AZ Austin CA Austin IL Austin NY Austin TX Austin # Limit is a cast. # Result: query T multiline EXPLAIN OPTIMIZED PLAN WITH(arity, humanized expressions) AS VERBOSE TEXT FOR SELECT s.state, c.name FROM (SELECT DISTINCT state FROM cities) s, LATERAL (SELECT name FROM cities c LIMIT '1'::INT) c; ---- Explained Query: CrossJoin type=differential // { arity: 2 } ArrangeBy keys=[[]] // { arity: 1 } Distinct project=[#0{state}] // { arity: 1 } Project (#1{state}) // { arity: 1 } ReadStorage materialize.public.cities // { arity: 3 } ArrangeBy keys=[[]] // { arity: 1 } TopK limit=1 // { arity: 1 } Project (#0{name}) // { arity: 1 } ReadStorage materialize.public.cities // { arity: 3 } Source materialize.public.cities Target cluster: quickstart EOF # A chain of lateral queries with different complex LIMIT expressions. # Result: query TTT rowsort SELECT s.state, c1.name, c2.name FROM (SELECT DISTINCT state FROM cities) s, LATERAL (SELECT name FROM cities c WHERE state = s.state LIMIT mod(length(s.state), 3)) c1, LATERAL (SELECT name FROM cities c WHERE state = s.state LIMIT mod(length(s.state), 4)) c2; ---- TX Austin Austin TX Austin Dallas TX Dallas Austin TX Dallas Dallas AZ Phoenix Phoenix IL Chicago Chicago NY New_York New_York CA San_Diego San_Diego CA San_Diego Los_Angeles CA Los_Angeles San_Diego CA Los_Angeles Los_Angeles # A chain of lateral queries with different complex LIMIT expressions. # # TODO: this can be further simplified: similarly to how l2 corresponds to the # first lateral join we can have an almost identical variant that corresponds to # the second one followed by a three-way inner join. # # Plan: query T multiline EXPLAIN OPTIMIZED PLAN WITH(arity, humanized expressions) AS VERBOSE TEXT FOR SELECT s.state, c1.name, c2.name FROM (SELECT DISTINCT state FROM cities) s, LATERAL (SELECT name FROM cities c WHERE state = s.state LIMIT mod(length(s.state), 3)) c1, LATERAL (SELECT name FROM cities c WHERE state = s.state LIMIT mod(length(s.state), 4)) c2; ---- Explained Query: With cte l0 = Project (#0{name}, #1{state}) // { arity: 2 } ReadStorage materialize.public.cities // { arity: 3 } cte l1 = TopK group_by=[#1{state}] limit=integer_to_bigint((char_length(#1{state}) % 3)) // { arity: 2 } Get l0 // { arity: 2 } Return // { arity: 3 } Project (#1{state}, #0{name}, #3{name}) // { arity: 3 } Join on=(#1{state} = #2{state}) type=differential // { arity: 4 } ArrangeBy keys=[[#1{state}]] // { arity: 2 } Get l1 // { arity: 2 } ArrangeBy keys=[[#0{state}]] // { arity: 2 } TopK group_by=[#0{state}] limit=integer_to_bigint((char_length(#0{state}) % 4)) // { arity: 2 } Project (#0{state}, #1{name}) // { arity: 2 } Join on=(#0{state} = #2{state}) type=differential // { arity: 3 } ArrangeBy keys=[[#0{state}]] // { arity: 1 } Distinct project=[#0{state}] // { arity: 1 } Project (#1{state}) // { arity: 1 } Get l1 // { arity: 2 } ArrangeBy keys=[[#1{state}]] // { arity: 2 } Get l0 // { arity: 2 } Source materialize.public.cities Target cluster: quickstart EOF # Corner cases # ------------ # Limit 0 produces no results. query TT SELECT s.state, c.name FROM (SELECT DISTINCT state FROM cities) s, LATERAL (SELECT name FROM cities c LIMIT 0) c; ---- # Limit 0 reduces to the empty result during optimization. query T multiline EXPLAIN OPTIMIZED PLAN WITH(arity, humanized expressions) AS VERBOSE TEXT FOR SELECT s.state, c.name FROM (SELECT DISTINCT state FROM cities) s, LATERAL (SELECT name FROM cities c LIMIT 0) c; ---- Explained Query (fast path): Constant Target cluster: quickstart EOF # Limit NULL produces all results. query TT rowsort SELECT s.state, c.name FROM (SELECT DISTINCT state FROM cities) s, LATERAL (SELECT name FROM cities c LIMIT NULL) c; ---- AZ Austin AZ Dallas CA Austin CA Dallas IL Austin IL Dallas NY Austin NY Dallas TX Austin TX Dallas AZ Chicago AZ Houston AZ Phoenix CA Chicago CA Houston CA Phoenix IL Chicago IL Houston IL Phoenix NY Chicago NY Houston NY Phoenix TX Chicago TX Houston TX Phoenix AZ New_York AZ San_Jose CA New_York CA San_Jose IL New_York IL San_Jose NY New_York NY San_Jose TX New_York TX San_Jose AZ San_Diego CA San_Diego IL San_Diego NY San_Diego TX San_Diego AZ Los_Angeles AZ San_Antonio CA Los_Angeles CA San_Antonio IL Los_Angeles IL San_Antonio NY Los_Angeles NY San_Antonio TX Los_Angeles TX San_Antonio AZ San_Francisco CA San_Francisco IL San_Francisco NY San_Francisco TX San_Francisco # Limit NULL is removed during optimization. (TODO) query T multiline EXPLAIN OPTIMIZED PLAN WITH(arity, humanized expressions) AS VERBOSE TEXT FOR SELECT s.state, c.name FROM (SELECT DISTINCT state FROM cities) s, LATERAL (SELECT name FROM cities c LIMIT NULL) c; ---- Explained Query: CrossJoin type=differential // { arity: 2 } ArrangeBy keys=[[]] // { arity: 1 } Distinct project=[#0{state}] // { arity: 1 } Project (#1{state}) // { arity: 1 } ReadStorage materialize.public.cities // { arity: 3 } ArrangeBy keys=[[]] // { arity: 1 } Project (#0{name}) // { arity: 1 } ReadStorage materialize.public.cities // { arity: 3 } Source materialize.public.cities Target cluster: quickstart EOF # Expected errors # --------------- # Simple column refs from `cities` are not in scope for `LIMIT `. query error column "name" does not exist SELECT s.state, c.name FROM (SELECT DISTINCT state FROM cities) s, LATERAL (SELECT name FROM cities LIMIT pow(1000, length(name))) c; # Qualified column refs from `cities c` are not in scope for `LIMIT `. query error column "c.state" does not exist SELECT s.state, c.name FROM (SELECT DISTINCT state FROM cities) s, LATERAL (SELECT name FROM cities c LIMIT pow(1000, length(c.state))) c; # Invalid LIMIT expression evaluated at runtime. query error Evaluation error: LIMIT must not be negative SELECT s.state, c.name FROM (SELECT DISTINCT state FROM cities) s, LATERAL (SELECT name FROM cities c LIMIT length(state) - 100) c; # Invalid LIMIT expression evaluated at planning time. query error LIMIT must not be negative SELECT s.state, c.name FROM (SELECT DISTINCT state FROM cities) s, LATERAL (SELECT name FROM cities c LIMIT -1) c; # Invalid LIMIT expression evaluated at planning time. query error LIMIT must not be negative SELECT s.state, c.name FROM (SELECT DISTINCT state FROM cities) s, LATERAL (SELECT name FROM cities c LIMIT '-1') c; # Invalid LIMIT expression evaluated at planning time. query error LIMIT must not be negative SELECT s.state, c.name FROM (SELECT DISTINCT state FROM cities) s, LATERAL (SELECT name FROM cities c LIMIT 3 * 4 - 13) c; # Invalid LIMIT expression evaluated at planning time. query error value out of range: overflow SELECT s.state, c.name FROM (SELECT DISTINCT state FROM cities) s, LATERAL (SELECT name FROM cities LIMIT pow(10000, 1000)) c;