# Copyright 2015 - 2019 The Cockroach Authors. All rights reserved. # Copyright Materialize, Inc. and contributors. All rights reserved. # # Use of this software is governed by the Business Source License # included in the LICENSE file at the root of this repository. # # As of the Change Date specified in that file, in accordance with # the Business Source License, use of this software will be governed # by the Apache License, Version 2.0. # # This file is derived from the logic test suite in CockroachDB. The # original file was retrieved on June 10, 2019 from: # # https://github.com/cockroachdb/cockroach/blob/d2f7fbf5dd1fc1a099bbad790a2e1f7c60a66cc3/pkg/sql/logictest/testdata/logic_test/distinct_on # # The original source code is subject to the terms of the Apache # 2.0 license, a copy of which can be found in the LICENSE file at the # root of this repository. mode cockroach simple conn=mz_system,user=mz_system ALTER SYSTEM SET unsafe_enable_table_keys = true ---- COMPLETE 0 statement ok CREATE TABLE xyz ( x INT, y INT, z INT, pk1 INT, pk2 INT, PRIMARY KEY (pk1, pk2) ) statement ok INSERT INTO xyz VALUES (1, 1, NULL, 1, 1), (1, 1, 2, 2, 2), (1, 1, 2, 3, 3), (1, 2, 1, 4, 4), (2, 2, 3, 5, 5), (4, 5, 6, 6, 6), (4, 1, 6, 7, 7) statement ok CREATE TABLE abc ( a STRING, b STRING, c STRING, PRIMARY KEY (a, b, c) ) statement ok INSERT INTO abc VALUES ('1', '1', '1'), ('1', '1', '2'), ('1', '2', '2') ################## # Simple queries # ################## # 3/3 columns query III rowsort SELECT DISTINCT ON (x, y, z) x, y, z FROM xyz ---- 1 1 NULL 1 1 2 1 2 1 2 2 3 4 5 6 4 1 6 query I rowsort SELECT DISTINCT ON (y, x, z) x FROM xyz ---- 1 1 1 2 4 4 query I rowsort SELECT DISTINCT ON (z, y, x) z FROM xyz ---- NULL 2 1 3 6 6 query TTT rowsort SELECT DISTINCT ON (b, c, a) a, c, b FROM abc ---- 1 1 1 1 2 1 1 2 2 query T rowsort SELECT DISTINCT ON (b, c, a) a FROM abc ---- 1 1 1 # We need to rowsort this since the ORDER BY isn't on the entire SELECT columns. query T rowsort SELECT DISTINCT ON (c, a, b) b FROM abc ORDER BY b ---- 1 1 2 # 2/3 columns query II rowsort SELECT DISTINCT ON (x, y) y, x FROM xyz ---- 1 1 2 1 2 2 5 4 1 4 query I rowsort SELECT DISTINCT ON (y, x) x FROM xyz ---- 1 1 2 4 4 query I rowsort SELECT DISTINCT ON (x, y) y FROM xyz ---- 1 2 2 5 1 query TT SELECT DISTINCT ON (a, c) a, b FROM abc ORDER BY a, c, b ---- 1 1 1 1 # We wrap this with an ORDER BY otherwise this would be non-deterministic. query TTT SELECT DISTINCT ON (c, a) b, c, a FROM abc ORDER BY c, a, b DESC ---- 1 1 1 2 2 1 # 1/3 columns query I rowsort SELECT DISTINCT ON (y) y FROM xyz ---- 1 2 5 query T rowsort SELECT DISTINCT ON (c) a FROM abc ---- 1 1 query T rowsort SELECT DISTINCT ON (b) b FROM abc ---- 1 2 # We wrap this with an ORDER BY otherwise this would be non-deterministic. query TTT SELECT DISTINCT ON (a) a, b, c FROM abc ORDER BY a, b, c ---- 1 1 1 query TT SELECT DISTINCT ON (a) a, c FROM abc ORDER BY a, c DESC, b ---- 1 2 ################# # With ORDER BY # ################# statement error SELECT DISTINCT ON expressions must match initial ORDER BY expressions SELECT DISTINCT ON (x) x, y, z FROM xyz ORDER BY y statement error SELECT DISTINCT ON expressions must match initial ORDER BY expressions SELECT DISTINCT ON (y) x, y, z FROM xyz ORDER BY x, y statement error SELECT DISTINCT ON expressions must match initial ORDER BY expressions SELECT DISTINCT ON (y, z) x, y, z FROM xyz ORDER BY x query I SELECT DISTINCT ON (x) x FROM xyz ORDER BY x DESC ---- 4 2 1 # We add a filter to eliminate one of the rows that may be flakily returned # depending on parallel execution of DISTINCT ON. # Note: Result differs from Cockroach but matches Postgres. query III SELECT DISTINCT ON (x, z) y, z, x FROM xyz WHERE (x,y,z) != (4, 1, 6) ORDER BY z ---- 2 1 1 1 2 1 2 3 2 5 6 4 1 NULL 1 # Note: Result differs from Cockroach but matches Postgres. query III SELECT DISTINCT ON (x) y, z, x FROM xyz ORDER BY x ASC, z DESC, y DESC ---- 1 NULL 1 2 3 2 5 6 4 # Regression test for cockroach#35437: Discard extra ordering columns after performing # DISTINCT operation. query T SELECT (SELECT DISTINCT ON (a) a FROM abc ORDER BY a, b||'foo') || 'bar'; ---- 1bar ##################### # With aggregations # ##################### statement error column "xyz.y" must appear in the GROUP BY clause or be used in an aggregate function SELECT DISTINCT ON(max(x)) y FROM xyz statement error column "xyz.z" must appear in the GROUP BY clause or be used in an aggregate function SELECT DISTINCT ON(max(x), z) min(y) FROM xyz query I SELECT DISTINCT ON (max(x)) min(y) FROM xyz ---- 1 query I SELECT DISTINCT ON (min(x)) max(y) FROM xyz ---- 5 query T SELECT DISTINCT ON(min(a), max(b), min(c)) max(c) FROM abc ---- 2 ################# # With GROUP BY # ################# statement error column "xyz.x" must appear in the GROUP BY clause or be used in an aggregate function SELECT DISTINCT ON (x) min(x) FROM xyz GROUP BY y query I rowsort SELECT DISTINCT ON(y) min(x) FROM xyz GROUP BY y ---- 1 1 4 query I SELECT DISTINCT ON(min(x)) min(x) FROM xyz GROUP BY y HAVING min(x) = 1 ---- 1 ######################### # With window functions # ######################### skipif postgresql # TODO(benesch): support row_number query I rowsort SELECT DISTINCT ON(row_number() OVER(ORDER BY (pk1, pk2))) y FROM xyz ---- 1 1 1 2 2 5 1 skipif postgresql # TODO(benesch): support row_number query I SELECT DISTINCT ON(row_number() OVER(ORDER BY (pk1, pk2))) y FROM xyz ORDER BY row_number() OVER(ORDER BY (pk1, pk2)) DESC ---- 1 5 2 2 1 1 1 ########################### # With ordinal references # ########################### statement error column reference 2 in DISTINCT ON clause is out of range \(1 - 1\) SELECT DISTINCT ON (2) x FROM xyz query I rowsort SELECT DISTINCT ON (1) x FROM xyz ---- 1 2 4 query III rowsort SELECT DISTINCT ON (1,2,3) x, y, z FROM xyz ---- 1 1 NULL 1 1 2 1 2 1 2 2 3 4 5 6 4 1 6 ######################### # With alias references # ######################### # This should prioritize alias (use 'x' as the key). # This would be non-deterministic if we don't select y (actually x) from the # subquery. query I rowsort SELECT y FROM (SELECT DISTINCT ON(y) x AS y, y AS x FROM xyz) ---- 1 2 4 # Ignores the alias. query I rowsort SELECT DISTINCT ON(x) x AS y FROM xyz ---- 1 2 4 ################################## # With nested parentheses/tuples # ################################## query II rowsort SELECT DISTINCT ON(((x)), (x, y)) x, y FROM xyz ---- 1 1 1 2 2 2 4 5 4 1 ################################ # Hybrid PK and non-PK queries # ################################ # We need to rowsort this since the ORDER BY isn't on the entire SELECT columns. query III rowsort SELECT DISTINCT ON(pk1, pk2, x, y) x, y, z FROM xyz ORDER BY x, y ---- 1 1 NULL 1 1 2 1 1 2 1 2 1 2 2 3 4 1 6 4 5 6 # Ordering only propagates up until distinctNode. # pk1 ordering does not propagate at all since it's not explicitly needed. # We add a filter since there could be multiple valid pk1s otherwise for distinct # rows. query I rowsort SELECT DISTINCT ON (x, y, z) pk1 FROM (SELECT * FROM xyz WHERE x >= 2) ORDER BY x ---- 5 6 7 # Regression tests for cockroach#34112: distinct on constant column. query II SELECT DISTINCT ON (x) x, y FROM xyz WHERE x = 1 ORDER BY x, y ---- 1 1 query I SELECT count(*) FROM (SELECT DISTINCT ON (x) x, y FROM xyz WHERE x = 1 ORDER BY x, y) ---- 1