# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

mode cockroach

simple conn=mz_system,user=mz_system
ALTER SYSTEM SET unsafe_enable_table_keys = true
----
COMPLETE 0

statement ok
CREATE TABLE t (a int, b int)

statement ok
INSERT INTO t (a, b) VALUES (1, 1), (1, 2), (2, 3), (3, 1)

query error aggregate functions are not allowed in WHERE clause \(function pg_catalog.sum\)
SELECT a FROM t WHERE sum(b) = 3 GROUP BY a

query error column "t.b" must appear in the GROUP BY clause or be used in an aggregate function
SELECT b FROM t GROUP BY a

query error column "t.b" must appear in the GROUP BY clause or be used in an aggregate function
SELECT 1 FROM t GROUP BY a ORDER BY b

query error column "t.b" must appear in the GROUP BY clause or be used in an aggregate function
SELECT 1 FROM t GROUP BY a HAVING b > 0

query error column "t.b" must appear in the GROUP BY clause or be used in an aggregate function
SELECT DISTINCT ON (b) a FROM t GROUP BY a ORDER BY b

query error column "t.b" must appear in the GROUP BY clause or be used in an aggregate function
SELECT t.b FROM t GROUP BY a

query error column "t.a" must appear in the GROUP BY clause or be used in an aggregate function
SELECT (SELECT a FROM t GROUP BY b) FROM t

query error column "t1.b" must appear in the GROUP BY clause or be used in an aggregate function
SELECT (SELECT t2.a FROM t t2 WHERE t1.b = t2.b) FROM t t1 GROUP BY t1.a;

query error column "t1.b" must appear in the GROUP BY clause or be used in an aggregate function
SELECT sum(t1.a), (SELECT t2.a FROM t t2 WHERE t1.b = t2.b) FROM t t1;

query error column "c" does not exist
SELECT c FROM t GROUP BY a

query error column "t.c" does not exist
SELECT t.c FROM t GROUP BY a

query IIIR colnames
SELECT 1 AS literal, sum(a) as sum_a, sum(b), avg(a) FROM t
----
literal  sum_a  sum  avg
1        7      7    1.75

query I rowsort
SELECT a FROM t GROUP BY a HAVING sum(b) = 3
----
1
2

query I rowsort
SELECT a + 1 FROM t GROUP BY a + 1 HAVING sum(b) = 3
----
2
3

query II rowsort
SELECT t1.a, (SELECT t2.a FROM t t2 WHERE t2.b = 2 AND t1.b = t2.b GROUP BY t2.a) FROM t t1;
----
1 NULL
2 NULL
3 NULL
1 1

query I
SELECT (SELECT sum(b) FROM t WHERE b = 2 GROUP BY a) FROM t t1 GROUP BY t1.b;
----
2
2
2

# Simple column names in GROUP BY can refer to columns from the output list...
query TII rowsort
SELECT 'dummy', a AS c, sum(b) FROM t GROUP BY c
----
dummy 1 3
dummy 2 3
dummy 3 1

# ...unless they are ambiguous...
query error column reference "c" is ambiguous
SELECT a AS c, sum(b) AS c FROM t GROUP BY c

# ...although ambiguity between the input list and the output list is not an
# error; the column in the input list is preferred.
query II rowsort
SELECT a, sum(b) AS a FROM t GROUP BY a
----
1 3
2 3
3 1

query I rowsort
SELECT a FROM t GROUP BY t.a, t.a
----
1
2
3

query I rowsort
SELECT a FROM t GROUP BY t.a, public.t.a
----
1
2
3

# Smoke test to make sure multiple accumulable and hierarchical reductions work
query IIIII rowsort
SELECT a, count(b), min(b), sum(b), max(b) FROM t GROUP BY a
----
1 2 1 3 2
2 1 3 3 3
3 1 1 1 1

# Test that hinting the group size works
query II rowsort
SELECT a, sum(b) AS a FROM t GROUP BY a OPTIONS (AGGREGATE INPUT GROUP SIZE 100)
----
1 3
2 3
3 1

# unless hint is bad
query error invalid AGGREGATE INPUT GROUP SIZE: cannot use value as number
SELECT a, sum(b) AS a FROM t GROUP BY a OPTIONS (AGGREGATE INPUT GROUP SIZE = 'foo')

query error
SELECT a, sum(b) AS a FROM t GROUP BY a OPTIONS (AGGREGATE INPUT  GROUP SIZE = 0.1)

# Test that an ordinal in a GROUP BY that refers to a column that is an
# expression, rather than a simple column reference, works.
query IT rowsort
SELECT 2 * a, sum(b) FROM t GROUP BY 1
----
2  3
4  3
6  1

# Ensure that the sum of NULLs is NULL.

query T
SELECT sum(column1) FROM (VALUES (NULL::int2), (NULL))
----
NULL

query T
SELECT sum(column1) FROM (VALUES (NULL::int4), (NULL))
----
NULL

query T
SELECT sum(column1) FROM (VALUES (NULL::int8), (NULL))
----
NULL

query T
SELECT sum(column1) FROM (VALUES (NULL::numeric), (NULL))
----
NULL

query TTTT colnames
SHOW COLUMNS FROM t
----
name  nullable  type     comment
 a    true      integer  (empty)
 b    true      integer  (empty)

# Tests on int8 sums to make sure we handle overflow and underflow correctly

statement ok
CREATE TABLE t_bigint (a bigint, b bigint)

statement ok
INSERT INTO t_bigint (a, b) VALUES (1, 1), (1, 2), (2, 9223372036854775807), (2, 9223372036854775807), (3, -9223372036854775808), (3, -9223372036854775808)

query II rowsort
SELECT a, sum(b) FROM t_bigint GROUP BY a
----
1 3
2 18446744073709551614
3 -18446744073709551616

query T colnames
SELECT pg_typeof(sum(b)) FROM t_bigint
----
pg_typeof
numeric

query TT colnames
SELECT pg_typeof(a) as a_type, pg_typeof(b) as b_type FROM t_bigint GROUP BY a_type, b_type
----
a_type b_type
bigint bigint

# Tests to make sure reduce elision works correctly

statement ok
CREATE TABLE agg_pk (a INT PRIMARY KEY, b INT, c BIGINT)

statement ok
INSERT INTO agg_pk VALUES (1, 2, 3), (2, 3, 4), (3, 4, 5)

query II
SELECT a, sum(b) from agg_pk group by a order by a
----
1 2
2 3
3 4

query T multiline
EXPLAIN OPTIMIZED PLAN WITH(humanized expressions, arity, join implementations) AS VERBOSE TEXT FOR SELECT a, sum(b) from agg_pk group by a
----
Explained Query:
  Project (#0{a}, #3) // { arity: 2 }
    Map (integer_to_bigint(#1{b})) // { arity: 4 }
      ReadStorage materialize.public.agg_pk // { arity: 3 }

Source materialize.public.agg_pk

Target cluster: quickstart

EOF

query II
SELECT a, sum(c) from agg_pk group by a order by a
----
1 3
2 4
3 5

query T multiline
EXPLAIN OPTIMIZED PLAN WITH(humanized expressions, arity, join implementations) AS VERBOSE TEXT FOR SELECT a, sum(c) from agg_pk group by a
----
Explained Query:
  Project (#0{a}, #3) // { arity: 2 }
    Map (bigint_to_numeric(#2{c})) // { arity: 4 }
      ReadStorage materialize.public.agg_pk // { arity: 3 }

Source materialize.public.agg_pk

Target cluster: quickstart

EOF

# avg on an integer column should return a decimal with the default decimal
# division scale increase.

query R
SELECT avg(a) FROM t
----
1.75

# But avg on a float column should return a float.

statement ok
CREATE TABLE t2 (a float)

statement ok
INSERT INTO t2 VALUES (1.0), (1.0), (2.0), (3.0)

query R
SELECT avg(a) FROM t2
----
1.75

# avg of an explicit NULL should return an error.

query error db error: ERROR: function sum\(unknown\) is not unique
SELECT avg(NULL)

statement error
SELECT * ORDER BY SUM(fake_column)

query RRRRRR colnames
SELECT variance(a), var_samp(a), var_pop(a), stddev(a), stddev_samp(a), stddev_pop(a) FROM t
----
variance        var_samp        var_pop         stddev          stddev_samp     stddev_pop
0.916666666666666666666666666666666666667  0.916666666666666666666666666666666666667  0.6875  0.957427107756338109975101911369821553037  0.957427107756338109975101911369821553037  0.829156197588849962278733184167671670982

query RRRRRR
SELECT variance(a), var_samp(a), var_pop(a), stddev(a), stddev_samp(a), stddev_pop(a) FROM t2
----
0.9166666666666666  0.9166666666666666  0.6875  0.9574271077563381  0.9574271077563381  0.82915619758885

# TODO(benesch): these filter tests are copied from cockroach/aggregate.slt;
# remove them from here when we can run that file in its entirely.

statement ok
CREATE TABLE filter_test (
  k INT,
  v INT,
  mark BOOL
)

statement OK
INSERT INTO filter_test VALUES
(1, 2, false),
(3, 4, true),
(5, NULL, true),
(6, 2, true),
(7, 2, true),
(8, 4, true),
(NULL, 4, true)

# FILTER should eliminate some results.
query II rowsort
SELECT v, count(*) FILTER (WHERE k > 5) FROM filter_test GROUP BY v
----
2 2
4 1
NULL 0

# Test multiple filters
query IBIII rowsort
SELECT v, mark, count(*) FILTER (WHERE k > 5), count(*), max(k) FILTER (WHERE k < 8) FROM filter_test GROUP BY v, mark
----
2 false 0 1 1
2 true 2 2 7
4 true 1 3 3
NULL true 0 1 5

query error FILTER specified, but abs is not an aggregate function
SELECT abs(1) FILTER (WHERE false)

query error Expected end of statement, found left parenthesis
SELECT column1 FILTER (WHERE column1 = 1) FROM (VALUES (1))

query error db error: ERROR: aggregate functions are not allowed in FILTER \(function pg_catalog\.count\)
SELECT v, count(*) FILTER (WHERE count(1) > 5) FROM filter_test GROUP BY v

# These filter tests are Materialize-specific.

# Test avg, which needs to propgate the filter through its implementation.
query IR rowsort
SELECT v, avg(k) FILTER (WHERE k > 5) FROM filter_test GROUP BY v
----
2 6.5
4 8
NULL NULL

# Similarly for variance and stddev.
query IRR rowsort
SELECT v, variance(k) FILTER (WHERE k > 5), stddev(k) FILTER (WHERE k > 5) FROM filter_test GROUP BY v
----
2  0.5  0.707106781186547524400844362104849039285
4  NULL  NULL
NULL  NULL  NULL


# Multiple tests related to distinctness of aggregates on constants (issue database-issues#887)
query I rowsort
select count(distinct column1) from (values (1)) _;
----
1

query I rowsort
select count(distinct column1) from (values (1), (2), (1), (4)) _;
----
3

query I rowsort
select sum(distinct column1) from (values (1), (2), (1), (4)) _;
----
7

query error count\(\*\) must be used to call a parameterless aggregate function
SELECT count()

query error db error: ERROR: function sum\(\) does not exist
SELECT sum(*)

# Ensure int2 has its own max implementation
query I
SELECT max(column1) FROM (VALUES (1::int2), (-1::int2));
----
1

query T
SELECT pg_typeof(max(column1)) FROM (VALUES (1::int2), (-1::int2));
----
smallint

# ORDER BY

query TTTT
WITH
    v (a, b)
        AS (
            VALUES
                ('a', 1),
                ('b', 2),
                ('c', 0),
                ('d', 2)
        )
SELECT
    list_agg(a ORDER BY a DESC)::text AS a_desc,
    list_agg(a ORDER BY b)::text AS b,
    list_agg(a ORDER BY b, a ASC)::text AS b_a_asc,
    list_agg(a ORDER BY b, a DESC)::text AS b_a_desc
FROM
    v
----
{d,c,b,a}  {c,a,b,d}  {c,a,b,d}  {c,a,d,b}

query TTTTT
WITH
    v (a, b)
        AS (
            VALUES
                ('a', 1),
                ('b', 2),
                ('c', 0),
                ('d', 2),
                ('e', NULL)
        )
SELECT
    array_agg(a ORDER BY a DESC)::text AS a_desc,
    array_agg(a ORDER BY b)::text AS b,
    array_agg(a ORDER BY b, a ASC)::text AS b_a_asc,
    array_agg(a ORDER BY b, a DESC)::text AS b_a_desc,
    array_agg(a ORDER BY b NULLS FIRST, a DESC)::text AS b_a_desc_nulls_first
FROM
    v
----
{e,d,c,b,a}  {c,a,b,d,e}  {c,a,b,d,e}  {c,a,d,b,e}  {e,c,a,d,b}

query TTTT
WITH
    v (a, b)
        AS (
            VALUES
                ('a', 1),
                ('b', 2),
                ('c', 0),
                ('d', 2)
        )
SELECT
    string_agg(a, '-' ORDER BY a DESC)::text AS a_desc,
    string_agg(a, '-' ORDER BY b)::text AS b,
    string_agg(a, '-' ORDER BY b, a ASC)::text AS b_a_asc,
    string_agg(a, '-' ORDER BY b, a DESC)::text AS b_a_desc
FROM
    v
----
d-c-b-a c-a-b-d c-a-b-d c-a-d-b

query TTTT
WITH
    v (a, b)
        AS (
            VALUES
                ('a', 1),
                ('b', 2),
                ('c', 0),
                ('d', 2)
        )
SELECT
    jsonb_agg(a ORDER BY a DESC)::text AS a_desc,
    jsonb_agg(a ORDER BY b)::text AS b,
    jsonb_agg(a ORDER BY b, a ASC)::text AS b_a_asc,
    jsonb_agg(a ORDER BY b, a DESC)::text AS b_a_desc
FROM
    v
----
["d","c","b","a"]  ["c","a","b","d"]  ["c","a","b","d"]  ["c","a","d","b"]

query TTTT
WITH
    v (a, b)
        AS (
            VALUES
                ('a', 1),
                ('b', 2),
                ('c', 0),
                ('d', 2)
        )
SELECT
    jsonb_object_agg(b, a ORDER BY a DESC)::text AS a_desc,
    jsonb_object_agg(b, a ORDER BY b)::text AS b,
    jsonb_object_agg(b, a ORDER BY b, a ASC)::text AS b_a_asc,
    jsonb_object_agg(b, a ORDER BY b, a DESC)::text AS b_a_desc
FROM
    v
----
{"0":"c","1":"a","2":"b"}  {"0":"c","1":"a","2":"d"}  {"0":"c","1":"a","2":"d"}  {"0":"c","1":"a","2":"b"}


# Test Reduction elision

statement ok
CREATE TABLE a (x text)

statement ok
INSERT INTO a VALUES ('a'),('b')

statement ok
CREATE TABLE qs (q int not null)

query T
SELECT STRING_AGG(x, ',') FROM (SELECT * FROM a ORDER BY x);
----
a,b

query T
SELECT STRING_AGG(x, ',') FROM (SELECT * FROM a ORDER BY x limit 1);
----
a

query T
SELECT STRING_AGG(x, ',') FROM (SELECT * FROM (SELECT 'a' as x UNION ALL SELECT 'b' as x) ORDER BY x)
----
a,b

query T
SELECT STRING_AGG(x, ',') FROM (SELECT * FROM (SELECT 'a' as x UNION ALL SELECT 'b' as x) ORDER BY x limit 1)
----
a

query T
SELECT STRING_AGG(x, ',') from (SELECT TRUE::text as x FROM(SELECT AVG(0) FROM qs))
----
true

query T
SELECT LIST_AGG(x)::text FROM (SELECT * FROM a ORDER BY x)
----
{a,b}

query T
SELECT LIST_AGG(x)::text FROM (SELECT * FROM a ORDER BY x limit 1)
----
{a}

query T
SELECT LIST_AGG(x)::text FROM (SELECT * FROM (SELECT 'a' as x UNION ALL SELECT 'b' as x) ORDER BY x)
----
{a,b}

query T
SELECT LIST_AGG(x)::text FROM (SELECT * FROM (SELECT 'a' as x UNION ALL SELECT 'b' as x) ORDER BY x limit 1)
----
{a}

query T
SELECT LIST_AGG(x)::text from (SELECT TRUE::text as x FROM(SELECT AVG(0) FROM qs))
----
{true}

query T
SELECT ARRAY_AGG(x) FROM (SELECT * FROM a ORDER BY x)
----
{a,b}

query T
SELECT ARRAY_AGG(x) FROM (SELECT * FROM a ORDER BY x limit 1)
----
{a}

query T
SELECT ARRAY_AGG(x) FROM (SELECT * FROM (SELECT 'a' as x UNION ALL SELECT 'b' as x) ORDER BY x)
----
{a,b}

query T
SELECT ARRAY_AGG(x) FROM (SELECT * FROM (SELECT 'a' as x UNION ALL SELECT 'b' as x) ORDER BY x limit 1)
----
{a}

query T
SELECT ARRAY_AGG(x) from (SELECT TRUE::text as x FROM(SELECT AVG(0) FROM qs))
----
{true}

query T
SELECT JSONB_AGG(x) FROM (SELECT * FROM a ORDER BY x)
----
["a","b"]

query T
SELECT JSONB_AGG(x) FROM (SELECT * FROM a ORDER BY x limit 1)
----
["a"]

query T
SELECT JSONB_AGG(x) FROM (SELECT * FROM (SELECT 'a' as x UNION ALL SELECT 'b' as x) ORDER BY x)
----
["a","b"]

query T
SELECT JSONB_AGG(x) FROM (SELECT * FROM (SELECT 'a' as x UNION ALL SELECT 'b' as x) ORDER BY x limit 1)
----
["a"]

query T
SELECT JSONB_AGG(x) from (SELECT TRUE::text as x FROM(SELECT AVG(0) FROM qs))
----
["true"]

query T
SELECT JSONB_OBJECT_AGG(a,b) FROM (SELECT * FROM t ORDER BY a)
----
{"1":2,"2":3,"3":1}

query T
SELECT JSONB_OBJECT_AGG(a,b) FROM (SELECT * FROM t ORDER BY a limit 1)
----
{"1":1}

query T
SELECT JSONB_OBJECT_AGG(a,b) FROM (SELECT * FROM (SELECT 'a' as a,'b' as b UNION ALL SELECT 'c' as a,'d' as b) ORDER by a)
----
{"a":"b","c":"d"}

query T
SELECT JSONB_OBJECT_AGG(a,b) FROM (SELECT * FROM (SELECT 'a' as a,'b' as b UNION ALL SELECT 'c' as a,'d' as b) ORDER by a limit 1)
----
{"a":"b"}

query T
SELECT JSONB_OBJECT_AGG(a,b) from (SELECT TRUE::text as a, FALSE::text as b FROM(SELECT AVG(0) FROM qs))
----
{"true":"false"}

query TI
SELECT a.*, ROW_NUMBER() over () FROM (SELECT * FROM a ORDER BY x) a
----
a  1
b  2

query TI
SELECT a.*, ROW_NUMBER() over () FROM (SELECT * FROM a ORDER BY x limit 1) a
----
a  1


query TI
SELECT a.*, ROW_NUMBER() OVER() FROM (SELECT * FROM (SELECT 'a' as x UNION ALL SELECT 'b' as x) ORDER BY x) a
----
a  1
b  2

query TI
SELECT a.*, ROW_NUMBER() OVER() FROM (SELECT * FROM (SELECT 'a' as x UNION ALL SELECT 'b' as x) ORDER BY x limit 1) a
----
a  1

query TI
SELECT a.*, ROW_NUMBER() OVER() from (SELECT TRUE::text as x FROM(SELECT AVG(0) FROM qs)) a
----
true 1

statement ok
CREATE TABLE t_16 (i16 smallint)

statement ok
INSERT INTO t_16 VALUES (0), (-1), (1)

query TT
SELECT MIN(i16), MAX(i16) from t_16
----
-1 1

# Verify that the behavior of `stddev` is sane when variance is small

statement ok
CREATE TABLE t_variance (x float)

statement ok
INSERT INTO t_variance VALUES (0.2)

query I
SELECT stddev(x) FROM t_variance
----
NULL

statement ok
INSERT INTO t_variance VALUES (0.2)

query I
SELECT stddev(x) FROM t_variance
----
0

query I
SELECT stddev(x) FROM generate_series(0, -1) empty(x)
----
NULL


# Should include two sum(*) aggregates.
query T multiline
EXPLAIN RAW PLAN FOR SELECT stddev(x), sum(x) FROM t_variance;
----
Project (#4, #3)
  Map (sqrtf64(case when (((#0{?column?} - ((#1{?column?} * #1{?column?}) / bigint_to_double(case when (#2{?column?} = integer_to_bigint(0)) then null else #2{?column?} end))) / bigint_to_double(case when ((#2{?column?} - integer_to_bigint(1)) = integer_to_bigint(0)) then null else (#2{?column?} - integer_to_bigint(1)) end))) IS NULL then null else greatest(((#0{?column?} - ((#1{?column?} * #1{?column?}) / bigint_to_double(case when (#2{?column?} = integer_to_bigint(0)) then null else #2{?column?} end))) / bigint_to_double(case when ((#2{?column?} - integer_to_bigint(1)) = integer_to_bigint(0)) then null else (#2{?column?} - integer_to_bigint(1)) end)), integer_to_double(0)) end))
    Reduce aggregates=[sum((#0{x} * #0{x})), sum(#0{x}), count(#0{x}), sum(#0{x})]
      Get materialize.public.t_variance

Target cluster: quickstart

EOF

# Should include only one sum(*) aggregate.
query T multiline
EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR SELECT stddev(x), sum(x) FROM t_variance;
----
Explained Query:
  With
    cte l0 =
      Reduce aggregates=[sum((#0{x} * #0{x})), sum(#0{x}), count(#0{x})]
        ReadStorage materialize.public.t_variance
  Return
    Project (#4, #3{sum_x})
      Map (sqrtf64(case when ((#0{sum}) IS NULL OR (#1{sum_x}) IS NULL OR (case when (#2{count_x} = 0) then null else #2{count_x} end) IS NULL OR (case when (0 = (#2{count_x} - 1)) then null else (#2{count_x} - 1) end) IS NULL) then null else greatest(((#0{sum} - ((#1{sum_x} * #1{sum_x}) / bigint_to_double(case when (#2{count_x} = 0) then null else #2{count_x} end))) / bigint_to_double(case when (0 = (#2{count_x} - 1)) then null else (#2{count_x} - 1) end)), 0) end))
        Union
          Project (#0{sum}..=#2{count_x}, #1{sum_x})
            Get l0
          Map (null, null, 0, null)
            Union
              Negate
                Project ()
                  Get l0
              Constant
                - ()

Source materialize.public.t_variance

Target cluster: quickstart

EOF