rust
/
Materialize


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513
							# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

$ set-sql-timeout duration=300s
$ set-arg-default default-replica-size=1

# Test for a subset of the information returned by introspection sources.
# The test focuses on computing answers that are independent of particular
# timing measurement data, even if these stable answers may take some time
# to appear in the results of queries to introspection sources.

# Note that we count on the retry behavior of testdrive in this test
# since introspection sources may take some time to catch up.
$ set-sql-timeout duration=60s

# Introspection subscribes add noise to the introspection sources, so disable them.
$ postgres-execute connection=postgres://mz_system:materialize@${testdrive.materialize-internal-sql-addr}
ALTER SYSTEM SET enable_introspection_subscribes = false

# In case the environment has other replicas
> CREATE CLUSTER test SIZE '4-4'
> SET cluster = test

> CREATE TABLE t (a int)

> CREATE MATERIALIZED VIEW mv AS SELECT * FROM t

> SELECT COUNT(*)
  FROM
    mz_materialized_views AS views,
    mz_introspection.mz_compute_exports AS compute_exports,
    mz_introspection.mz_compute_import_frontiers_per_worker AS import_frontiers
  WHERE
    views.name = 'mv' AND
    views.id = compute_exports.export_id AND
    compute_exports.export_id = import_frontiers.export_id AND
    time > 0
16

> CREATE VIEW vv AS SELECT * FROM t

> SELECT COUNT(*) FROM (
    SELECT import_frontiers.export_id, import_frontiers.import_id
    FROM mz_introspection.mz_compute_import_frontiers AS import_frontiers
    WHERE export_id LIKE 'u%'
  )
1

> CREATE DEFAULT INDEX ON vv

> SELECT COUNT(*) FROM (
    SELECT import_frontiers.export_id, import_frontiers.import_id
    FROM mz_introspection.mz_compute_import_frontiers AS import_frontiers
    WHERE export_id LIKE 'u%'
  )
2

> SELECT COUNT(*)
  FROM
    mz_views AS views,
    mz_indexes AS indexes,
    mz_introspection.mz_compute_exports compute_exports,
    mz_introspection.mz_compute_import_frontiers_per_worker AS import_frontiers
  WHERE
    views.name = 'vv' AND
    views.id = indexes.on_id AND
    indexes.id = compute_exports.export_id AND
    compute_exports.export_id = import_frontiers.export_id AND
    time > 0
16

> CREATE MATERIALIZED VIEW mvv AS SELECT * FROM vv

> SELECT COUNT(*) FROM (
    SELECT import_frontiers.export_id, import_frontiers.import_id
    FROM mz_introspection.mz_compute_import_frontiers AS import_frontiers
    WHERE export_id LIKE 'u%'
  )
3

> DROP MATERIALIZED VIEW mvv

> SELECT COUNT(*) FROM (
    SELECT import_frontiers.export_id, import_frontiers.import_id
    FROM mz_introspection.mz_compute_import_frontiers AS import_frontiers
    WHERE export_id LIKE 'u%'
  )
2

> DROP INDEX vv_primary_idx

> SELECT COUNT(*) FROM (
    SELECT import_frontiers.export_id, import_frontiers.import_id
    FROM mz_introspection.mz_compute_import_frontiers AS import_frontiers
    WHERE export_id LIKE 'u%'
  )
1

> DROP MATERIALIZED VIEW mv

> SELECT COUNT(*) FROM (
    SELECT import_frontiers.export_id, import_frontiers.import_id
    FROM mz_introspection.mz_compute_import_frontiers AS import_frontiers
    WHERE export_id LIKE 'u%'
  )
0

# Test that frontiers of introspection sources advance at all.

! SELECT * FROM mz_introspection.mz_active_peeks AS OF 0
contains: Timestamp (0) is not valid for all inputs

# Test that logged subscribe frontiers advance beyond 0.

$ set-regex match=\d{13} replacement=<TIMESTAMP>

> BEGIN
> DECLARE c CURSOR FOR SUBSCRIBE (
  SELECT true
  FROM mz_introspection.mz_compute_frontiers f, mz_internal.mz_subscriptions s
  WHERE f.export_id = s.id AND time > 0)
> FETCH 1 c WITH (timeout='20s')
<TIMESTAMP> 1 true
> COMMIT

# Test that mz_compute_exports contains correct dataflow IDs.

> CREATE MATERIALIZED VIEW my_unique_mv_name AS SELECT * FROM t

> SELECT count(*)
  FROM
    mz_materialized_views mv,
    mz_introspection.mz_compute_exports exp,
    mz_introspection.mz_dataflows df
  WHERE
    mv.name = 'my_unique_mv_name' AND
    mv.id = exp.export_id AND
    exp.dataflow_id = df.id AND
    df.name LIKE '%my_unique_mv_name%'
1

# Test that each operator has at most one parent

> SELECT max(count) FROM (
    SELECT count(*)
    FROM mz_introspection.mz_dataflow_operator_parents
    GROUP BY id) counts
1

# Test that certain `*_per_worker` relations include the same data for each
# worker. The corresponding global relations rely on this to justify showing
# only the data from worker 0.

> SELECT DISTINCT count(*)
  FROM mz_introspection.mz_dataflows_per_worker
  GROUP BY id, name
16

> SELECT DISTINCT count(*)
  FROM mz_introspection.mz_dataflow_addresses_per_worker
  GROUP BY id, address
16

> SELECT DISTINCT count(*)
  FROM mz_introspection.mz_dataflow_channels_per_worker
  GROUP BY id, from_index, from_port, to_index, to_port
16

> SELECT DISTINCT count(*)
  FROM mz_introspection.mz_dataflow_operators_per_worker
  GROUP BY id, name
16

> SELECT DISTINCT count(*)
  FROM mz_introspection.mz_dataflow_operator_dataflows_per_worker
  GROUP BY id, name, dataflow_id, dataflow_name
16

> SELECT DISTINCT count(*)
  FROM mz_introspection.mz_dataflow_channel_operators_per_worker
  GROUP BY id, from_operator_id, to_operator_id
16

> SELECT DISTINCT count(*)
  FROM mz_introspection.mz_compute_exports_per_worker
  GROUP BY export_id, dataflow_id
16

> CREATE DEFAULT INDEX ON t
> SELECT DISTINCT count(*)
  FROM mz_introspection.mz_arrangement_sharing_per_worker
  GROUP BY operator_id, count
16

> DROP INDEX t_primary_idx

# Test that the roll-up of arrangement sizes is correct

> INSERT INTO t SELECT * FROM generate_series(1, 100)

> CREATE TABLE t2 (b int)
> INSERT INTO t2 SELECT * FROM generate_series(1, 200)

> CREATE VIEW vv_arr AS SELECT sum(a) FROM t JOIN t2 ON t.a = t2.b

> CREATE MATERIALIZED VIEW mv_arr AS SELECT * FROM vv_arr

> CREATE DEFAULT INDEX ii_arr ON vv_arr

# It's hard to come up with precise bounds because we might de-duplicate some data in arrangements.
> SELECT records >= 300, size > 0 FROM mz_introspection.mz_dataflow_arrangement_sizes WHERE name LIKE '%ii_arr' OR name LIKE '%mv_arr'
true true
true true

# Test that non-arranging dataflows show up in `mz_dataflow_arrangement_sizes`

> CREATE TABLE t3 (c int)

> CREATE DEFAULT INDEX ii_empty ON t3

> SELECT records, size < 16 * 1024, allocations < 512 FROM mz_introspection.mz_dataflow_arrangement_sizes WHERE name LIKE '%ii_empty'
<null> true true

# Tests that arrangement sizes are approximate

> CREATE TABLE t4 (c int8)

> CREATE INDEX ii_t4 ON t4(c)

# We have 16 workers, and only want to ensure that the sizes are not egregious.

> SELECT records, size < 16 * 1024, allocations < 512 FROM mz_introspection.mz_dataflow_arrangement_sizes WHERE name LIKE '%ii_t4'
<null> true true

> INSERT INTO t4 SELECT 1

> SELECT records, size < 16 * 1024, allocations > 0 FROM mz_introspection.mz_dataflow_arrangement_sizes WHERE name LIKE '%ii_t4'
1 true true

> INSERT INTO t4 SELECT generate_series(1, 1000)

# Determining exact sizes is difficult because of deduplication in arrangements, so we just use safe values.
> SELECT records >= 1000 AND records <= 1001, batches > 0, size > 0 AND size < 4*30000, capacity > 0, allocations > 0 FROM mz_introspection.mz_dataflow_arrangement_sizes WHERE name LIKE '%ii_t4'
true true true true true

> DROP INDEX ii_t4

> SELECT records, batches, size, capacity, allocations FROM mz_introspection.mz_dataflow_arrangement_sizes WHERE name LIKE '%ii_t4'

# Test arrangement size logging for error dataflows

> CREATE TABLE t5(a int)

> INSERT INTO t5 SELECT -a FROM generate_series(1, 10000) g(a)

> CREATE VIEW vt5 AS SELECT a::uint2 FROM t5

> CREATE INDEX vt5_idx ON vt5(a)

# Cast to numeric to get its rounding behavior
> SELECT records, (size::numeric/1024/1024)::int FROM mz_introspection.mz_dataflow_arrangement_sizes WHERE name LIKE '%vt5_idx'
10000 1

> DROP TABLE t5 CASCADE

# Test that `mz_dataflow_arrangement_sizes` shows dataflows not contained in the catalog.

> CREATE TABLE t6 (a int)
> INSERT INTO t6 SELECT generate_series(1, 1000)
> CREATE INDEX ii_t6 ON t6 (a)
> CREATE VIEW t6_plus_1 AS SELECT a + 1 AS b FROM t6
> CREATE INDEX ii_t6_plus_1 ON t6_plus_1 (b)
> DROP INDEX ii_t6

> SELECT name, records > 0 FROM mz_introspection.mz_dataflow_arrangement_sizes WHERE name LIKE '%ii_t6%'
"Dataflow: materialize.public.ii_t6"        true
"Dataflow: materialize.public.ii_t6_plus_1" true

# Test that `mz_dataflow_arrangement_sizes` produces sensible results with accumulable reductions.

> CREATE TABLE ten (f1 integer);

> INSERT INTO ten VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);

> CREATE MATERIALIZED VIEW c1 AS
  SELECT COUNT(DISTINCT 100 * a1.f1 + 10 * a2.f1 + a3.f1)
  FROM ten AS a1, ten AS a2, ten AS a3;

> CREATE MATERIALIZED VIEW c2 AS
  SELECT 100 * a1.f1 + 10 * a2.f1 + a3.f1, COUNT(*)
  FROM ten AS a1, ten AS a2, ten AS a3
  GROUP BY 1
  HAVING COUNT(*) > 1;

> SELECT
    records > 2 * 1000,
    records < 2 * 2 * 1000,
    size > 0,
    size < 4 * 130 * 1000,
    allocations < 2 * 2 * 1000
  FROM mz_introspection.mz_dataflow_arrangement_sizes
  WHERE name LIKE '%c1%';
true true true true true

> SELECT
    records > 1000,
    records < 2 * 1000,
    size > 0,
    size < 4 * 100 * 1024,
    allocations < 2 * 1000
  FROM mz_introspection.mz_dataflow_arrangement_sizes
  WHERE name LIKE '%c2%';
true true true true true

# For coverage, we also include a recursive materialized view to account for dynamic timestamps.
> CREATE MATERIALIZED VIEW rec AS
  WITH MUTUALLY RECURSIVE (ERROR AT RECURSION LIMIT 3) term (key int, iter int) AS (
    SELECT 100 * a1.f1 + 10 * a2.f1 + a3.f1 AS key, 0 AS iter
    FROM ten AS a1, ten AS a2, ten AS a3
    UNION
    SELECT key, iter
    FROM term
    UNION
    SELECT key, MAX(iter) + 1 AS iter
    FROM term
    GROUP BY key
    HAVING MAX(iter) < 1
    OPTIONS (AGGREGATE INPUT GROUP SIZE = 1)
  )
  SELECT * FROM term;

> SELECT
    records > 0,
    records < 2 * 12 * 1000,
    size > 0,
    size < 4 * 1000 * 1000,
    allocations < 2 * 12 * 1000
  FROM mz_introspection.mz_dataflow_arrangement_sizes
  WHERE name LIKE '%rec%';
true true true true true

> DROP TABLE ten CASCADE;

# Test mz_dataflow_arrangement_sizes with hierarchical reductions and top-k over
# monotonic inputs. The latter is when there is a possibility for memory misestimation
# since we include monoids in the diff field.
> CREATE CLUSTER counter_cluster SIZE = '1';
> CREATE SOURCE counter IN CLUSTER counter_cluster FROM LOAD GENERATOR COUNTER (TICK INTERVAL '2ms');

> CREATE VIEW input AS
  SELECT counter % 1000 + 1 AS f1, counter % 10 + 1 AS f2
  FROM counter;
> CREATE DEFAULT INDEX ON input;

> CREATE VIEW m_minmax AS
  SELECT f1, MIN(f2), MAX(f2)
  FROM input
  GROUP BY f1;
> CREATE DEFAULT INDEX ON m_minmax;

> CREATE VIEW m_top1 AS
  SELECT DISTINCT ON (f1) f1, f2
  FROM input
  ORDER BY f1, f2 DESC;
> CREATE DEFAULT INDEX ON m_top1;

> SELECT
    records >= 2 * 1000,
    records < 1.1 * 2 * 1000,
    size > 0,
    size < 4 * 200 * 1000,
    allocations < 2 * 2 * 1000
  FROM mz_introspection.mz_dataflow_arrangement_sizes
  WHERE name LIKE '%m_minmax%';
true true true true true

> SELECT
    records >= 2 * 1000,
    records < 2 * 2 * 1000,
    size > 0,
    size < 4 * 172 * 1000,
    allocations < 2 * 2 * 1000
  FROM mz_introspection.mz_dataflow_arrangement_sizes
  WHERE name LIKE '%m_top1%';
true true true true true

> DROP SOURCE counter CASCADE;

# Test dataflow error introspection.

> CREATE TABLE zeros (a int)
> CREATE VIEW v_div_by_zero AS SELECT 1 / a AS x FROM zeros
> CREATE INDEX idx1_div_by_zero ON v_div_by_zero (x)
> CREATE MATERIALIZED VIEW mv_div_by_zero AS SELECT 1 / a AS x FROM zeros
> CREATE INDEX idx2_div_by_zero ON mv_div_by_zero (x)

> SELECT name, count
  FROM mz_introspection.mz_compute_error_counts c
  JOIN mz_objects o ON (c.export_id = o.id)
  ORDER BY name

> INSERT INTO zeros VALUES (0)

> SELECT name, count
  FROM mz_introspection.mz_compute_error_counts c
  JOIN mz_objects o ON (c.export_id = o.id)
  ORDER BY name
idx1_div_by_zero 1
idx2_div_by_zero 1
mv_div_by_zero   1

> INSERT INTO zeros VALUES (0), (0)

> SELECT name, count
  FROM mz_introspection.mz_compute_error_counts c
  JOIN mz_objects o ON (c.export_id = o.id)
  ORDER BY name
idx1_div_by_zero 3
idx2_div_by_zero 3
mv_div_by_zero   3

> DELETE FROM zeros

> SELECT name, count
  FROM mz_introspection.mz_compute_error_counts c
  JOIN mz_objects o ON (c.export_id = o.id)
  ORDER BY name

# Test that error logging is retracted when objects are dropped.

> INSERT INTO zeros VALUES (0), (0)

> SELECT name, count
  FROM mz_introspection.mz_compute_error_counts c
  JOIN mz_objects o ON (c.export_id = o.id)
  ORDER BY name
idx1_div_by_zero 2
idx2_div_by_zero 2
mv_div_by_zero   2

> DROP INDEX idx1_div_by_zero

> SELECT name, count
  FROM mz_introspection.mz_compute_error_counts c
  JOIN mz_objects o ON (c.export_id = o.id)
  ORDER BY name
idx2_div_by_zero 2
mv_div_by_zero   2

> DROP MATERIALIZED VIEW mv_div_by_zero

> SELECT name, count
  FROM mz_introspection.mz_compute_error_counts c
  JOIN mz_objects o ON (c.export_id = o.id)
  ORDER BY name

# Test logging of errors in indexes that advance to the empty frontier.
#
# Note that the same is not expected to work for MVs that advance to the empty
# frontier. There is no reason to keep completed MV dataflows around, so we
# drop them immediately, removing all their associated logging in the process.

> CREATE MATERIALIZED VIEW mv_zero AS SELECT 0 AS x
> CREATE VIEW div_by_zero AS SELECT 1 / x FROM mv_zero
> CREATE INDEX idx_div_by_zero ON div_by_zero ()

> SELECT name, count
  FROM mz_introspection.mz_compute_error_counts c
  JOIN mz_objects o ON (c.export_id = o.id)
  ORDER BY name
idx_div_by_zero 1

> DROP MATERIALIZED VIEW mv_zero CASCADE

# Test logging of errors in reused indexes.

> CREATE INDEX idx1_div_by_zero ON v_div_by_zero (x)
> CREATE INDEX idx2_div_by_zero ON v_div_by_zero (x)
> CREATE INDEX idx3_div_by_zero ON v_div_by_zero (x)

> SELECT name, count
  FROM mz_introspection.mz_compute_error_counts c
  JOIN mz_objects o ON (c.export_id = o.id)
  ORDER BY name
idx1_div_by_zero 2
idx2_div_by_zero 2
idx3_div_by_zero 2

> INSERT INTO zeros VALUES (0)

> SELECT name, count
  FROM mz_introspection.mz_compute_error_counts c
  JOIN mz_objects o ON (c.export_id = o.id)
  ORDER BY name
idx1_div_by_zero 3
idx2_div_by_zero 3
idx3_div_by_zero 3

> DROP TABLE zeros CASCADE

> SELECT name, count
  FROM mz_introspection.mz_compute_error_counts c
  JOIN mz_objects o ON (c.export_id = o.id)
  ORDER BY name

# Cleanup.
> DROP CLUSTER test CASCADE