123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475 |
- # Copyright Materialize, Inc. and contributors. All rights reserved.
- #
- # Use of this software is governed by the Business Source License
- # included in the LICENSE file at the root of this repository.
- #
- # As of the Change Date specified in that file, in accordance with
- # the Business Source License, use of this software will be governed
- # by the Apache License, Version 2.0.
- # Test reporting of dataflow hydration status through
- # `mz_internal.mz_compute_hydration_statuses`,
- # `mz_internal.mz_hydration_statuses`, and
- # `mz_internal.mz_compute_operator_hydration_statuses`.
- #
- # Note that all of the below tests only assert that the `hydrated` flag
- # eventually becomes `true`, not that it starts off as `false`. That's because
- # we have no control about the hydration timing of dataflows or the update
- # cadence of the hydration introspection relations, so we have no reliable way
- # of ensuring that a query arrives before a dataflow has hydrated.
- #
- # These tests rely on testdrive's retry feature, as dataflows can take an
- # unknown (but hopefully small) time to hydrate. Furthermore, the queried
- # introspection relations are asynchronously updated, so DDL commands are not
- # immediately reflected there.
- > CREATE CLUSTER test REPLICAS (hydrated_test_1 (SIZE '1'))
- > SET cluster = test
- # Test that on an empty cluster only the introspection indexes show up.
- # Introspection subscribes do not show up because the hydration introspection
- # relations exclude transient dataflow.
- > SELECT DISTINCT left(h.object_id, 2), h.hydrated
- FROM mz_internal.mz_compute_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- WHERE r.name LIKE 'hydrated_test%';
- si true
- > SELECT DISTINCT left(h.object_id, 2), h.hydrated
- FROM mz_internal.mz_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- WHERE r.name LIKE 'hydrated_test%';
- si true
- # No operator-level hydration status logging for introspection dataflows.
- > SELECT DISTINCT left(h.object_id, 1), h.hydrated
- FROM mz_internal.mz_compute_operator_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- WHERE r.name LIKE 'hydrated_test%';
- # Test adding new compute dataflows.
- > CREATE TABLE t (a int)
- > CREATE INDEX idx ON t (a)
- > CREATE MATERIALIZED VIEW mv AS SELECT * FROM t
- > CREATE MATERIALIZED VIEW mv_const AS SELECT 1
- > SELECT o.name, r.name, h.hydrated, h.hydration_time IS NOT NULL
- FROM mz_internal.mz_compute_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%';
- idx hydrated_test_1 true true
- mv hydrated_test_1 true true
- mv_const hydrated_test_1 true false
- > SELECT o.name, r.name, h.hydrated
- FROM mz_internal.mz_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%';
- idx hydrated_test_1 true
- mv hydrated_test_1 true
- mv_const hydrated_test_1 true
- > SELECT o.name, r.name, bool_and(h.hydrated)
- FROM mz_internal.mz_compute_operator_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%'
- GROUP BY o.name, r.name;
- idx hydrated_test_1 true
- mv hydrated_test_1 true
- mv_const hydrated_test_1 true
- # Test adding new replicas.
- > CREATE CLUSTER REPLICA test.hydrated_test_2 SIZE '1'
- > SELECT o.name, r.name, h.hydrated, h.hydration_time IS NOT NULL
- FROM mz_internal.mz_compute_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%';
- idx hydrated_test_1 true true
- idx hydrated_test_2 true true
- mv hydrated_test_1 true true
- mv hydrated_test_2 true true
- mv_const hydrated_test_1 true false
- mv_const hydrated_test_2 true false
- > SELECT o.name, r.name, h.hydrated
- FROM mz_internal.mz_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%';
- idx hydrated_test_1 true
- idx hydrated_test_2 true
- mv hydrated_test_1 true
- mv hydrated_test_2 true
- mv_const hydrated_test_1 true
- mv_const hydrated_test_2 true
- # `mv_const` doesn't show up for the second replica because its output was
- # already fully computed so it wasn't installed on the replica and no operators
- # exist.
- > SELECT o.name, r.name, bool_and(h.hydrated)
- FROM mz_internal.mz_compute_operator_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%'
- GROUP BY o.name, r.name;
- idx hydrated_test_1 true
- idx hydrated_test_2 true
- mv hydrated_test_1 true
- mv hydrated_test_2 true
- mv_const hydrated_test_1 true
- # Test dropping replicas.
- > DROP CLUSTER REPLICA test.hydrated_test_1
- > SELECT o.name, r.name, h.hydrated, h.hydration_time IS NOT NULL
- FROM mz_internal.mz_compute_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%';
- idx hydrated_test_2 true true
- mv hydrated_test_2 true true
- mv_const hydrated_test_2 true false
- > SELECT o.name, r.name, h.hydrated
- FROM mz_internal.mz_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%';
- idx hydrated_test_2 true
- mv hydrated_test_2 true
- mv_const hydrated_test_2 true
- > SELECT o.name, r.name, bool_and(h.hydrated)
- FROM mz_internal.mz_compute_operator_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%'
- GROUP BY o.name, r.name;
- idx hydrated_test_2 true
- mv hydrated_test_2 true
- > DROP CLUSTER REPLICA test.hydrated_test_2
- > SELECT o.name, r.name, h.hydrated, h.hydration_time IS NOT NULL
- FROM mz_internal.mz_compute_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%';
- > SELECT o.name, r.name, h.hydrated
- FROM mz_internal.mz_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%';
- > SELECT o.name, r.name, bool_and(h.hydrated)
- FROM mz_internal.mz_compute_operator_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%'
- GROUP BY o.name, r.name;
- # Test dropping dataflows.
- > CREATE CLUSTER REPLICA test.hydrated_test_3 SIZE '1'
- > SELECT o.name, r.name, h.hydrated, h.hydration_time IS NOT NULL
- FROM mz_internal.mz_compute_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%';
- idx hydrated_test_3 true true
- mv hydrated_test_3 true true
- mv_const hydrated_test_3 true false
- > SELECT o.name, r.name, h.hydrated
- FROM mz_internal.mz_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%';
- idx hydrated_test_3 true
- mv hydrated_test_3 true
- mv_const hydrated_test_3 true
- > SELECT o.name, r.name, bool_and(h.hydrated)
- FROM mz_internal.mz_compute_operator_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%'
- GROUP BY o.name, r.name;
- idx hydrated_test_3 true
- mv hydrated_test_3 true
- > DROP INDEX idx;
- > DROP MATERIALIZED VIEW mv_const;
- > SELECT o.name, r.name, h.hydrated, h.hydration_time IS NOT NULL
- FROM mz_internal.mz_compute_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%';
- mv hydrated_test_3 true true
- > SELECT o.name, r.name, h.hydrated
- FROM mz_internal.mz_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%';
- mv hydrated_test_3 true
- > SELECT o.name, r.name, bool_and(h.hydrated)
- FROM mz_internal.mz_compute_operator_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%'
- GROUP BY o.name, r.name;
- mv hydrated_test_3 true
- # Test adding new storage dataflows.
- > CREATE SOURCE src
- IN CLUSTER test
- FROM LOAD GENERATOR auction (UP TO 100);
- > CREATE TABLE accounts FROM SOURCE src (REFERENCE accounts);
- > CREATE TABLE auctions FROM SOURCE src (REFERENCE auctions);
- > CREATE TABLE bids FROM SOURCE src (REFERENCE bids);
- > CREATE TABLE organizations FROM SOURCE src (REFERENCE organizations);
- > CREATE TABLE users FROM SOURCE src (REFERENCE users);
- > CREATE CONNECTION kafka_conn
- TO KAFKA (BROKER '${testdrive.kafka-addr}', SECURITY PROTOCOL PLAINTEXT)
- > CREATE CONNECTION csr_conn
- TO CONFLUENT SCHEMA REGISTRY (URL '${testdrive.schema-registry-url}')
- > CREATE SINK snk
- IN CLUSTER test
- FROM mv
- INTO KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-sink1-${testdrive.seed}')
- FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
- ENVELOPE DEBEZIUM
- # Webhook sources are not backed by dataflows, so they have no concept of
- # hydration and shouldn't show up in mz_hydration_statuses.
- > CREATE SOURCE web IN CLUSTER test FROM WEBHOOK BODY FORMAT JSON
- > SELECT o.name, r.name, h.hydrated
- FROM mz_internal.mz_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%';
- mv hydrated_test_3 true
- src hydrated_test_3 true
- snk hydrated_test_3 true
- # Test dropping replicas.
- > DROP CLUSTER REPLICA test.hydrated_test_3
- > SELECT o.name, r.name, h.hydrated
- FROM mz_internal.mz_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%';
- # Test adding new replicas.
- > CREATE CLUSTER REPLICA test.hydrated_test_4 SIZE '1'
- > SELECT o.name, r.name, h.hydrated
- FROM mz_internal.mz_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%';
- mv hydrated_test_4 true
- src hydrated_test_4 true
- snk hydrated_test_4 true
- # Test dropping dataflows.
- > DROP SINK snk
- > DROP SOURCE src CASCADE
- > DROP MATERIALIZED VIEW mv
- > SELECT o.name, r.name, h.hydrated
- FROM mz_internal.mz_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%';
- # Test hydration status reporting with WMR dataflows.
- > CREATE MATERIALIZED VIEW mv_wmr AS
- WITH MUTUALLY RECURSIVE
- x (a int) AS (
- SELECT * FROM t
- UNION ALL
- SELECT a + 1 FROM x WHERE a < 10
- )
- SELECT * FROM x;
- > CREATE MATERIALIZED VIEW mv_wmr_const AS
- WITH MUTUALLY RECURSIVE
- x (a int) AS (
- VALUES (1)
- UNION ALL
- SELECT a + 1 FROM x WHERE a < 10
- )
- SELECT * FROM x;
- > CREATE MATERIALIZED VIEW mv_wmr_stuck AS
- WITH MUTUALLY RECURSIVE
- x (a int) AS (
- VALUES (1)
- UNION ALL
- SELECT a + 1 FROM x
- )
- SELECT * FROM x;
- > SELECT o.name, r.name, h.hydrated, h.hydration_time IS NOT NULL
- FROM mz_internal.mz_compute_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%';
- mv_wmr hydrated_test_4 true true
- mv_wmr_const hydrated_test_4 true false
- mv_wmr_stuck hydrated_test_4 false false
- > SELECT o.name, r.name, h.hydrated
- FROM mz_internal.mz_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%';
- mv_wmr hydrated_test_4 true
- mv_wmr_const hydrated_test_4 true
- mv_wmr_stuck hydrated_test_4 false
- > SELECT o.name, r.name, bool_and(h.hydrated)
- FROM mz_internal.mz_compute_operator_hydration_statuses h
- JOIN mz_cluster_replicas r ON (r.id = h.replica_id)
- JOIN mz_objects o ON (o.id = h.object_id)
- WHERE
- r.name LIKE 'hydrated_test%' AND
- o.id NOT LIKE 's%'
- GROUP BY o.name, r.name;
- mv_wmr hydrated_test_4 true
- mv_wmr_const hydrated_test_4 true
- mv_wmr_stuck hydrated_test_4 false
- > DROP MATERIALIZED VIEW mv_wmr
- > DROP MATERIALIZED VIEW mv_wmr_const
- > DROP MATERIALIZED VIEW mv_wmr_stuck
- # Test that incorrectly configured sinks do _not_ show as hydrated.
- > CREATE TABLE schema1 (a int)
- > CREATE TABLE schema2 (a text)
- > CREATE SINK snk_schema1
- IN CLUSTER test
- FROM schema1
- INTO KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-snk_schema1-${testdrive.seed}')
- FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
- ENVELOPE DEBEZIUM
- > SELECT s.name, h.hydrated
- FROM mz_sinks s
- JOIN mz_internal.mz_hydration_statuses h ON (h.object_id = s.id)
- snk_schema1 true
- > CREATE SINK snk_schema2
- IN CLUSTER test
- FROM schema2
- INTO KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-snk_schema1-${testdrive.seed}')
- FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
- ENVELOPE DEBEZIUM
- # If we have a bug, the sink's hydration status might toggle between `false`
- # and `true`, rather than always being `true`. Using retries might therefore
- # cause this test to pass even if it shouldn't. We instead disable retries and
- # manually check the hydration status a couple times.
- $ set-max-tries max-tries=1
- > SELECT s.name, h.hydrated
- FROM mz_sinks s
- JOIN mz_internal.mz_hydration_statuses h ON (h.object_id = s.id)
- snk_schema1 true
- snk_schema2 false
- > SELECT s.name, h.hydrated
- FROM mz_sinks s
- JOIN mz_internal.mz_hydration_statuses h ON (h.object_id = s.id)
- snk_schema1 true
- snk_schema2 false
- > SELECT s.name, h.hydrated
- FROM mz_sinks s
- JOIN mz_internal.mz_hydration_statuses h ON (h.object_id = s.id)
- snk_schema1 true
- snk_schema2 false
- > SELECT s.name, h.hydrated
- FROM mz_sinks s
- JOIN mz_internal.mz_hydration_statuses h ON (h.object_id = s.id)
- snk_schema1 true
- snk_schema2 false
- > SELECT s.name, h.hydrated
- FROM mz_sinks s
- JOIN mz_internal.mz_hydration_statuses h ON (h.object_id = s.id)
- snk_schema1 true
- snk_schema2 false
|