# Copyright Materialize, Inc. and contributors. All rights reserved. # # Use of this software is governed by the Business Source License # included in the LICENSE file at the root of this repository. # # As of the Change Date specified in that file, in accordance with # the Business Source License, use of this software will be governed # by the Apache License, Version 2.0. # Test reporting of dataflow hydration status through # `mz_internal.mz_compute_hydration_statuses`, # `mz_internal.mz_hydration_statuses`, and # `mz_internal.mz_compute_operator_hydration_statuses`. # # Note that all of the below tests only assert that the `hydrated` flag # eventually becomes `true`, not that it starts off as `false`. That's because # we have no control about the hydration timing of dataflows or the update # cadence of the hydration introspection relations, so we have no reliable way # of ensuring that a query arrives before a dataflow has hydrated. # # These tests rely on testdrive's retry feature, as dataflows can take an # unknown (but hopefully small) time to hydrate. Furthermore, the queried # introspection relations are asynchronously updated, so DDL commands are not # immediately reflected there. > CREATE CLUSTER test REPLICAS (hydrated_test_1 (SIZE '1')) > SET cluster = test # Test that on an empty cluster only the introspection indexes show up. # Introspection subscribes do not show up because the hydration introspection # relations exclude transient dataflow. > SELECT DISTINCT left(h.object_id, 2), h.hydrated FROM mz_internal.mz_compute_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) WHERE r.name LIKE 'hydrated_test%'; si true > SELECT DISTINCT left(h.object_id, 2), h.hydrated FROM mz_internal.mz_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) WHERE r.name LIKE 'hydrated_test%'; si true # No operator-level hydration status logging for introspection dataflows. > SELECT DISTINCT left(h.object_id, 1), h.hydrated FROM mz_internal.mz_compute_operator_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) WHERE r.name LIKE 'hydrated_test%'; # Test adding new compute dataflows. > CREATE TABLE t (a int) > CREATE INDEX idx ON t (a) > CREATE MATERIALIZED VIEW mv AS SELECT * FROM t > CREATE MATERIALIZED VIEW mv_const AS SELECT 1 > SELECT o.name, r.name, h.hydrated, h.hydration_time IS NOT NULL FROM mz_internal.mz_compute_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%'; idx hydrated_test_1 true true mv hydrated_test_1 true true mv_const hydrated_test_1 true false > SELECT o.name, r.name, h.hydrated FROM mz_internal.mz_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%'; idx hydrated_test_1 true mv hydrated_test_1 true mv_const hydrated_test_1 true > SELECT o.name, r.name, bool_and(h.hydrated) FROM mz_internal.mz_compute_operator_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%' GROUP BY o.name, r.name; idx hydrated_test_1 true mv hydrated_test_1 true mv_const hydrated_test_1 true # Test adding new replicas. > CREATE CLUSTER REPLICA test.hydrated_test_2 SIZE '1' > SELECT o.name, r.name, h.hydrated, h.hydration_time IS NOT NULL FROM mz_internal.mz_compute_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%'; idx hydrated_test_1 true true idx hydrated_test_2 true true mv hydrated_test_1 true true mv hydrated_test_2 true true mv_const hydrated_test_1 true false mv_const hydrated_test_2 true false > SELECT o.name, r.name, h.hydrated FROM mz_internal.mz_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%'; idx hydrated_test_1 true idx hydrated_test_2 true mv hydrated_test_1 true mv hydrated_test_2 true mv_const hydrated_test_1 true mv_const hydrated_test_2 true # `mv_const` doesn't show up for the second replica because its output was # already fully computed so it wasn't installed on the replica and no operators # exist. > SELECT o.name, r.name, bool_and(h.hydrated) FROM mz_internal.mz_compute_operator_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%' GROUP BY o.name, r.name; idx hydrated_test_1 true idx hydrated_test_2 true mv hydrated_test_1 true mv hydrated_test_2 true mv_const hydrated_test_1 true # Test dropping replicas. > DROP CLUSTER REPLICA test.hydrated_test_1 > SELECT o.name, r.name, h.hydrated, h.hydration_time IS NOT NULL FROM mz_internal.mz_compute_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%'; idx hydrated_test_2 true true mv hydrated_test_2 true true mv_const hydrated_test_2 true false > SELECT o.name, r.name, h.hydrated FROM mz_internal.mz_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%'; idx hydrated_test_2 true mv hydrated_test_2 true mv_const hydrated_test_2 true > SELECT o.name, r.name, bool_and(h.hydrated) FROM mz_internal.mz_compute_operator_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%' GROUP BY o.name, r.name; idx hydrated_test_2 true mv hydrated_test_2 true > DROP CLUSTER REPLICA test.hydrated_test_2 > SELECT o.name, r.name, h.hydrated, h.hydration_time IS NOT NULL FROM mz_internal.mz_compute_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%'; > SELECT o.name, r.name, h.hydrated FROM mz_internal.mz_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%'; > SELECT o.name, r.name, bool_and(h.hydrated) FROM mz_internal.mz_compute_operator_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%' GROUP BY o.name, r.name; # Test dropping dataflows. > CREATE CLUSTER REPLICA test.hydrated_test_3 SIZE '1' > SELECT o.name, r.name, h.hydrated, h.hydration_time IS NOT NULL FROM mz_internal.mz_compute_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%'; idx hydrated_test_3 true true mv hydrated_test_3 true true mv_const hydrated_test_3 true false > SELECT o.name, r.name, h.hydrated FROM mz_internal.mz_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%'; idx hydrated_test_3 true mv hydrated_test_3 true mv_const hydrated_test_3 true > SELECT o.name, r.name, bool_and(h.hydrated) FROM mz_internal.mz_compute_operator_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%' GROUP BY o.name, r.name; idx hydrated_test_3 true mv hydrated_test_3 true > DROP INDEX idx; > DROP MATERIALIZED VIEW mv_const; > SELECT o.name, r.name, h.hydrated, h.hydration_time IS NOT NULL FROM mz_internal.mz_compute_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%'; mv hydrated_test_3 true true > SELECT o.name, r.name, h.hydrated FROM mz_internal.mz_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%'; mv hydrated_test_3 true > SELECT o.name, r.name, bool_and(h.hydrated) FROM mz_internal.mz_compute_operator_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%' GROUP BY o.name, r.name; mv hydrated_test_3 true # Test adding new storage dataflows. > CREATE SOURCE src IN CLUSTER test FROM LOAD GENERATOR auction (UP TO 100); > CREATE TABLE accounts FROM SOURCE src (REFERENCE accounts); > CREATE TABLE auctions FROM SOURCE src (REFERENCE auctions); > CREATE TABLE bids FROM SOURCE src (REFERENCE bids); > CREATE TABLE organizations FROM SOURCE src (REFERENCE organizations); > CREATE TABLE users FROM SOURCE src (REFERENCE users); > CREATE CONNECTION kafka_conn TO KAFKA (BROKER '${testdrive.kafka-addr}', SECURITY PROTOCOL PLAINTEXT) > CREATE CONNECTION csr_conn TO CONFLUENT SCHEMA REGISTRY (URL '${testdrive.schema-registry-url}') > CREATE SINK snk IN CLUSTER test FROM mv INTO KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-sink1-${testdrive.seed}') FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn ENVELOPE DEBEZIUM # Webhook sources are not backed by dataflows, so they have no concept of # hydration and shouldn't show up in mz_hydration_statuses. > CREATE SOURCE web IN CLUSTER test FROM WEBHOOK BODY FORMAT JSON > SELECT o.name, r.name, h.hydrated FROM mz_internal.mz_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%'; mv hydrated_test_3 true src hydrated_test_3 true snk hydrated_test_3 true # Test dropping replicas. > DROP CLUSTER REPLICA test.hydrated_test_3 > SELECT o.name, r.name, h.hydrated FROM mz_internal.mz_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%'; # Test adding new replicas. > CREATE CLUSTER REPLICA test.hydrated_test_4 SIZE '1' > SELECT o.name, r.name, h.hydrated FROM mz_internal.mz_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%'; mv hydrated_test_4 true src hydrated_test_4 true snk hydrated_test_4 true # Test dropping dataflows. > DROP SINK snk > DROP SOURCE src CASCADE > DROP MATERIALIZED VIEW mv > SELECT o.name, r.name, h.hydrated FROM mz_internal.mz_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%'; # Test hydration status reporting with WMR dataflows. > CREATE MATERIALIZED VIEW mv_wmr AS WITH MUTUALLY RECURSIVE x (a int) AS ( SELECT * FROM t UNION ALL SELECT a + 1 FROM x WHERE a < 10 ) SELECT * FROM x; > CREATE MATERIALIZED VIEW mv_wmr_const AS WITH MUTUALLY RECURSIVE x (a int) AS ( VALUES (1) UNION ALL SELECT a + 1 FROM x WHERE a < 10 ) SELECT * FROM x; > CREATE MATERIALIZED VIEW mv_wmr_stuck AS WITH MUTUALLY RECURSIVE x (a int) AS ( VALUES (1) UNION ALL SELECT a + 1 FROM x ) SELECT * FROM x; > SELECT o.name, r.name, h.hydrated, h.hydration_time IS NOT NULL FROM mz_internal.mz_compute_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%'; mv_wmr hydrated_test_4 true true mv_wmr_const hydrated_test_4 true false mv_wmr_stuck hydrated_test_4 false false > SELECT o.name, r.name, h.hydrated FROM mz_internal.mz_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%'; mv_wmr hydrated_test_4 true mv_wmr_const hydrated_test_4 true mv_wmr_stuck hydrated_test_4 false > SELECT o.name, r.name, bool_and(h.hydrated) FROM mz_internal.mz_compute_operator_hydration_statuses h JOIN mz_cluster_replicas r ON (r.id = h.replica_id) JOIN mz_objects o ON (o.id = h.object_id) WHERE r.name LIKE 'hydrated_test%' AND o.id NOT LIKE 's%' GROUP BY o.name, r.name; mv_wmr hydrated_test_4 true mv_wmr_const hydrated_test_4 true mv_wmr_stuck hydrated_test_4 false > DROP MATERIALIZED VIEW mv_wmr > DROP MATERIALIZED VIEW mv_wmr_const > DROP MATERIALIZED VIEW mv_wmr_stuck # Test that incorrectly configured sinks do _not_ show as hydrated. > CREATE TABLE schema1 (a int) > CREATE TABLE schema2 (a text) > CREATE SINK snk_schema1 IN CLUSTER test FROM schema1 INTO KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-snk_schema1-${testdrive.seed}') FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn ENVELOPE DEBEZIUM > SELECT s.name, h.hydrated FROM mz_sinks s JOIN mz_internal.mz_hydration_statuses h ON (h.object_id = s.id) snk_schema1 true > CREATE SINK snk_schema2 IN CLUSTER test FROM schema2 INTO KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-snk_schema1-${testdrive.seed}') FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn ENVELOPE DEBEZIUM # If we have a bug, the sink's hydration status might toggle between `false` # and `true`, rather than always being `true`. Using retries might therefore # cause this test to pass even if it shouldn't. We instead disable retries and # manually check the hydration status a couple times. $ set-max-tries max-tries=1 > SELECT s.name, h.hydrated FROM mz_sinks s JOIN mz_internal.mz_hydration_statuses h ON (h.object_id = s.id) snk_schema1 true snk_schema2 false > SELECT s.name, h.hydrated FROM mz_sinks s JOIN mz_internal.mz_hydration_statuses h ON (h.object_id = s.id) snk_schema1 true snk_schema2 false > SELECT s.name, h.hydrated FROM mz_sinks s JOIN mz_internal.mz_hydration_statuses h ON (h.object_id = s.id) snk_schema1 true snk_schema2 false > SELECT s.name, h.hydrated FROM mz_sinks s JOIN mz_internal.mz_hydration_statuses h ON (h.object_id = s.id) snk_schema1 true snk_schema2 false > SELECT s.name, h.hydrated FROM mz_sinks s JOIN mz_internal.mz_hydration_statuses h ON (h.object_id = s.id) snk_schema1 true snk_schema2 false