- # Copyright Materialize, Inc. and contributors. All rights reserved.
- #
- # Use of this software is governed by the Business Source License
- # included in the LICENSE file at the root of this repository.
- #
- # As of the Change Date specified in that file, in accordance with
- # the Business Source License, use of this software will be governed
- # by the Apache License, Version 2.0.
- """
- Explicit deterministic tests for read-only mode and zero-downtime deploys (same
- version, no upgrade).
- """
- import time
- from datetime import datetime, timedelta
- from textwrap import dedent
- from threading import Thread
- from psycopg.errors import OperationalError
- from materialize import buildkite
- from materialize.mzcompose import get_default_system_parameters
- from materialize.mzcompose.composition import Composition
- from materialize.mzcompose.services.kafka import Kafka
- from materialize.mzcompose.services.materialized import (
- LEADER_STATUS_HEALTHCHECK,
- DeploymentStatus,
- Materialized,
- )
- from materialize.mzcompose.services.mysql import MySql
- from materialize.mzcompose.services.mz import Mz
- from materialize.mzcompose.services.postgres import (
- CockroachOrPostgresMetadata,
- Postgres,
- )
- from materialize.mzcompose.services.schema_registry import SchemaRegistry
- from materialize.mzcompose.services.testdrive import Testdrive
- from materialize.mzcompose.services.zookeeper import Zookeeper
- from materialize.ui import CommandFailureCausedUIError
- DEFAULT_TIMEOUT = "300s"
- SYSTEM_PARAMETER_DEFAULTS = get_default_system_parameters(zero_downtime=True)
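- # mz_old runs deploy generation 0 and accepts writes initially; mz_new runs
- # deploy generation 1 and comes up in read-only mode until it is promoted.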
- SERVICES = [
- MySql(),
- Postgres(),
- Zookeeper(),
- Kafka(),
- SchemaRegistry(),
- CockroachOrPostgresMetadata(),
- Mz(app_password=""),
- Materialized(
- name="mz_old",
- sanity_restart=False,
- deploy_generation=0,
- system_parameter_defaults=SYSTEM_PARAMETER_DEFAULTS,
- external_metadata_store=True,
- default_replication_factor=2,
- ),
- Materialized(
- name="mz_new",
- sanity_restart=False,
- deploy_generation=1,
- system_parameter_defaults=SYSTEM_PARAMETER_DEFAULTS,
- restart="on-failure",
- external_metadata_store=True,
- default_replication_factor=2,
- ),
- Testdrive(
- materialize_url="postgres://materialize@mz_old:6875",
- materialize_url_internal="postgres://materialize@mz_old:6877",
- mz_service="mz_old",
- materialize_params={"cluster": "cluster"},
- no_reset=True,
- seed=1,
- default_timeout=DEFAULT_TIMEOUT,
- ),
- ]
- def workflow_default(c: Composition) -> None:
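- """Run every other workflow in this file, sharded across Buildkite agents."""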
- def process(name: str) -> None:
- if name == "default":
- return
- with c.test_case(name):
- c.workflow(name)
- workflows = buildkite.shard_list(
- list(c.workflows.keys()), lambda workflow: workflow
- )
- c.test_parts(workflows, process)
- def workflow_read_only(c: Composition) -> None:
- """Verify read-only mode."""
- c.down(destroy_volumes=True)
- c.up(
- "zookeeper",
- "kafka",
- "schema-registry",
- "postgres",
- "mysql",
- "mz_old",
- {"name": "testdrive", "persistent": True},
- )
- # Make sure cluster is owned by the system so it doesn't get dropped
- # between testdrive runs.
- c.sql(
- """
- DROP CLUSTER IF EXISTS cluster CASCADE;
- CREATE CLUSTER cluster SIZE '2-1';
- GRANT ALL ON CLUSTER cluster TO materialize;
- ALTER SYSTEM SET cluster = cluster;
- CREATE CLUSTER cluster_singlereplica SIZE '1', REPLICATION FACTOR 1;
- GRANT ALL ON CLUSTER cluster_singlereplica TO materialize;
- """,
- service="mz_old",
- port=6877,
- user="mz_system",
- )
- # Inserts should be reflected when writes are allowed.
- c.testdrive(
- dedent(
- f"""
- > SET CLUSTER = cluster;
- > CREATE TABLE t (a int, b int);
- > CREATE CONNECTION IF NOT EXISTS kafka_conn FOR KAFKA BROKER '${{testdrive.kafka-addr}}', SECURITY PROTOCOL = 'PLAINTEXT';
- > CREATE CONNECTION IF NOT EXISTS csr_conn FOR CONFLUENT SCHEMA REGISTRY URL '${{testdrive.schema-registry-url}}';
- > CREATE SINK kafka_sink
- IN CLUSTER cluster
- FROM t
- INTO KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-kafka-sink-${{testdrive.seed}}')
- FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
- ENVELOPE DEBEZIUM;
- > INSERT INTO t VALUES (1, 2);
- > CREATE INDEX t_idx ON t (a, b);
- > CREATE MATERIALIZED VIEW mv AS SELECT sum(a) FROM t;
- > SET TRANSACTION_ISOLATION TO 'SERIALIZABLE';
- > SELECT * FROM mv;
- 1
- > SELECT max(b) FROM t;
- 2
- $ kafka-create-topic topic=kafka
- $ kafka-ingest format=bytes key-format=bytes key-terminator=: topic=kafka
- key1A,key1B:value1A,value1B
- > CREATE SOURCE kafka_source
- IN CLUSTER cluster
- FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-kafka-${{testdrive.seed}}');
- > CREATE TABLE kafka_source_tbl (key1, key2, value1, value2)
- FROM SOURCE kafka_source (REFERENCE "testdrive-kafka-${{testdrive.seed}}")
- KEY FORMAT CSV WITH 2 COLUMNS DELIMITED BY ','
- VALUE FORMAT CSV WITH 2 COLUMNS DELIMITED BY ','
- ENVELOPE UPSERT;
- > SELECT * FROM kafka_source_tbl
- key1A key1B value1A value1B
- $ postgres-execute connection=postgres://postgres:postgres@postgres
- CREATE USER postgres1 WITH SUPERUSER PASSWORD 'postgres';
- ALTER USER postgres1 WITH replication;
- DROP PUBLICATION IF EXISTS postgres_source;
- DROP TABLE IF EXISTS postgres_source_table;
- CREATE TABLE postgres_source_table (f1 TEXT, f2 INTEGER);
- ALTER TABLE postgres_source_table REPLICA IDENTITY FULL;
- INSERT INTO postgres_source_table SELECT 'A', 0;
- CREATE PUBLICATION postgres_source FOR ALL TABLES;
- > CREATE SECRET pgpass AS 'postgres';
- > CREATE CONNECTION pg FOR POSTGRES
- HOST 'postgres',
- DATABASE postgres,
- USER postgres1,
- PASSWORD SECRET pgpass;
- > CREATE SOURCE postgres_source
- IN CLUSTER cluster
- FROM POSTGRES CONNECTION pg
- (PUBLICATION 'postgres_source');
- > CREATE TABLE postgres_source_table FROM SOURCE postgres_source (REFERENCE postgres_source_table)
- > SELECT * FROM postgres_source_table;
- A 0
- $ mysql-connect name=mysql url=mysql://root@mysql password={MySql.DEFAULT_ROOT_PASSWORD}
- $ mysql-execute name=mysql
- # create the database if it does not exist yet but do not drop it
- CREATE DATABASE IF NOT EXISTS public;
- USE public;
- CREATE USER mysql1 IDENTIFIED BY 'mysql';
- GRANT REPLICATION SLAVE ON *.* TO mysql1;
- GRANT ALL ON public.* TO mysql1;
- CREATE TABLE mysql_source_table (f1 VARCHAR(32), f2 INTEGER);
- INSERT INTO mysql_source_table VALUES ('A', 0);
- > CREATE SECRET mysqlpass AS 'mysql';
- > CREATE CONNECTION mysql TO MYSQL (
- HOST 'mysql',
- USER mysql1,
- PASSWORD SECRET mysqlpass);
- > CREATE SOURCE mysql_source
- IN CLUSTER cluster
- FROM MYSQL CONNECTION mysql;
- > CREATE TABLE mysql_source_table FROM SOURCE mysql_source (REFERENCE public.mysql_source_table);
- > SELECT * FROM mysql_source_table;
- A 0
- $ kafka-verify-topic sink=materialize.public.kafka_sink
- > CREATE SOURCE kafka_sink_source
- IN CLUSTER cluster
- FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-kafka-sink-${{testdrive.seed}}')
- > CREATE TABLE kafka_sink_source_tbl FROM SOURCE kafka_sink_source (REFERENCE "testdrive-kafka-sink-${{testdrive.seed}}")
- FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
- ENVELOPE NONE
- > SELECT (before).a, (before).b, (after).a, (after).b FROM kafka_sink_source_tbl
- <null> <null> 1 2
- > CREATE SOURCE webhook_source
- IN CLUSTER cluster_singlereplica
- FROM WEBHOOK BODY FORMAT TEXT
- $ webhook-append database=materialize schema=public name=webhook_source
- AAA
- > SELECT * FROM webhook_source
- AAA
- """
- )
- )
- # Restart in a new deploy generation, which will cause Materialize to
- # boot in read-only mode.
- with c.override(
- Materialized(
- name="mz_old",
- deploy_generation=1,
- external_metadata_store=True,
- system_parameter_defaults=SYSTEM_PARAMETER_DEFAULTS,
- default_replication_factor=2,
- )
- ):
- c.up("mz_old")
- c.testdrive(
- dedent(
- f"""
- $ webhook-append database=materialize schema=public name=webhook_source status=500
- BBB
- $ kafka-ingest format=bytes key-format=bytes key-terminator=: topic=kafka
- key2A,key2B:value2A,value2B
- $ postgres-execute connection=postgres://postgres:postgres@postgres
- INSERT INTO postgres_source_table VALUES ('B', 1);
- $ mysql-connect name=mysql url=mysql://root@mysql password={MySql.DEFAULT_ROOT_PASSWORD}
- $ mysql-execute name=mysql
- USE public;
- INSERT INTO mysql_source_table VALUES ('B', 1);
- > SET CLUSTER = cluster;
- > SELECT 1
- 1
- ! INSERT INTO t VALUES (3, 4);
- contains: cannot write in read-only mode
- > SET TRANSACTION_ISOLATION TO 'SERIALIZABLE';
- > SELECT * FROM mv;
- 1
- # TODO: Currently hangs
- # > SELECT max(b) FROM t;
- # 2
- > SELECT mz_unsafe.mz_sleep(5)
- <null>
- ! INSERT INTO t VALUES (5, 6);
- contains: cannot write in read-only mode
- > SELECT * FROM mv;
- 1
- ! DROP INDEX t_idx
- contains: cannot write in read-only mode
- ! CREATE INDEX t_idx2 ON t (a, b)
- contains: cannot write in read-only mode
- ! CREATE MATERIALIZED VIEW mv2 AS SELECT sum(a) FROM t;
- contains: cannot write in read-only mode
- $ set-regex match=(s\\d+|\\d{{13}}|[ ]{{12}}0|u\\d{{1,3}}|\\(\\d+-\\d\\d-\\d\\d\\s\\d\\d:\\d\\d:\\d\\d\\.\\d\\d\\d\\)) replacement=<>
- > EXPLAIN TIMESTAMP FOR SELECT * FROM mv;
- " query timestamp: <> <>\\nlargest not in advance of upper: <> <>\\n upper:[<> <>]\\n since:[<> <>]\\n can respond immediately: true\\n timeline: Some(EpochMilliseconds)\\n session wall time: <> <>\\n\\nsource materialize.public.mv (<>, storage):\\n read frontier:[<> <>]\\n write frontier:[<> <>]\\n\\nbinding constraints:\\nlower:\\n (StorageInput([User(6)])): [<> <>]\\n"
- > SELECT * FROM kafka_source_tbl
- key1A key1B value1A value1B
- > SELECT * FROM postgres_source_table
- A 0
- > SELECT * FROM mysql_source_table;
- A 0
- > SELECT (before).a, (before).b, (after).a, (after).b FROM kafka_sink_source_tbl
- <null> <null> 1 2
- > SELECT * FROM webhook_source
- AAA
- """
- )
- )
- c.up("mz_old")
- c.await_mz_deployment_status(DeploymentStatus.READY_TO_PROMOTE, "mz_old")
- c.promote_mz("mz_old")
- # After promotion, the deployment should boot with writes allowed.
- with c.override(
- Materialized(
- name="mz_old",
- healthcheck=[
- "CMD-SHELL",
- """[ "$(curl -f localhost:6878/api/leader/status)" = '{"status":"IsLeader"}' ]""",
- ],
- deploy_generation=1,
- system_parameter_defaults=SYSTEM_PARAMETER_DEFAULTS,
- external_metadata_store=True,
- default_replication_factor=2,
- )
- ):
- c.up("mz_old")
- c.testdrive(
- dedent(
- f"""
- $ webhook-append database=materialize schema=public name=webhook_source
- CCC
- > SET CLUSTER = cluster;
- > SET TRANSACTION_ISOLATION TO 'SERIALIZABLE';
- > CREATE MATERIALIZED VIEW mv2 AS SELECT sum(a) FROM t;
- > SELECT * FROM mv;
- 1
- > SELECT * FROM mv2;
- 1
- > SELECT max(b) FROM t;
- 2
- > INSERT INTO t VALUES (7, 8);
- > SELECT * FROM mv;
- 8
- > SELECT * FROM mv2;
- 8
- > SELECT max(b) FROM t;
- 8
- > SELECT * FROM kafka_source_tbl
- key1A key1B value1A value1B
- key2A key2B value2A value2B
- > SELECT * FROM postgres_source_table
- A 0
- B 1
- > SELECT * FROM mysql_source_table;
- A 0
- B 1
- > SELECT (before).a, (before).b, (after).a, (after).b FROM kafka_sink_source_tbl
- <null> <null> 1 2
- <null> <null> 7 8
- $ kafka-ingest format=bytes key-format=bytes key-terminator=: topic=kafka
- key3A,key3B:value3A,value3B
- $ postgres-execute connection=postgres://postgres:postgres@postgres
- INSERT INTO postgres_source_table VALUES ('C', 2);
- $ mysql-connect name=mysql url=mysql://root@mysql password={MySql.DEFAULT_ROOT_PASSWORD}
- $ mysql-execute name=mysql
- USE public;
- INSERT INTO mysql_source_table VALUES ('C', 2);
- > SELECT * FROM kafka_source_tbl
- key1A key1B value1A value1B
- key2A key2B value2A value2B
- key3A key3B value3A value3B
- > SELECT * FROM postgres_source_table
- A 0
- B 1
- C 2
- > SELECT * FROM mysql_source_table;
- A 0
- B 1
- C 2
- > SELECT * FROM webhook_source
- AAA
- CCC
- """
- )
- )
- def workflow_basic(c: Composition) -> None:
- """Verify basic 0dt deployment flow."""
- c.down(destroy_volumes=True)
- c.up(
- "zookeeper",
- "kafka",
- "schema-registry",
- "postgres",
- "mysql",
- "mz_old",
- {"name": "testdrive", "persistent": True},
- )
- # Make sure cluster is owned by the system so it doesn't get dropped
- # between testdrive runs.
- c.sql(
- """
- DROP CLUSTER IF EXISTS cluster CASCADE;
- CREATE CLUSTER cluster SIZE '2-1';
- GRANT ALL ON CLUSTER cluster TO materialize;
- ALTER SYSTEM SET cluster = cluster;
- CREATE CLUSTER cluster_singlereplica SIZE '1', REPLICATION FACTOR 1;
- GRANT ALL ON CLUSTER cluster_singlereplica TO materialize;
- """,
- service="mz_old",
- port=6877,
- user="mz_system",
- )
- # Inserts should be reflected when writes are allowed.
- c.testdrive(
- dedent(
- f"""
- > SET CLUSTER = cluster;
- > CREATE TABLE t (a int, b int);
- > CREATE CONNECTION IF NOT EXISTS kafka_conn FOR KAFKA BROKER '${{testdrive.kafka-addr}}', SECURITY PROTOCOL = 'PLAINTEXT';
- > CREATE CONNECTION IF NOT EXISTS csr_conn FOR CONFLUENT SCHEMA REGISTRY URL '${{testdrive.schema-registry-url}}';
- > CREATE SINK kafka_sink
- IN CLUSTER cluster
- FROM t
- INTO KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-kafka-sink-${{testdrive.seed}}')
- FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
- ENVELOPE DEBEZIUM;
- > INSERT INTO t VALUES (1, 2);
- > CREATE INDEX t_idx ON t (a, b);
- > CREATE MATERIALIZED VIEW mv AS SELECT sum(a) FROM t;
- > SET TRANSACTION_ISOLATION TO 'SERIALIZABLE';
- > SELECT * FROM mv;
- 1
- > SELECT max(b) FROM t;
- 2
- $ kafka-create-topic topic=kafka
- $ kafka-ingest format=bytes key-format=bytes key-terminator=: topic=kafka
- key1A,key1B:value1A,value1B
- > CREATE SOURCE kafka_source
- IN CLUSTER cluster
- FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-kafka-${{testdrive.seed}}');
- > CREATE TABLE kafka_source_tbl (key1, key2, value1, value2)
- FROM SOURCE kafka_source (REFERENCE "testdrive-kafka-${{testdrive.seed}}")
- KEY FORMAT CSV WITH 2 COLUMNS DELIMITED BY ','
- VALUE FORMAT CSV WITH 2 COLUMNS DELIMITED BY ','
- ENVELOPE UPSERT;
- > SELECT * FROM kafka_source_tbl
- key1A key1B value1A value1B
- $ postgres-execute connection=postgres://postgres:postgres@postgres
- CREATE USER postgres1 WITH SUPERUSER PASSWORD 'postgres';
- ALTER USER postgres1 WITH replication;
- DROP PUBLICATION IF EXISTS postgres_source;
- DROP TABLE IF EXISTS postgres_source_table;
- CREATE TABLE postgres_source_table (f1 TEXT, f2 INTEGER);
- ALTER TABLE postgres_source_table REPLICA IDENTITY FULL;
- INSERT INTO postgres_source_table SELECT 'A', 0;
- CREATE PUBLICATION postgres_source FOR ALL TABLES;
- > CREATE SECRET pgpass AS 'postgres';
- > CREATE CONNECTION pg FOR POSTGRES
- HOST 'postgres',
- DATABASE postgres,
- USER postgres1,
- PASSWORD SECRET pgpass;
- > CREATE SOURCE postgres_source
- IN CLUSTER cluster
- FROM POSTGRES CONNECTION pg
- (PUBLICATION 'postgres_source');
- > CREATE TABLE postgres_source_table FROM SOURCE postgres_source (REFERENCE postgres_source_table)
- > SELECT * FROM postgres_source_table;
- A 0
- $ mysql-connect name=mysql url=mysql://root@mysql password={MySql.DEFAULT_ROOT_PASSWORD}
- $ mysql-execute name=mysql
- # create the database if it does not exist yet but do not drop it
- CREATE DATABASE IF NOT EXISTS public;
- USE public;
- CREATE USER mysql1 IDENTIFIED BY 'mysql';
- GRANT REPLICATION SLAVE ON *.* TO mysql1;
- GRANT ALL ON public.* TO mysql1;
- CREATE TABLE mysql_source_table (f1 VARCHAR(32), f2 INTEGER);
- INSERT INTO mysql_source_table VALUES ('A', 0);
- > CREATE SECRET mysqlpass AS 'mysql';
- > CREATE CONNECTION mysql TO MYSQL (
- HOST 'mysql',
- USER mysql1,
- PASSWORD SECRET mysqlpass);
- > CREATE SOURCE mysql_source1
- IN CLUSTER cluster
- FROM MYSQL CONNECTION mysql;
- > CREATE TABLE mysql_source_table FROM SOURCE mysql_source1 (REFERENCE public.mysql_source_table);
- > SELECT * FROM mysql_source_table;
- A 0
- $ kafka-verify-topic sink=materialize.public.kafka_sink
- > CREATE SOURCE kafka_sink_source
- IN CLUSTER cluster
- FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-kafka-sink-${{testdrive.seed}}')
- > CREATE TABLE kafka_sink_source_tbl FROM SOURCE kafka_sink_source (REFERENCE "testdrive-kafka-sink-${{testdrive.seed}}")
- FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
- ENVELOPE NONE
- > SELECT (before).a, (before).b, (after).a, (after).b FROM kafka_sink_source_tbl
- <null> <null> 1 2
- > CREATE SOURCE webhook_source
- IN CLUSTER cluster_singlereplica
- FROM WEBHOOK BODY FORMAT TEXT
- $ webhook-append database=materialize schema=public name=webhook_source
- AAA
- > SELECT * FROM webhook_source
- AAA
- $ set-max-tries max-tries=1
- $ set-regex match=\\d{{13,20}} replacement=<TIMESTAMP>
- > BEGIN
- > DECLARE c CURSOR FOR SUBSCRIBE (SELECT a FROM t);
- > FETCH ALL c WITH (timeout='5s');
- <TIMESTAMP> 1 1
- > COMMIT
- """
- )
- )
- # Start new Materialize in a new deploy generation, which will cause
- # Materialize to boot in read-only mode.
- c.up("mz_new")
- # Verify against new Materialize that it is in read-only mode
- with c.override(
- Testdrive(
- materialize_url="postgres://materialize@mz_new:6875",
- materialize_url_internal="postgres://materialize@mz_new:6877",
- mz_service="mz_new",
- materialize_params={"cluster": "cluster"},
- no_reset=True,
- seed=1,
- default_timeout=DEFAULT_TIMEOUT,
- )
- ):
- c.up({"name": "testdrive", "persistent": True})
- c.testdrive(
- dedent(
- f"""
- $ webhook-append database=materialize schema=public name=webhook_source status=500
- BBB
- $ kafka-ingest format=bytes key-format=bytes key-terminator=: topic=kafka
- key2A,key2B:value2A,value2B
- $ postgres-execute connection=postgres://postgres:postgres@postgres
- INSERT INTO postgres_source_table VALUES ('B', 1);
- $ mysql-connect name=mysql url=mysql://root@mysql password={MySql.DEFAULT_ROOT_PASSWORD}
- $ mysql-execute name=mysql
- USE public;
- INSERT INTO mysql_source_table VALUES ('B', 1);
- > SET CLUSTER = cluster;
- > SELECT 1
- 1
- ! INSERT INTO t VALUES (3, 4);
- contains: cannot write in read-only mode
- > SET TRANSACTION_ISOLATION TO 'SERIALIZABLE';
- > SELECT * FROM mv;
- 1
- # TODO: Currently hangs
- # > SELECT max(b) FROM t;
- # 2
- > SELECT mz_unsafe.mz_sleep(5)
- <null>
- ! INSERT INTO t VALUES (5, 6);
- contains: cannot write in read-only mode
- > SELECT * FROM mv;
- 1
- ! DROP INDEX t_idx
- contains: cannot write in read-only mode
- ! CREATE INDEX t_idx2 ON t (a, b)
- contains: cannot write in read-only mode
- ! CREATE MATERIALIZED VIEW mv2 AS SELECT sum(a) FROM t;
- contains: cannot write in read-only mode
- $ set-regex match=(s\\d+|\\d{{13}}|[ ]{{12}}0|u\\d{{1,3}}|\\(\\d+-\\d\\d-\\d\\d\\s\\d\\d:\\d\\d:\\d\\d\\.\\d\\d\\d\\)) replacement=<>
- > EXPLAIN TIMESTAMP FOR SELECT * FROM mv;
- " query timestamp: <> <>\\nlargest not in advance of upper: <> <>\\n upper:[<> <>]\\n since:[<> <>]\\n can respond immediately: true\\n timeline: Some(EpochMilliseconds)\\n session wall time: <> <>\\n\\nsource materialize.public.mv (<>, storage):\\n read frontier:[<> <>]\\n write frontier:[<> <>]\\n\\nbinding constraints:\\nlower:\\n (StorageInput([User(6)])): [<> <>]\\n"
- > SELECT * FROM kafka_source_tbl
- key1A key1B value1A value1B
- key2A key2B value2A value2B
- > SELECT * FROM postgres_source_table
- A 0
- B 1
- > SELECT * FROM mysql_source_table;
- A 0
- B 1
- > SELECT (before).a, (before).b, (after).a, (after).b FROM kafka_sink_source_tbl
- <null> <null> 1 2
- > SELECT * FROM webhook_source
- AAA
- $ set-max-tries max-tries=1
- $ set-regex match=\\d{{13,20}} replacement=<TIMESTAMP>
- > BEGIN
- ! DECLARE c CURSOR FOR SUBSCRIBE (SELECT a FROM t);
- contains: cannot write in read-only mode
- > ROLLBACK
- # Actual subscribes without a declare still work though
- > SUBSCRIBE (WITH a(x) AS (SELECT 'a') SELECT generate_series(1, 2), x FROM a)
- <TIMESTAMP> 1 1 a
- <TIMESTAMP> 1 2 a
- """
- )
- )
- # But the old Materialize can still run writes
- c.up({"name": "testdrive", "persistent": True})
- c.testdrive(
- dedent(
- f"""
- $ webhook-append database=materialize schema=public name=webhook_source
- CCC
- $ kafka-ingest format=bytes key-format=bytes key-terminator=: topic=kafka
- key3A,key3B:value3A,value3B
- $ postgres-execute connection=postgres://postgres:postgres@postgres
- INSERT INTO postgres_source_table VALUES ('C', 2);
- $ mysql-connect name=mysql url=mysql://root@mysql password={MySql.DEFAULT_ROOT_PASSWORD}
- $ mysql-execute name=mysql
- USE public;
- INSERT INTO mysql_source_table VALUES ('C', 2);
- > SET CLUSTER = cluster;
- > SELECT 1
- 1
- > INSERT INTO t VALUES (3, 4);
- > SET TRANSACTION_ISOLATION TO 'SERIALIZABLE';
- > SELECT * FROM mv;
- 4
- > SELECT max(b) FROM t;
- 4
- > SELECT mz_unsafe.mz_sleep(5)
- <null>
- > INSERT INTO t VALUES (5, 6);
- > SELECT * FROM mv;
- 9
- > DROP INDEX t_idx
- > CREATE INDEX t_idx2 ON t (a, b)
- > CREATE MATERIALIZED VIEW mv2 AS SELECT sum(a) FROM t;
- $ set-regex match=(s\\d+|\\d{{13}}|[ ]{{12}}0|u\\d{{1,3}}|\\(\\d+-\\d\\d-\\d\\d\\s\\d\\d:\\d\\d:\\d\\d\\.\\d\\d\\d\\)) replacement=<>
- > EXPLAIN TIMESTAMP FOR SELECT * FROM mv;
- " query timestamp: <> <>\\nlargest not in advance of upper: <> <>\\n upper:[<> <>]\\n since:[<> <>]\\n can respond immediately: true\\n timeline: Some(EpochMilliseconds)\\n session wall time: <> <>\\n\\nsource materialize.public.mv (<>, storage):\\n read frontier:[<> <>]\\n write frontier:[<> <>]\\n\\nbinding constraints:\\nlower:\\n (StorageInput([User(6)])): [<> <>]\\n"
- > SELECT * FROM kafka_source_tbl
- key1A key1B value1A value1B
- key2A key2B value2A value2B
- key3A key3B value3A value3B
- > SELECT * FROM postgres_source_table
- A 0
- B 1
- C 2
- > SELECT * FROM mysql_source_table;
- A 0
- B 1
- C 2
- > SELECT (before).a, (before).b, (after).a, (after).b FROM kafka_sink_source_tbl
- <null> <null> 1 2
- <null> <null> 3 4
- <null> <null> 5 6
- > SELECT * FROM webhook_source
- AAA
- CCC
- $ set-max-tries max-tries=1
- $ set-regex match=\\d{{13,20}} replacement=<TIMESTAMP>
- > BEGIN
- > DECLARE c CURSOR FOR SUBSCRIBE (SELECT a FROM t);
- > FETCH ALL c WITH (timeout='5s');
- <TIMESTAMP> 1 1
- <TIMESTAMP> 1 3
- <TIMESTAMP> 1 5
- > COMMIT
- """
- )
- )
- with c.override(
- Testdrive(
- materialize_url="postgres://materialize@mz_new:6875",
- materialize_url_internal="postgres://materialize@mz_new:6877",
- mz_service="mz_new",
- materialize_params={"cluster": "cluster"},
- no_reset=True,
- seed=1,
- default_timeout=DEFAULT_TIMEOUT,
- )
- ):
- c.up({"name": "testdrive", "persistent": True})
- c.testdrive(
- dedent(
- """
- $ webhook-append database=materialize schema=public name=webhook_source status=500
- DDD
- > SET CLUSTER = cluster;
- > SELECT 1
- 1
- ! INSERT INTO t VALUES (3, 4);
- contains: cannot write in read-only mode
- > SET TRANSACTION_ISOLATION TO 'SERIALIZABLE';
- > SELECT * FROM mv;
- 9
- > SELECT max(b) FROM t;
- 6
- > SELECT * FROM mv;
- 9
- > SELECT * FROM kafka_source_tbl
- key1A key1B value1A value1B
- key2A key2B value2A value2B
- key3A key3B value3A value3B
- > SELECT * FROM postgres_source_table
- A 0
- B 1
- C 2
- > SELECT * FROM mysql_source_table;
- A 0
- B 1
- C 2
- > SELECT (before).a, (before).b, (after).a, (after).b FROM kafka_sink_source_tbl
- <null> <null> 1 2
- <null> <null> 3 4
- <null> <null> 5 6
- > SELECT * FROM webhook_source
- AAA
- CCC
- $ set-max-tries max-tries=1
- $ set-regex match=\\d{13,20} replacement=<TIMESTAMP>
- > BEGIN
- ! DECLARE c CURSOR FOR SUBSCRIBE (SELECT a FROM t);
- contains: cannot write in read-only mode
- > ROLLBACK
- # Actual subscribes without a declare still work though
- > SUBSCRIBE (WITH a(x) AS (SELECT 'a') SELECT generate_series(1, 2), x FROM a)
- <TIMESTAMP> 1 1 a
- <TIMESTAMP> 1 2 a
- """
- )
- )
- c.await_mz_deployment_status(DeploymentStatus.READY_TO_PROMOTE, "mz_new")
- c.promote_mz("mz_new")
- # Give some time for Mz to restart after promotion
- for i in range(10):
- try:
- c.sql("SELECT 1", service="mz_old")
- except OperationalError as e:
- assert (
- "server closed the connection unexpectedly" in str(e)
- or "Can't create a connection to host" in str(e)
- or "Connection refused" in str(e)
- ), f"Unexpected error: {e}"
- except CommandFailureCausedUIError as e:
- # service "mz_old" is not running
- assert "running docker compose failed" in str(
- e
- ), f"Unexpected error: {e}"
- break
- time.sleep(1)
- else:
- raise RuntimeError("mz_old didn't stop running within 10 seconds")
- for i in range(10):
- try:
- c.sql("SELECT 1", service="mz_new")
- break
- except CommandFailureCausedUIError:
- pass
- except OperationalError:
- pass
- time.sleep(1)
- else:
- raise RuntimeError("mz_new didn't come up within 10 seconds")
- c.await_mz_deployment_status(DeploymentStatus.IS_LEADER, "mz_new")
- c.testdrive(
- dedent(
- f"""
- $ webhook-append database=materialize schema=public name=webhook_source
- EEE
- > SET CLUSTER = cluster;
- > SET TRANSACTION_ISOLATION TO 'SERIALIZABLE';
- > CREATE MATERIALIZED VIEW mv3 AS SELECT sum(a) FROM t;
- > SELECT * FROM mv;
- 9
- > SELECT * FROM mv2;
- 9
- > SELECT * FROM mv3;
- 9
- > SELECT max(b) FROM t;
- 6
- > INSERT INTO t VALUES (7, 8);
- > SELECT * FROM mv;
- 16
- > SELECT * FROM mv2;
- 16
- > SELECT max(b) FROM t;
- 8
- > SELECT * FROM kafka_source_tbl
- key1A key1B value1A value1B
- key2A key2B value2A value2B
- key3A key3B value3A value3B
- > SELECT * FROM postgres_source_table
- A 0
- B 1
- C 2
- > SELECT * FROM mysql_source_table;
- A 0
- B 1
- C 2
- $ kafka-ingest format=bytes key-format=bytes key-terminator=: topic=kafka
- key4A,key4B:value4A,value4B
- $ postgres-execute connection=postgres://postgres:postgres@postgres
- INSERT INTO postgres_source_table VALUES ('D', 3);
- $ mysql-connect name=mysql url=mysql://root@mysql password={MySql.DEFAULT_ROOT_PASSWORD}
- $ mysql-execute name=mysql
- USE public;
- INSERT INTO mysql_source_table VALUES ('D', 3);
- > SELECT * FROM kafka_source_tbl
- key1A key1B value1A value1B
- key2A key2B value2A value2B
- key3A key3B value3A value3B
- key4A key4B value4A value4B
- > SELECT * FROM postgres_source_table
- A 0
- B 1
- C 2
- D 3
- > SELECT * FROM mysql_source_table;
- A 0
- B 1
- C 2
- D 3
- > SELECT (before).a, (before).b, (after).a, (after).b FROM kafka_sink_source_tbl
- <null> <null> 1 2
- <null> <null> 3 4
- <null> <null> 5 6
- <null> <null> 7 8
- > SELECT * FROM webhook_source
- AAA
- CCC
- EEE
- $ set-max-tries max-tries=1
- $ set-regex match=\\d{{13,20}} replacement=<TIMESTAMP>
- > BEGIN
- > DECLARE c CURSOR FOR SUBSCRIBE (SELECT a FROM t);
- > FETCH ALL c WITH (timeout='5s');
- <TIMESTAMP> 1 1
- <TIMESTAMP> 1 3
- <TIMESTAMP> 1 5
- <TIMESTAMP> 1 7
- > COMMIT
- """
- )
- )
- def workflow_kafka_source_rehydration(c: Composition) -> None:
- """Verify Kafka source rehydration in 0dt deployment"""
- c.down(destroy_volumes=True)
- c.up(
- "zookeeper",
- "kafka",
- "schema-registry",
- "mz_old",
- {"name": "testdrive", "persistent": True},
- )
- count = 1000000
- repeats = 20
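- # Each testdrive batch below ingests `count` records; `repeats` batches are
- # ingested in total, so the source ends up with count * repeats rows.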
- # Make sure cluster is owned by the system so it doesn't get dropped
- # between testdrive runs.
- c.sql(
- """
- DROP CLUSTER IF EXISTS cluster CASCADE;
- CREATE CLUSTER cluster SIZE '1';
- GRANT ALL ON CLUSTER cluster TO materialize;
- ALTER SYSTEM SET cluster = cluster;
- CREATE CLUSTER cluster_singlereplica SIZE '1', REPLICATION FACTOR 1;
- GRANT ALL ON CLUSTER cluster_singlereplica TO materialize;
- """,
- service="mz_old",
- port=6877,
- user="mz_system",
- )
- start_time = time.time()
- c.testdrive(
- dedent(
- f"""
- > SET CLUSTER = cluster;
- > CREATE CONNECTION IF NOT EXISTS kafka_conn FOR KAFKA BROKER '${{testdrive.kafka-addr}}', SECURITY PROTOCOL = 'PLAINTEXT';
- > CREATE CONNECTION IF NOT EXISTS csr_conn FOR CONFLUENT SCHEMA REGISTRY URL '${{testdrive.schema-registry-url}}';
- $ kafka-create-topic topic=kafka-large
- $ kafka-ingest format=bytes key-format=bytes key-terminator=: topic=kafka-large repeat={count}
- key0A,key${{kafka-ingest.iteration}}:value0A,${{kafka-ingest.iteration}}
- > CREATE SOURCE kafka_source
- IN CLUSTER cluster
- FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-kafka-large-${{testdrive.seed}}');
- > CREATE TABLE kafka_source_tbl (key1, key2, value1, value2)
- FROM SOURCE kafka_source (REFERENCE "testdrive-kafka-large-${{testdrive.seed}}")
- KEY FORMAT CSV WITH 2 COLUMNS DELIMITED BY ','
- VALUE FORMAT CSV WITH 2 COLUMNS DELIMITED BY ','
- ENVELOPE UPSERT;
- > CREATE VIEW kafka_source_cnt AS SELECT count(*) FROM kafka_source_tbl
- > CREATE DEFAULT INDEX on kafka_source_cnt
- > SELECT * FROM kafka_source_cnt
- {count}
- """
- )
- )
- for i in range(1, repeats):
- c.testdrive(
- dedent(
- f"""
- $ kafka-ingest format=bytes key-format=bytes key-terminator=: topic=kafka-large repeat={count}
- key{i}A,key{i}${{kafka-ingest.iteration}}:value{i}A,${{kafka-ingest.iteration}}
- > SELECT * FROM kafka_source_cnt
- {count*(i+1)}
- """
- )
- )
- elapsed = time.time() - start_time
- print(f"initial ingestion took {elapsed} seconds")
- with c.override(
- Materialized(
- name="mz_new",
- sanity_restart=False,
- deploy_generation=1,
- system_parameter_defaults=SYSTEM_PARAMETER_DEFAULTS,
- restart="on-failure",
- external_metadata_store=True,
- default_replication_factor=2,
- ),
- Testdrive(
- materialize_url="postgres://materialize@mz_new:6875",
- materialize_url_internal="postgres://materialize@mz_new:6877",
- mz_service="mz_new",
- materialize_params={"cluster": "cluster"},
- no_reset=True,
- seed=1,
- default_timeout=DEFAULT_TIMEOUT,
- ),
- ):
- c.up("mz_new")
- start_time = time.time()
- c.await_mz_deployment_status(DeploymentStatus.READY_TO_PROMOTE, "mz_new")
- elapsed = time.time() - start_time
- print(f"re-hydration took {elapsed} seconds")
- c.promote_mz("mz_new")
- start_time = time.time()
- c.await_mz_deployment_status(
- DeploymentStatus.IS_LEADER, "mz_new", sleep_time=None
- )
- elapsed = time.time() - start_time
- print(f"promotion took {elapsed} seconds")
- start_time = time.time()
- result = c.sql_query("SELECT * FROM kafka_source_cnt", service="mz_new")
- elapsed = time.time() - start_time
- print(f"final check took {elapsed} seconds")
- assert result[0][0] == count * repeats, f"Wrong result: {result}"
- result = c.sql_query("SELECT count(*) FROM kafka_source_tbl", service="mz_new")
- assert result[0][0] == count * repeats, f"Wrong result: {result}"
- assert (
- elapsed < 3
- ), f"Took {elapsed}s to SELECT on Kafka source after 0dt upgrade, is it hydrated?"
- start_time = time.time()
- result = c.sql_query("SELECT 1", service="mz_new")
- elapsed = time.time() - start_time
- print(f"bootstrapping (checked via SELECT 1) took {elapsed} seconds")
- assert result[0][0] == 1, f"Wrong result: {result}"
- print("Ingesting again")
- for i in range(repeats, repeats * 2):
- c.testdrive(
- dedent(
- f"""
- $ kafka-ingest format=bytes key-format=bytes key-terminator=: topic=kafka-large repeat={count}
- key{i}A,key{i}${{kafka-ingest.iteration}}:value{i}A,${{kafka-ingest.iteration}}
- """
- )
- )
- c.testdrive(
- dedent(
- f"""
- > SET CLUSTER = cluster;
- > SELECT * FROM kafka_source_cnt
- {2*count*repeats}
- > SELECT count(*) FROM kafka_source_tbl
- {2*count*repeats}
- """
- )
- )
- def workflow_kafka_source_rehydration_large_initial(c: Composition) -> None:
- """Verify Kafka source rehydration in 0dt deployment"""
- c.down(destroy_volumes=True)
- c.up(
- "zookeeper",
- "kafka",
- "schema-registry",
- "mz_old",
- {"name": "testdrive", "persistent": True},
- )
- count = 1000000
- repeats = 20
- # Make sure cluster is owned by the system so it doesn't get dropped
- # between testdrive runs.
- c.sql(
- """
- DROP CLUSTER IF EXISTS cluster CASCADE;
- CREATE CLUSTER cluster SIZE '1';
- GRANT ALL ON CLUSTER cluster TO materialize;
- ALTER SYSTEM SET cluster = cluster;
- CREATE CLUSTER cluster_singlereplica SIZE '1', REPLICATION FACTOR 1;
- GRANT ALL ON CLUSTER cluster_singlereplica TO materialize;
- """,
- service="mz_old",
- port=6877,
- user="mz_system",
- )
- start_time = time.time()
- c.testdrive(
- dedent(
- """
- $ kafka-create-topic topic=kafka-large
- """
- )
- )
- for i in range(repeats):
- c.testdrive(
- dedent(
- f"""
- $ kafka-ingest format=bytes key-format=bytes key-terminator=: topic=kafka-large repeat={count}
- key{i}A,key{i}${{kafka-ingest.iteration}}:value{i}A,${{kafka-ingest.iteration}}
- """
- )
- )
- c.testdrive(
- dedent(
- f"""
- > SET CLUSTER = cluster;
- > CREATE CONNECTION IF NOT EXISTS kafka_conn FOR KAFKA BROKER '${{testdrive.kafka-addr}}', SECURITY PROTOCOL = 'PLAINTEXT';
- > CREATE CONNECTION IF NOT EXISTS csr_conn FOR CONFLUENT SCHEMA REGISTRY URL '${{testdrive.schema-registry-url}}';
- > CREATE SOURCE kafka_source
- IN CLUSTER cluster
- FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-kafka-large-${{testdrive.seed}}');
- > CREATE TABLE kafka_source_tbl (key1, key2, value1, value2)
- FROM SOURCE kafka_source (REFERENCE "testdrive-kafka-large-${{testdrive.seed}}")
- KEY FORMAT CSV WITH 2 COLUMNS DELIMITED BY ','
- VALUE FORMAT CSV WITH 2 COLUMNS DELIMITED BY ','
- ENVELOPE UPSERT;
- > CREATE VIEW kafka_source_cnt AS SELECT count(*) FROM kafka_source_tbl
- > CREATE DEFAULT INDEX on kafka_source_cnt
- > SELECT * FROM kafka_source_cnt
- {count*repeats}
- """
- )
- )
- elapsed = time.time() - start_time
- print(f"initial ingestion took {elapsed} seconds")
- with c.override(
- Materialized(
- name="mz_new",
- sanity_restart=False,
- deploy_generation=1,
- system_parameter_defaults=SYSTEM_PARAMETER_DEFAULTS,
- restart="on-failure",
- external_metadata_store=True,
- default_replication_factor=2,
- ),
- Testdrive(
- materialize_url="postgres://materialize@mz_new:6875",
- materialize_url_internal="postgres://materialize@mz_new:6877",
- mz_service="mz_new",
- materialize_params={"cluster": "cluster"},
- no_reset=True,
- seed=1,
- default_timeout=DEFAULT_TIMEOUT,
- ),
- ):
- c.up("mz_new")
- start_time = time.time()
- c.await_mz_deployment_status(DeploymentStatus.READY_TO_PROMOTE, "mz_new")
- elapsed = time.time() - start_time
- print(f"re-hydration took {elapsed} seconds")
- c.promote_mz("mz_new")
- start_time = time.time()
- c.await_mz_deployment_status(
- DeploymentStatus.IS_LEADER, "mz_new", sleep_time=None
- )
- elapsed = time.time() - start_time
- print(f"promotion took {elapsed} seconds")
- start_time = time.time()
- result = c.sql_query("SELECT * FROM kafka_source_cnt", service="mz_new")
- elapsed = time.time() - start_time
- print(f"final check took {elapsed} seconds")
- assert result[0][0] == count * repeats, f"Wrong result: {result}"
- result = c.sql_query("SELECT count(*) FROM kafka_source_tbl", service="mz_new")
- assert result[0][0] == count * repeats, f"Wrong result: {result}"
- assert (
- elapsed < 3
- ), f"Took {elapsed}s to SELECT on Kafka source after 0dt upgrade, is it hydrated?"
- start_time = time.time()
- result = c.sql_query("SELECT 1", service="mz_new")
- elapsed = time.time() - start_time
- print(f"bootstrapping (checked via SELECT 1) took {elapsed} seconds")
- assert result[0][0] == 1, f"Wrong result: {result}"
- print("Ingesting again")
- for i in range(repeats, repeats * 2):
- c.testdrive(
- dedent(
- f"""
- $ kafka-ingest format=bytes key-format=bytes key-terminator=: topic=kafka-large repeat={count}
- key{i}A,key{i}${{kafka-ingest.iteration}}:value{i}A,${{kafka-ingest.iteration}}
- """
- )
- )
- c.testdrive(
- dedent(
- f"""
- > SET CLUSTER = cluster;
- > SELECT * FROM kafka_source_cnt
- {2*count*repeats}
- > SELECT count(*) FROM kafka_source_tbl
- {2*count*repeats}
- """
- )
- )
- def workflow_pg_source_rehydration(c: Composition) -> None:
- """Verify Postgres source rehydration in 0dt deployment"""
- c.down(destroy_volumes=True)
- c.up("postgres", "mz_old", {"name": "testdrive", "persistent": True})
- count = 1000000
- repeats = 100
- # Make sure cluster is owned by the system so it doesn't get dropped
- # between testdrive runs.
- c.sql(
- """
- DROP CLUSTER IF EXISTS cluster CASCADE;
- CREATE CLUSTER cluster SIZE '1';
- GRANT ALL ON CLUSTER cluster TO materialize;
- ALTER SYSTEM SET cluster = cluster;
- CREATE CLUSTER cluster_singlereplica SIZE '1', REPLICATION FACTOR 1;
- GRANT ALL ON CLUSTER cluster_singlereplica TO materialize;
- """,
- service="mz_old",
- port=6877,
- user="mz_system",
- )
- inserts = (
- "INSERT INTO postgres_source_table VALUES "
- + ", ".join([f"({i})" for i in range(count)])
- + ";"
- )
- start_time = time.time()
- c.testdrive(
- dedent(
- f"""
- > SET CLUSTER = cluster;
- $ postgres-execute connection=postgres://postgres:postgres@postgres
- CREATE USER postgres1 WITH SUPERUSER PASSWORD 'postgres';
- ALTER USER postgres1 WITH replication;
- DROP PUBLICATION IF EXISTS postgres_source;
- DROP TABLE IF EXISTS postgres_source_table;
- CREATE TABLE postgres_source_table (f1 INTEGER);
- ALTER TABLE postgres_source_table REPLICA IDENTITY FULL;
- {inserts}
- CREATE PUBLICATION postgres_source FOR ALL TABLES;
- > CREATE SECRET pgpass AS 'postgres';
- > CREATE CONNECTION pg FOR POSTGRES
- HOST 'postgres',
- DATABASE postgres,
- USER postgres1,
- PASSWORD SECRET pgpass;
- > CREATE SOURCE postgres_source
- IN CLUSTER cluster
- FROM POSTGRES CONNECTION pg
- (PUBLICATION 'postgres_source');
- > CREATE TABLE postgres_source_table FROM SOURCE postgres_source (REFERENCE postgres_source_table)
- > CREATE VIEW postgres_source_cnt AS SELECT count(*) FROM postgres_source_table
- > CREATE DEFAULT INDEX ON postgres_source_cnt
- > SELECT * FROM postgres_source_cnt;
- {count}
- """
- ),
- quiet=True,
- )
- for i in range(1, repeats):
- c.testdrive(
- dedent(
- f"""
- $ postgres-execute connection=postgres://postgres:postgres@postgres
- {inserts}
- > SELECT * FROM postgres_source_cnt
- {count*(i+1)}
- """
- ),
- quiet=True,
- )
- elapsed = time.time() - start_time
- print(f"initial ingestion took {elapsed} seconds")
- with c.override(
- Materialized(
- name="mz_new",
- sanity_restart=False,
- deploy_generation=1,
- system_parameter_defaults=SYSTEM_PARAMETER_DEFAULTS,
- restart="on-failure",
- external_metadata_store=True,
- default_replication_factor=2,
- ),
- Testdrive(
- materialize_url="postgres://materialize@mz_new:6875",
- materialize_url_internal="postgres://materialize@mz_new:6877",
- mz_service="mz_new",
- materialize_params={"cluster": "cluster"},
- no_reset=True,
- seed=1,
- default_timeout=DEFAULT_TIMEOUT,
- ),
- ):
- c.up("mz_new")
- start_time = time.time()
- c.await_mz_deployment_status(DeploymentStatus.READY_TO_PROMOTE, "mz_new")
- elapsed = time.time() - start_time
- print(f"re-hydration took {elapsed} seconds")
- c.promote_mz("mz_new")
- start_time = time.time()
- c.await_mz_deployment_status(
- DeploymentStatus.IS_LEADER, "mz_new", sleep_time=None
- )
- elapsed = time.time() - start_time
- print(f"promotion took {elapsed} seconds")
- start_time = time.time()
- result = c.sql_query("SELECT * FROM postgres_source_cnt", service="mz_new")
- elapsed = time.time() - start_time
- print(f"final check took {elapsed} seconds")
- assert result[0][0] == count * repeats, f"Wrong result: {result}"
- assert (
- elapsed < 4
- ), f"Took {elapsed}s to SELECT on Postgres source after 0dt upgrade, is it hydrated?"
- result = c.sql_query(
- "SELECT count(*) FROM postgres_source_table", service="mz_new"
- )
- assert result[0][0] == count * repeats, f"Wrong result: {result}"
- print("Ingesting again")
- for i in range(repeats, repeats * 2):
- c.testdrive(
- dedent(
- f"""
- $ postgres-execute connection=postgres://postgres:postgres@postgres
- {inserts}
- > SELECT * FROM postgres_source_cnt
- {count*(i+1)}
- """
- ),
- quiet=True,
- )
- result = c.sql_query("SELECT * FROM postgres_source_cnt", service="mz_new")
- assert result[0][0] == 2 * count * repeats, f"Wrong result: {result}"
- result = c.sql_query(
- "SELECT count(*) FROM postgres_source_table", service="mz_new"
- )
- assert result[0][0] == 2 * count * repeats, f"Wrong result: {result}"
- def workflow_mysql_source_rehydration(c: Composition) -> None:
- """Verify Postgres source rehydration in 0dt deployment"""
- c.down(destroy_volumes=True)
- c.up("mysql", "mz_old", {"name": "testdrive", "persistent": True})
- count = 1000000
- repeats = 100
- # Make sure cluster is owned by the system so it doesn't get dropped
- # between testdrive runs.
- c.sql(
- """
- DROP CLUSTER IF EXISTS cluster CASCADE;
- CREATE CLUSTER cluster SIZE '1';
- GRANT ALL ON CLUSTER cluster TO materialize;
- ALTER SYSTEM SET cluster = cluster;
- CREATE CLUSTER cluster_singlereplica SIZE '1', REPLICATION FACTOR 1;
- GRANT ALL ON CLUSTER cluster_singlereplica TO materialize;
- """,
- service="mz_old",
- port=6877,
- user="mz_system",
- )
- inserts = (
- "INSERT INTO mysql_source_table VALUES "
- + ", ".join([f"({i})" for i in range(count)])
- + ";"
- )
- start_time = time.time()
- c.testdrive(
- dedent(
- f"""
- > SET CLUSTER = cluster;
- $ mysql-connect name=mysql url=mysql://root@mysql password={MySql.DEFAULT_ROOT_PASSWORD}
- $ mysql-execute name=mysql
- # create the database if it does not exist yet but do not drop it
- CREATE DATABASE IF NOT EXISTS public;
- USE public;
- CREATE USER mysql1 IDENTIFIED BY 'mysql';
- GRANT REPLICATION SLAVE ON *.* TO mysql1;
- GRANT ALL ON public.* TO mysql1;
- CREATE TABLE mysql_source_table (f1 INTEGER);
- {inserts}
- > CREATE SECRET mysqlpass AS 'mysql';
- > CREATE CONNECTION mysql TO MYSQL (
- HOST 'mysql',
- USER mysql1,
- PASSWORD SECRET mysqlpass);
- > CREATE SOURCE mysql_source
- IN CLUSTER cluster
- FROM MYSQL CONNECTION mysql;
- > CREATE TABLE mysql_source_table FROM SOURCE mysql_source (REFERENCE public.mysql_source_table);
- > CREATE VIEW mysql_source_cnt AS SELECT count(*) FROM mysql_source_table
- > CREATE DEFAULT INDEX ON mysql_source_cnt
- > SELECT * FROM mysql_source_cnt;
- {count}
- """
- ),
- quiet=True,
- )
- for i in range(1, repeats):
- c.testdrive(
- dedent(
- f"""
- $ mysql-connect name=mysql url=mysql://root@mysql password={MySql.DEFAULT_ROOT_PASSWORD}
- $ mysql-execute name=mysql
- USE public;
- {inserts}
- > SELECT * FROM mysql_source_cnt;
- {count*(i+1)}
- """
- ),
- quiet=True,
- )
- elapsed = time.time() - start_time
- print(f"initial ingestion took {elapsed} seconds")
- with c.override(
- Materialized(
- name="mz_new",
- sanity_restart=False,
- deploy_generation=1,
- system_parameter_defaults=SYSTEM_PARAMETER_DEFAULTS,
- restart="on-failure",
- external_metadata_store=True,
- default_replication_factor=2,
- ),
- Testdrive(
- materialize_url="postgres://materialize@mz_new:6875",
- materialize_url_internal="postgres://materialize@mz_new:6877",
- mz_service="mz_new",
- materialize_params={"cluster": "cluster"},
- no_reset=True,
- seed=1,
- default_timeout=DEFAULT_TIMEOUT,
- ),
- ):
- c.up("mz_new")
- start_time = time.time()
- c.await_mz_deployment_status(DeploymentStatus.READY_TO_PROMOTE, "mz_new")
- elapsed = time.time() - start_time
- print(f"re-hydration took {elapsed} seconds")
- c.promote_mz("mz_new")
- start_time = time.time()
- c.await_mz_deployment_status(
- DeploymentStatus.IS_LEADER, "mz_new", sleep_time=None
- )
- elapsed = time.time() - start_time
- print(f"promotion took {elapsed} seconds")
- start_time = time.time()
- result = c.sql_query("SELECT * FROM mysql_source_cnt", service="mz_new")
- elapsed = time.time() - start_time
- print(f"final check took {elapsed} seconds")
- assert result[0][0] == count * repeats, f"Wrong result: {result}"
- assert (
- elapsed < 4
- ), f"Took {elapsed}s to SELECT on MySQL source after 0dt upgrade, is it hydrated?"
- result = c.sql_query(
- "SELECT count(*) FROM mysql_source_table", service="mz_new"
- )
- assert result[0][0] == count * repeats, f"Wrong result: {result}"
- print("Ingesting again")
- for i in range(repeats, repeats * 2):
- c.testdrive(
- dedent(
- f"""
- $ mysql-connect name=mysql url=mysql://root@mysql password={MySql.DEFAULT_ROOT_PASSWORD}
- $ mysql-execute name=mysql
- USE public;
- {inserts}
- > SELECT * FROM mysql_source_cnt;
- {count*(i+1)}
- """
- ),
- quiet=True,
- )
- result = c.sql_query("SELECT * FROM mysql_source_cnt", service="mz_new")
- assert result[0][0] == 2 * count * repeats, f"Wrong result: {result}"
- result = c.sql_query(
- "SELECT count(*) FROM mysql_source_table", service="mz_new"
- )
- assert result[0][0] == 2 * count * repeats, f"Wrong result: {result}"
- def workflow_kafka_source_failpoint(c: Composition) -> None:
- """Verify that source status updates of the newly deployed environment take
- precedence over older source status updates when promoted.
- The original Materialized instance (mz_old) is started with a failpoint
- that simulates a failure during state multi-put. After creating a Kafka
- source, we promote a new deployment (mz_new) and verify that the source
- status in mz_source_statuses is marked as 'running', indicating that the
- source has rehydrated correctly despite the injected failure."""
- c.down(destroy_volumes=True)
- # Start the required services.
- c.up(
- "zookeeper",
- "kafka",
- "schema-registry",
- {"name": "testdrive", "persistent": True},
- )
- # Start the original Materialized instance with the failpoint enabled.
- with c.override(
- Materialized(
- name="mz_old",
- sanity_restart=False,
- deploy_generation=0,
- system_parameter_defaults=SYSTEM_PARAMETER_DEFAULTS,
- external_metadata_store=True,
- environment_extra=["FAILPOINTS=fail_state_multi_put=return"],
- default_replication_factor=2,
- )
- ):
- c.up("mz_old")
- # Make sure cluster is owned by the system so it doesn't get dropped
- # between testdrive runs.
- c.sql(
- dedent(
- """
- DROP CLUSTER IF EXISTS cluster CASCADE;
- CREATE CLUSTER cluster SIZE '1';
- GRANT ALL ON CLUSTER cluster TO materialize;
- ALTER SYSTEM SET cluster = cluster;
- CREATE CLUSTER cluster_singlereplica SIZE '1', REPLICATION FACTOR 1;
- GRANT ALL ON CLUSTER cluster_singlereplica TO materialize;
- """
- ),
- service="mz_old",
- port=6877,
- user="mz_system",
- )
- c.testdrive(
- dedent(
- """
- > SET CLUSTER = cluster;
- > CREATE CONNECTION IF NOT EXISTS kafka_conn FOR KAFKA BROKER '${testdrive.kafka-addr}', SECURITY PROTOCOL = 'PLAINTEXT';
- $ kafka-create-topic topic=kafka-fp
- $ kafka-ingest format=bytes key-format=bytes key-terminator=: topic=kafka-fp
- keyA,keyA:valA,valA
- > CREATE SOURCE kafka_source_fp
- IN CLUSTER cluster
- FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-kafka-fp-${testdrive.seed}');
- > CREATE TABLE kafka_source_tbl (key1, key2, value1, value2)
- FROM SOURCE kafka_source_fp (REFERENCE "testdrive-kafka-fp-${testdrive.seed}")
- KEY FORMAT CSV WITH 2 COLUMNS DELIMITED BY ','
- VALUE FORMAT CSV WITH 2 COLUMNS DELIMITED BY ','
- ENVELOPE UPSERT;
- > SELECT status FROM mz_internal.mz_source_statuses WHERE name = 'kafka_source_fp';
- stalled
- """
- )
- )
- with c.override(
- Materialized(
- name="mz_new",
- sanity_restart=False,
- deploy_generation=1,
- system_parameter_defaults=SYSTEM_PARAMETER_DEFAULTS,
- restart="on-failure",
- external_metadata_store=True,
- default_replication_factor=2,
- ),
- Testdrive(
- materialize_url="postgres://materialize@mz_new:6875",
- materialize_url_internal="postgres://materialize@mz_new:6877",
- mz_service="mz_new",
- materialize_params={"cluster": "cluster"},
- no_reset=True,
- seed=1,
- default_timeout=DEFAULT_TIMEOUT,
- ),
- ):
- c.up("mz_new")
- c.await_mz_deployment_status(DeploymentStatus.READY_TO_PROMOTE, "mz_new")
- c.promote_mz("mz_new")
- c.await_mz_deployment_status(
- DeploymentStatus.IS_LEADER, "mz_new", sleep_time=None
- )
- # Verify that the Kafka source's status is marked as "running" in mz_source_statuses.
- c.testdrive(
- dedent(
- """
- > SELECT status FROM mz_internal.mz_source_statuses WHERE name = 'kafka_source_fp';
- running
- """
- )
- )
- def fetch_reconciliation_metrics(c: Composition, process: str) -> tuple[int, int]:
- # TODO: Replace me with mz_internal.mz_cluster_replica_ports when it exists
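- # Find the clusterd's internal HTTP socket from its command line, then scrape
- # its Prometheus metrics for the dataflow reconciliation counters.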
- internal_http = c.exec(
- process,
- "bash",
- "-c",
- 'ps aux | grep -v grep | grep "cluster_id=s2" | sed -e "s#.* --internal-http-listen-addr=\\([^ ]*\\) .*#\\1#"',
- capture=True,
- ).stdout.strip()
- metrics = c.exec(
- process,
- "curl",
- "--silent",
- "--unix-socket",
- internal_http,
- "localhost/metrics",
- capture=True,
- ).stdout
- reused = 0
- replaced = 0
- for metric in metrics.splitlines():
- if metric.startswith("mz_compute_reconciliation_reused_dataflows_count_total"):
- reused += int(metric.split()[1])
- elif metric.startswith(
- "mz_compute_reconciliation_replaced_dataflows_count_total"
- ):
- replaced += int(metric.split()[1])
- return reused, replaced
- def workflow_builtin_item_migrations(c: Composition) -> None:
- """Verify builtin item migrations"""
- c.down(destroy_volumes=True)
- c.up("mz_old")
- c.sql(
- "CREATE MATERIALIZED VIEW mv AS SELECT name FROM mz_tables;",
- service="mz_old",
- port=6877,
- user="mz_system",
- )
- mz_tables_gid = c.sql_query(
- "SELECT id FROM mz_tables WHERE name = 'mz_tables'",
- service="mz_old",
- )[0][0]
- mv_gid = c.sql_query(
- "SELECT id FROM mz_materialized_views WHERE name = 'mv'",
- service="mz_old",
- )[0][0]
- mz_tables_shard_id = c.sql_query(
- f"SELECT shard_id FROM mz_internal.mz_storage_shards WHERE object_id = '{mz_tables_gid}'",
- service="mz_old",
- )[0][0]
- mv_shard_id = c.sql_query(
- f"SELECT shard_id FROM mz_internal.mz_storage_shards WHERE object_id = '{mv_gid}'",
- service="mz_old",
- )[0][0]
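- # Capture the pre-deploy global IDs and persist shard IDs so we can check
- # below which of them survive the forced builtin item migration.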
- with c.override(
- Materialized(
- name="mz_new",
- sanity_restart=False,
- deploy_generation=1,
- system_parameter_defaults=SYSTEM_PARAMETER_DEFAULTS,
- restart="on-failure",
- external_metadata_store=True,
- force_migrations="all",
- healthcheck=LEADER_STATUS_HEALTHCHECK,
- default_replication_factor=2,
- ),
- ):
- c.up("mz_new")
- new_mz_tables_gid = c.sql_query(
- "SELECT id FROM mz_tables WHERE name = 'mz_tables'",
- service="mz_new",
- )[0][0]
- new_mv_gid = c.sql_query(
- "SELECT id FROM mz_materialized_views WHERE name = 'mv'",
- service="mz_new",
- )[0][0]
- assert new_mz_tables_gid == mz_tables_gid
- assert new_mv_gid == mv_gid
- # mz_internal.mz_storage_shards won't update until this instance becomes the leader
- c.await_mz_deployment_status(DeploymentStatus.READY_TO_PROMOTE, "mz_new")
- c.promote_mz("mz_new")
- c.await_mz_deployment_status(DeploymentStatus.IS_LEADER, "mz_new")
- new_mz_tables_gid = c.sql_query(
- "SELECT id FROM mz_tables WHERE name = 'mz_tables'",
- service="mz_new",
- reuse_connection=False,
- )[0][0]
- new_mv_gid = c.sql_query(
- "SELECT id FROM mz_materialized_views WHERE name = 'mv'",
- service="mz_new",
- reuse_connection=False,
- )[0][0]
- assert new_mz_tables_gid == mz_tables_gid
- assert new_mv_gid == mv_gid
- new_mz_tables_shard_id = c.sql_query(
- f"SELECT shard_id FROM mz_internal.mz_storage_shards WHERE object_id = '{mz_tables_gid}'",
- service="mz_new",
- reuse_connection=False,
- )[0][0]
- new_mv_shard_id = c.sql_query(
- f"SELECT shard_id FROM mz_internal.mz_storage_shards WHERE object_id = '{mv_gid}'",
- service="mz_new",
- reuse_connection=False,
- )[0][0]
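- # The forced migration recreates the builtin mz_tables collection in a new
- # persist shard, while the user materialized view must keep its existing shard.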
- assert new_mz_tables_shard_id != mz_tables_shard_id
- assert new_mv_shard_id == mv_shard_id
- reused, replaced = fetch_reconciliation_metrics(c, "mz_new")
- assert reused > 0
- assert (
- replaced == 0
- ), f"{replaced} dataflows have been replaced, expected all to be reused"
- def workflow_materialized_view_correction_pruning(c: Composition) -> None:
- """
- Verify that the MV sink consolidates away the snapshot updates in read-only
- mode.
- """
- c.down(destroy_volumes=True)
- c.up("mz_old")
- c.sql(
- "ALTER SYSTEM SET unsafe_enable_unorchestrated_cluster_replicas = true;",
- service="mz_old",
- port=6877,
- user="mz_system",
- )
- c.sql(
- """
- CREATE TABLE t (a int);
- INSERT INTO t SELECT generate_series(1, 1000);
- CREATE MATERIALIZED VIEW mv AS SELECT * FROM t;
- SELECT * FROM mv LIMIT 1;
- """,
- service="mz_old",
- )
- c.up("mz_new")
- c.sql("SELECT * FROM mv LIMIT 1", service="mz_new")
- def get_clusterd_internal_http_address():
- logs = c.invoke("logs", "mz_new", capture=True).stdout
- for line in logs.splitlines():
- # quickstart must be u1 since it's the only non-system cluster
- if (
- "cluster-u1-replica-u1-gen-1" in line
- and "mz_clusterd: serving internal HTTP server on" in line
- ):
- return line.split(" ")[-1]
- raise RuntimeError("No HTTP endpoint for quickstart clusterd found in logs")
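- # Read the persist sink correction insertion/deletion counters from the
- # clusterd metrics endpoint.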
- def get_correction_metrics():
- address = get_clusterd_internal_http_address()
- resp = c.exec(
- "mz_new",
- "curl",
- "--unix-socket",
- address,
- "http:/prof/metrics",
- capture=True,
- ).stdout
- metrics = {}
- for line in resp.splitlines():
- key, value = line.split(maxsplit=1)
- metrics[key] = value
- insertions = int(metrics["mz_persist_sink_correction_insertions_total"])
- deletions = int(metrics["mz_persist_sink_correction_deletions_total"])
- return (insertions, deletions)
- insertions = None
- deletions = None
- # The correction buffer should stabilize in a state where it has seen 2000
- # insertions (1000 positive snapshot updates plus their 1000 retractions) and
- # just as many deletions, so the net number of records remaining in the
- # correction buffer is zero.
- for _ in range(10):
- time.sleep(1)
- insertions, deletions = get_correction_metrics()
- if insertions > 1000 and insertions - deletions == 0:
- break
- else:
- raise AssertionError(
- f"unexpected correction metrics: {insertions=}, {deletions=}"
- )
- def workflow_upsert_sources(c: Composition) -> None:
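- """Run repeated 0dt deployments while many upsert sources ingest continuously."""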
- c.down(destroy_volumes=True)
- c.up(
- "zookeeper",
- "kafka",
- "schema-registry",
- "postgres",
- "mysql",
- "mz_old",
- {"name": "testdrive", "persistent": True},
- )
- num_threads = 50
- c.sql(
- f"""
- DROP CLUSTER IF EXISTS cluster CASCADE;
- CREATE CLUSTER cluster SIZE '2-1';
- GRANT ALL ON CLUSTER cluster TO materialize;
- ALTER SYSTEM SET cluster = cluster;
- CREATE CLUSTER cluster_singlereplica SIZE '1', REPLICATION FACTOR 1;
- GRANT ALL ON CLUSTER cluster_singlereplica TO materialize;
- ALTER SYSTEM SET max_sources = {num_threads * 2};
- ALTER SYSTEM SET max_materialized_views = {num_threads * 2};
- """,
- service="mz_old",
- port=6877,
- user="mz_system",
- )
- c.testdrive(
- dedent(
- """
- > SET CLUSTER = cluster;
- > CREATE CONNECTION IF NOT EXISTS kafka_conn FOR KAFKA BROKER '${testdrive.kafka-addr}', SECURITY PROTOCOL = 'PLAINTEXT';
- > CREATE CONNECTION IF NOT EXISTS csr_conn FOR CONFLUENT SCHEMA REGISTRY URL '${testdrive.schema-registry-url}';
- """
- )
- )
- end_time = datetime.now() + timedelta(seconds=200)
- mz1 = "mz_old"
- mz2 = "mz_new"
- def worker(i: int) -> None:
- c.testdrive(
- dedent(
- f"""
- $ kafka-create-topic topic=kafka{i}
- > CREATE SOURCE kafka_source{i}
- IN CLUSTER cluster
- FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-kafka{i}-${{testdrive.seed}}');
- > CREATE TABLE kafka_source_tbl{i} (key1, key2, value1, value2)
- FROM SOURCE kafka_source{i} (REFERENCE "testdrive-kafka{i}-${{testdrive.seed}}")
- KEY FORMAT CSV WITH 2 COLUMNS DELIMITED BY ','
- VALUE FORMAT CSV WITH 2 COLUMNS DELIMITED BY ','
- ENVELOPE UPSERT;
- > CREATE DEFAULT INDEX ON kafka_source_tbl{i}
- > CREATE MATERIALIZED VIEW mv{i} AS SELECT * FROM kafka_source_tbl{i}
- """
- )
- )
- while datetime.now() < end_time:
- try:
- c.testdrive(
- dedent(
- f"""
- $ kafka-ingest format=bytes key-format=bytes key-terminator=: topic=kafka{i} repeat=10000
- key1A,key1B:value1A,value1B
- """
- )
- )
- except Exception:
- # Ingest can fail transiently while a deployment hand-over is in
- # progress; ignore the failure and keep ingesting.
- pass
- threads = []
- for i in range(num_threads):
- thread = Thread(name=f"worker_{i}", target=worker, args=(i,))
- threads.append(thread)
- for thread in threads:
- thread.start()
- i = 1
- while datetime.now() < end_time:
- with c.override(
- Materialized(
- name=mz2,
- sanity_restart=False,
- deploy_generation=i,
- system_parameter_defaults=SYSTEM_PARAMETER_DEFAULTS,
- restart="on-failure",
- external_metadata_store=True,
- default_replication_factor=2,
- ),
- Testdrive(
- materialize_url=f"postgres://materialize@{mz1}:6875",
- materialize_url_internal=f"postgres://materialize@{mz1}:6877",
- mz_service=mz1,
- materialize_params={"cluster": "cluster"},
- no_consistency_checks=True,
- no_reset=True,
- seed=1,
- default_timeout=DEFAULT_TIMEOUT,
- ),
- ):
- c.up(mz2)
- c.await_mz_deployment_status(DeploymentStatus.READY_TO_PROMOTE, mz2)
- c.promote_mz(mz2)
- c.await_mz_deployment_status(DeploymentStatus.IS_LEADER, mz2)
- i += 1
- mz1, mz2 = mz2, mz1
- for thread in threads:
- thread.join()
- def workflow_ddl(c: Composition) -> None:
- """Verify basic 0dt deployment flow with DDLs running during the 0dt deployment."""
- c.down(destroy_volumes=True)
- c.up(
- "zookeeper",
- "kafka",
- "schema-registry",
- "postgres",
- "mysql",
- "mz_old",
- {"name": "testdrive", "persistent": True},
- )
- # Make sure the cluster is owned by the system so it doesn't get dropped
- # between testdrive runs.
- c.sql(
- """
- DROP CLUSTER IF EXISTS cluster CASCADE;
- CREATE CLUSTER cluster SIZE '2-1';
- GRANT ALL ON CLUSTER cluster TO materialize;
- ALTER SYSTEM SET cluster = cluster;
- CREATE CLUSTER cluster_singlereplica SIZE '1', REPLICATION FACTOR 1;
- GRANT ALL ON CLUSTER cluster_singlereplica TO materialize;
- """,
- service="mz_old",
- port=6877,
- user="mz_system",
- )
- # Inserts should be reflected when writes are allowed.
- c.testdrive(
- dedent(
- f"""
- > SET CLUSTER = cluster;
- > CREATE TABLE t (a int, b int);
- > CREATE CONNECTION IF NOT EXISTS kafka_conn FOR KAFKA BROKER '${{testdrive.kafka-addr}}', SECURITY PROTOCOL = 'PLAINTEXT';
- > CREATE CONNECTION IF NOT EXISTS csr_conn FOR CONFLUENT SCHEMA REGISTRY URL '${{testdrive.schema-registry-url}}';
- > CREATE SINK kafka_sink
- IN CLUSTER cluster
- FROM t
- INTO KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-kafka-sink-${{testdrive.seed}}')
- FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
- ENVELOPE DEBEZIUM;
- > INSERT INTO t VALUES (1, 2);
- > CREATE INDEX t_idx ON t (a, b);
- > CREATE MATERIALIZED VIEW mv AS SELECT sum(a) FROM t;
- > SET TRANSACTION_ISOLATION TO 'SERIALIZABLE';
- > SELECT * FROM mv;
- 1
- > SELECT max(b) FROM t;
- 2
- $ kafka-create-topic topic=kafka
- $ kafka-ingest format=bytes key-format=bytes key-terminator=: topic=kafka
- key1A,key1B:value1A,value1B
- > CREATE SOURCE kafka_source
- IN CLUSTER cluster
- FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-kafka-${{testdrive.seed}}');
- > CREATE TABLE kafka_source_tbl (key1, key2, value1, value2)
- FROM SOURCE kafka_source (REFERENCE "testdrive-kafka-${{testdrive.seed}}")
- KEY FORMAT CSV WITH 2 COLUMNS DELIMITED BY ','
- VALUE FORMAT CSV WITH 2 COLUMNS DELIMITED BY ','
- ENVELOPE UPSERT;
- > SELECT * FROM kafka_source_tbl
- key1A key1B value1A value1B
- $ postgres-execute connection=postgres://postgres:postgres@postgres
- CREATE USER postgres1 WITH SUPERUSER PASSWORD 'postgres';
- ALTER USER postgres1 WITH replication;
- DROP PUBLICATION IF EXISTS postgres_source;
- DROP TABLE IF EXISTS postgres_source_table;
- CREATE TABLE postgres_source_table (f1 TEXT, f2 INTEGER);
- ALTER TABLE postgres_source_table REPLICA IDENTITY FULL;
- INSERT INTO postgres_source_table SELECT 'A', 0;
- CREATE PUBLICATION postgres_source FOR ALL TABLES;
- > CREATE SECRET pgpass AS 'postgres';
- > CREATE CONNECTION pg FOR POSTGRES
- HOST 'postgres',
- DATABASE postgres,
- USER postgres1,
- PASSWORD SECRET pgpass;
- > CREATE SOURCE postgres_source
- IN CLUSTER cluster
- FROM POSTGRES CONNECTION pg
- (PUBLICATION 'postgres_source');
- > CREATE TABLE postgres_source_table FROM SOURCE postgres_source (REFERENCE postgres_source_table)
- > SELECT * FROM postgres_source_table;
- A 0
- $ mysql-connect name=mysql url=mysql://root@mysql password={MySql.DEFAULT_ROOT_PASSWORD}
- $ mysql-execute name=mysql
- # create the database if it does not exist yet but do not drop it
- CREATE DATABASE IF NOT EXISTS public;
- USE public;
- CREATE USER mysql1 IDENTIFIED BY 'mysql';
- GRANT REPLICATION SLAVE ON *.* TO mysql1;
- GRANT ALL ON public.* TO mysql1;
- CREATE TABLE mysql_source_table (f1 VARCHAR(32), f2 INTEGER);
- INSERT INTO mysql_source_table VALUES ('A', 0);
- > CREATE SECRET mysqlpass AS 'mysql';
- > CREATE CONNECTION mysql TO MYSQL (
- HOST 'mysql',
- USER mysql1,
- PASSWORD SECRET mysqlpass);
- > CREATE SOURCE mysql_source1
- IN CLUSTER cluster
- FROM MYSQL CONNECTION mysql;
- > CREATE TABLE mysql_source_table FROM SOURCE mysql_source1 (REFERENCE public.mysql_source_table);
- > SELECT * FROM mysql_source_table;
- A 0
- $ kafka-verify-topic sink=materialize.public.kafka_sink
- > CREATE SOURCE kafka_sink_source
- IN CLUSTER cluster
- FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-kafka-sink-${{testdrive.seed}}')
- > CREATE TABLE kafka_sink_source_tbl FROM SOURCE kafka_sink_source (REFERENCE "testdrive-kafka-sink-${{testdrive.seed}}")
- FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
- ENVELOPE NONE
- > SELECT (before).a, (before).b, (after).a, (after).b FROM kafka_sink_source_tbl
- <null> <null> 1 2
- > CREATE SOURCE webhook_source
- IN CLUSTER cluster_singlereplica
- FROM WEBHOOK BODY FORMAT TEXT
- $ webhook-append database=materialize schema=public name=webhook_source
- AAA
- > SELECT * FROM webhook_source
- AAA
- $ set-max-tries max-tries=1
- $ set-regex match=\\d{{13,20}} replacement=<TIMESTAMP>
- > BEGIN
- > DECLARE c CURSOR FOR SUBSCRIBE (SELECT a FROM t);
- > FETCH ALL c WITH (timeout='5s');
- <TIMESTAMP> 1 1
- > COMMIT
- """
- )
- )
- # Start new Materialize in a new deploy generation, which will cause
- # Materialize to boot in read-only mode.
- c.up("mz_new")
- # Verify against the new Materialize instance that it is in read-only mode
- with c.override(
- Testdrive(
- materialize_url="postgres://materialize@mz_new:6875",
- materialize_url_internal="postgres://materialize@mz_new:6877",
- mz_service="mz_new",
- materialize_params={"cluster": "cluster"},
- no_reset=True,
- seed=1,
- default_timeout=DEFAULT_TIMEOUT,
- )
- ):
- c.up({"name": "testdrive", "persistent": True})
- c.testdrive(
- dedent(
- f"""
- $ webhook-append database=materialize schema=public name=webhook_source status=500
- BBB
- $ kafka-ingest format=bytes key-format=bytes key-terminator=: topic=kafka
- key2A,key2B:value2A,value2B
- $ postgres-execute connection=postgres://postgres:postgres@postgres
- INSERT INTO postgres_source_table VALUES ('B', 1);
- $ mysql-connect name=mysql url=mysql://root@mysql password={MySql.DEFAULT_ROOT_PASSWORD}
- $ mysql-execute name=mysql
- USE public;
- INSERT INTO mysql_source_table VALUES ('B', 1);
- > SET CLUSTER = cluster;
- > SELECT 1
- 1
- ! INSERT INTO t VALUES (3, 4);
- contains: cannot write in read-only mode
- > SET TRANSACTION_ISOLATION TO 'SERIALIZABLE';
- > SELECT * FROM mv;
- 1
- # TODO: Currently hangs
- # > SELECT max(b) FROM t;
- # 2
- > SELECT mz_unsafe.mz_sleep(5)
- <null>
- ! INSERT INTO t VALUES (5, 6);
- contains: cannot write in read-only mode
- > SELECT * FROM mv;
- 1
- ! DROP INDEX t_idx
- contains: cannot write in read-only mode
- ! CREATE INDEX t_idx2 ON t (a, b)
- contains: cannot write in read-only mode
- ! CREATE MATERIALIZED VIEW mv2 AS SELECT sum(a) FROM t;
- contains: cannot write in read-only mode
- $ set-regex match=(s\\d+|\\d{{13}}|[ ]{{12}}0|u\\d{{1,3}}|\\(\\d+-\\d\\d-\\d\\d\\s\\d\\d:\\d\\d:\\d\\d\\.\\d\\d\\d\\)) replacement=<>
- > EXPLAIN TIMESTAMP FOR SELECT * FROM mv;
- " query timestamp: <> <>\\nlargest not in advance of upper: <> <>\\n upper:[<> <>]\\n since:[<> <>]\\n can respond immediately: true\\n timeline: Some(EpochMilliseconds)\\n session wall time: <> <>\\n\\nsource materialize.public.mv (<>, storage):\\n read frontier:[<> <>]\\n write frontier:[<> <>]\\n\\nbinding constraints:\\nlower:\\n (StorageInput([User(6)])): [<> <>]\\n"
- > SELECT * FROM kafka_source_tbl
- key1A key1B value1A value1B
- key2A key2B value2A value2B
- > SELECT * FROM postgres_source_table
- A 0
- B 1
- > SELECT * FROM mysql_source_table;
- A 0
- B 1
- > SELECT (before).a, (before).b, (after).a, (after).b FROM kafka_sink_source_tbl
- <null> <null> 1 2
- > SELECT * FROM webhook_source
- AAA
- $ set-max-tries max-tries=1
- $ set-regex match=\\d{{13,20}} replacement=<TIMESTAMP>
- > BEGIN
- ! DECLARE c CURSOR FOR SUBSCRIBE (SELECT a FROM t);
- contains: cannot write in read-only mode
- > ROLLBACK
- # Actual subscribes without a declare still work though
- > SUBSCRIBE (WITH a(x) AS (SELECT 'a') SELECT generate_series(1, 2), x FROM a)
- <TIMESTAMP> 1 1 a
- <TIMESTAMP> 1 2 a
- """
- )
- )
- # Run DDLs against the old (still leader) Materialize; the catalog changes
- # should cause the read-only mz_new to restart and catch up
- c.up({"name": "testdrive", "persistent": True})
- c.testdrive(
- dedent(
- f"""
- > CREATE TABLE t1 (a INT);
- $ webhook-append database=materialize schema=public name=webhook_source
- CCC
- $ kafka-ingest format=bytes key-format=bytes key-terminator=: topic=kafka
- key3A,key3B:value3A,value3B
- $ postgres-execute connection=postgres://postgres:postgres@postgres
- INSERT INTO postgres_source_table VALUES ('C', 2);
- $ mysql-connect name=mysql url=mysql://root@mysql password={MySql.DEFAULT_ROOT_PASSWORD}
- $ mysql-execute name=mysql
- USE public;
- INSERT INTO mysql_source_table VALUES ('C', 2);
- > CREATE TABLE t2 (a INT);
- > SET CLUSTER = cluster;
- > SELECT 1
- 1
- > INSERT INTO t VALUES (3, 4);
- > SET TRANSACTION_ISOLATION TO 'SERIALIZABLE';
- > SELECT * FROM mv;
- 4
- > SELECT max(b) FROM t;
- 4
- > SELECT mz_unsafe.mz_sleep(5)
- <null>
- > INSERT INTO t VALUES (5, 6);
- > SELECT * FROM mv;
- 9
- > DROP INDEX t_idx
- > CREATE INDEX t_idx2 ON t (a, b)
- > CREATE MATERIALIZED VIEW mv2 AS SELECT sum(a) FROM t;
- $ set-regex match=(s\\d+|\\d{{13}}|[ ]{{12}}0|u\\d{{1,3}}|\\(\\d+-\\d\\d-\\d\\d\\s\\d\\d:\\d\\d:\\d\\d\\.\\d\\d\\d\\)) replacement=<>
- > EXPLAIN TIMESTAMP FOR SELECT * FROM mv;
- " query timestamp: <> <>\\nlargest not in advance of upper: <> <>\\n upper:[<> <>]\\n since:[<> <>]\\n can respond immediately: true\\n timeline: Some(EpochMilliseconds)\\n session wall time: <> <>\\n\\nsource materialize.public.mv (<>, storage):\\n read frontier:[<> <>]\\n write frontier:[<> <>]\\n\\nbinding constraints:\\nlower:\\n (StorageInput([User(6)])): [<> <>]\\n"
- > SELECT * FROM kafka_source_tbl
- key1A key1B value1A value1B
- key2A key2B value2A value2B
- key3A key3B value3A value3B
- > SELECT * FROM postgres_source_table
- A 0
- B 1
- C 2
- > SELECT * FROM mysql_source_table;
- A 0
- B 1
- C 2
- > SELECT (before).a, (before).b, (after).a, (after).b FROM kafka_sink_source_tbl
- <null> <null> 1 2
- <null> <null> 3 4
- <null> <null> 5 6
- > SELECT * FROM webhook_source
- AAA
- CCC
- > CREATE TABLE t3 (a INT);
- $ set-max-tries max-tries=1
- $ set-regex match=\\d{{13,20}} replacement=<TIMESTAMP>
- > BEGIN
- > DECLARE c CURSOR FOR SUBSCRIBE (SELECT a FROM t);
- > FETCH ALL c WITH (timeout='5s');
- <TIMESTAMP> 1 1
- <TIMESTAMP> 1 3
- <TIMESTAMP> 1 5
- > COMMIT
- > CREATE TABLE t4 (a INT);
- """
- )
- )
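- # Check from the still read-only mz_new that the new data and the results of
- # the DDLs are visible, while writes continue to be rejected.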
- with c.override(
- Testdrive(
- materialize_url="postgres://materialize@mz_new:6875",
- materialize_url_internal="postgres://materialize@mz_new:6877",
- mz_service="mz_new",
- materialize_params={"cluster": "cluster"},
- no_reset=True,
- seed=1,
- default_timeout=DEFAULT_TIMEOUT,
- )
- ):
- c.up({"name": "testdrive", "persistent": True})
- c.testdrive(
- dedent(
- """
- $ webhook-append database=materialize schema=public name=webhook_source status=500
- DDD
- > SET CLUSTER = cluster;
- > SELECT 1
- 1
- ! INSERT INTO t VALUES (3, 4);
- contains: cannot write in read-only mode
- > SET TRANSACTION_ISOLATION TO 'SERIALIZABLE';
- > SELECT * FROM mv;
- 9
- > SELECT max(b) FROM t;
- 6
- > SELECT * FROM mv;
- 9
- > SELECT * FROM kafka_source_tbl
- key1A key1B value1A value1B
- key2A key2B value2A value2B
- key3A key3B value3A value3B
- > SELECT * FROM postgres_source_table
- A 0
- B 1
- C 2
- > SELECT * FROM mysql_source_table;
- A 0
- B 1
- C 2
- > SELECT (before).a, (before).b, (after).a, (after).b FROM kafka_sink_source_tbl
- <null> <null> 1 2
- <null> <null> 3 4
- <null> <null> 5 6
- > SELECT * FROM webhook_source
- AAA
- CCC
- $ set-max-tries max-tries=1
- $ set-regex match=\\d{13,20} replacement=<TIMESTAMP>
- > BEGIN
- ! DECLARE c CURSOR FOR SUBSCRIBE (SELECT a FROM t);
- contains: cannot write in read-only mode
- > ROLLBACK
- # Actual subscribes without a declare still work though
- > SUBSCRIBE (WITH a(x) AS (SELECT 'a') SELECT generate_series(1, 2), x FROM a)
- <TIMESTAMP> 1 1 a
- <TIMESTAMP> 1 2 a
- """
- )
- )
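- # All read-only checks passed; promote mz_new and wait for the hand-over.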
- c.await_mz_deployment_status(DeploymentStatus.READY_TO_PROMOTE, "mz_new")
- c.promote_mz("mz_new")
- # Give some time for Mz to restart after promotion
- for i in range(10):
- try:
- c.sql("SELECT 1", service="mz_old")
- except OperationalError as e:
- assert (
- "server closed the connection unexpectedly" in str(e)
- or "Can't create a connection to host" in str(e)
- or "Connection refused" in str(e)
- ), f"Unexpected error: {e}"
- break
- except CommandFailureCausedUIError as e:
- # service "mz_old" is not running
- assert "running docker compose failed" in str(
- e
- ), f"Unexpected error: {e}"
- break
- time.sleep(1)
- else:
- raise RuntimeError("mz_old didn't stop running within 10 seconds")
- for i in range(10):
- try:
- c.sql("SELECT 1", service="mz_new")
- break
- except CommandFailureCausedUIError:
- pass
- except OperationalError:
- pass
- time.sleep(1)
- else:
- raise RuntimeError("mz_new didn't come up within 10 seconds")
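- # The "wait until the service answers SQL" loop above could be factored into a
- # small helper; a minimal sketch, reusing the exception types already imported
- # here (the name wait_for_sql_ready is illustrative, not an existing utility):
- #
- #   def wait_for_sql_ready(service: str, timeout_secs: int = 10) -> None:
- #       for _ in range(timeout_secs):
- #           try:
- #               c.sql("SELECT 1", service=service)
- #               return
- #           except (OperationalError, CommandFailureCausedUIError):
- #               pass
- #           time.sleep(1)
- #       raise RuntimeError(f"{service} didn't come up within {timeout_secs} seconds")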
- c.await_mz_deployment_status(DeploymentStatus.IS_LEADER, "mz_new")
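- # mz_new is now the writable leader; verify DDL, writes, and all sources and
- # sinks end to end.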
- c.testdrive(
- dedent(
- f"""
- $ webhook-append database=materialize schema=public name=webhook_source
- EEE
- > SET CLUSTER = cluster;
- > SET TRANSACTION_ISOLATION TO 'SERIALIZABLE';
- > CREATE MATERIALIZED VIEW mv3 AS SELECT sum(a) FROM t;
- > SELECT * FROM mv;
- 9
- > SELECT * FROM mv2;
- 9
- > SELECT * FROM mv3;
- 9
- > SELECT max(b) FROM t;
- 6
- > INSERT INTO t VALUES (7, 8);
- > SELECT * FROM mv;
- 16
- > SELECT * FROM mv2;
- 16
- > SELECT max(b) FROM t;
- 8
- > SELECT * FROM kafka_source_tbl
- key1A key1B value1A value1B
- key2A key2B value2A value2B
- key3A key3B value3A value3B
- > SELECT * FROM postgres_source_table
- A 0
- B 1
- C 2
- > SELECT * FROM mysql_source_table;
- A 0
- B 1
- C 2
- $ kafka-ingest format=bytes key-format=bytes key-terminator=: topic=kafka
- key4A,key4B:value4A,value4B
- $ postgres-execute connection=postgres://postgres:postgres@postgres
- INSERT INTO postgres_source_table VALUES ('D', 3);
- $ mysql-connect name=mysql url=mysql://root@mysql password={MySql.DEFAULT_ROOT_PASSWORD}
- $ mysql-execute name=mysql
- USE public;
- INSERT INTO mysql_source_table VALUES ('D', 3);
- > SELECT * FROM kafka_source_tbl
- key1A key1B value1A value1B
- key2A key2B value2A value2B
- key3A key3B value3A value3B
- key4A key4B value4A value4B
- > SELECT * FROM postgres_source_table
- A 0
- B 1
- C 2
- D 3
- > SELECT * FROM mysql_source_table;
- A 0
- B 1
- C 2
- D 3
- > SELECT (before).a, (before).b, (after).a, (after).b FROM kafka_sink_source_tbl
- <null> <null> 1 2
- <null> <null> 3 4
- <null> <null> 5 6
- <null> <null> 7 8
- > SELECT * FROM webhook_source
- AAA
- CCC
- EEE
- $ set-max-tries max-tries=1
- $ set-regex match=\\d{{13,20}} replacement=<TIMESTAMP>
- > BEGIN
- > DECLARE c CURSOR FOR SUBSCRIBE (SELECT a FROM t);
- > FETCH ALL c WITH (timeout='5s');
- <TIMESTAMP> 1 1
- <TIMESTAMP> 1 3
- <TIMESTAMP> 1 5
- <TIMESTAMP> 1 7
- > COMMIT
- """
- )
- )