# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.
- """
- Test that real-time recency works w/ slow ingest of upstream data from Kafka+MySQL+Postgres
- """

import random
import threading
import time

from materialize.mzcompose.composition import Composition
from materialize.mzcompose.services.kafka import Kafka
from materialize.mzcompose.services.materialized import Materialized
from materialize.mzcompose.services.mysql import MySql
from materialize.mzcompose.services.postgres import Postgres
from materialize.mzcompose.services.schema_registry import SchemaRegistry
from materialize.mzcompose.services.testdrive import Testdrive
from materialize.mzcompose.services.zookeeper import Zookeeper
from materialize.util import PropagatingThread

SERVICES = [
    Zookeeper(),
    Kafka(),
    SchemaRegistry(),
    MySql(),
    Postgres(),
    Materialized(),
    Testdrive(
        entrypoint_extra=[
            f"--var=mysql-root-password={MySql.DEFAULT_ROOT_PASSWORD}",
        ],
    ),
]


def workflow_default(c: Composition) -> None:
    c.down(destroy_volumes=True)
    c.up("zookeeper", "kafka", "schema-registry", "postgres", "mysql", "materialized")
    seed = random.getrandbits(16)
    c.run_testdrive_files(
        "--no-reset",
        "--max-errors=1",
        f"--seed={seed}",
        "rtr/mz-setup.td",
    )
    running = True

    def query() -> None:
        with c.sql_cursor() as cursor:
            cursor.execute("SET TRANSACTION_ISOLATION = 'STRICT SERIALIZABLE'")
            cursor.execute("SET REAL_TIME_RECENCY TO TRUE")
            queries = [
                """SELECT sum(count) FROM (
                  SELECT count(*) FROM table_mysql
                  UNION ALL SELECT count(*) FROM table_pg
                  UNION ALL SELECT count(*) FROM input_kafka)""",
                "SELECT sum FROM sum",
            ]
            thread_name = threading.current_thread().name
            while running:
                # Use `q`, not `query`, to avoid shadowing the enclosing
                # function name.
                for q in queries:
                    start_time = time.time()
                    cursor.execute(q.encode())
                    results = cursor.fetchone()
                    assert results
                    runtime = time.time() - start_time
                    print(f"{thread_name}: {results[0]} ({runtime} s)")

    threads = [PropagatingThread(target=query, name=f"verify{i}") for i in range(10)]
    for thread in threads:
        thread.start()

    end_time = time.time() + 150
    while time.time() < end_time:
        # Currently achieves only about one ingest run every 4 seconds instead
        # of the targeted 1 second, so this loop probably needs to be
        # parallelized; see the sketch at the end of this file.
        start_time = time.time()
        c.run_testdrive_files(
            "--no-reset",
            "--max-errors=1",
            f"--seed={seed}",
            "rtr/ingest.td",
        )
        runtime = time.time() - start_time
        print(f"ingest: {runtime} s")

    running = False
    for thread in threads:
        thread.join()
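

# A minimal sketch of the parallelization suggested above: run several ingest
# loops concurrently so the combined ingest rate gets closer to the 1-second
# target. The helper name, the thread count, and the premise that concurrent
# testdrive runs of rtr/ingest.td do not conflict are all assumptions; this is
# not wired into workflow_default today.
def _parallel_ingest_sketch(c: Composition, seed: int, duration_s: float) -> None:
    end_time = time.time() + duration_s

    def ingest() -> None:
        while time.time() < end_time:
            c.run_testdrive_files(
                "--no-reset",
                "--max-errors=1",
                f"--seed={seed}",
                "rtr/ingest.td",
            )

    # PropagatingThread surfaces worker exceptions on join(), so a failing
    # ingest loop fails the workflow instead of dying silently.
    ingest_threads = [
        PropagatingThread(target=ingest, name=f"ingest{i}") for i in range(4)
    ]
    for thread in ingest_threads:
        thread.start()
    for thread in ingest_threads:
        thread.join()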