# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

# Raise the SQL timeout to 60s for the rest of this script: reading all that
# data after starting up can take longer than the default timeout.
$ set-sql-timeout duration=60s

# Avro schema used for message keys: a record with a single string field `f1`.
$ set keyschema={
    "type": "record",
    "name": "Key",
    "fields": [
        {"name": "f1", "type": "string"}
    ]
  }

# Avro schema used for message values: a record with a single string field `f2`.
$ set schema={
        "type" : "record",
        "name" : "test",
        "fields" : [
            {"name":"f2", "type":"string"}
        ]
    }

# The `failpoint` relation is not created in this file; it is expected to
# already exist and to hold 100000 rows whose f1 and f2 values are all distinct.
> SELECT COUNT(*), COUNT(DISTINCT f1), COUNT(DISTINCT f2) FROM failpoint;
100000 100000 100000

# We expect that some messages were successfully read and persisted before the
# failpoint was activated, so the summed `msgs` statistic reported for
# partition 0 of the topic must be strictly below the 100000 total.
# NOTE(review): presumably `msgs` only counts messages consumed since the
# restart -- confirm against the statistics documentation.
> SELECT SUM(CAST(statistics->'topics'->'testdrive-failpoint-${testdrive.seed}'->'partitions'->'0'->'msgs' AS INT)) < 100000 FROM mz_kafka_source_statistics;
true

# Make sure that ingestion can continue: publish two more batches of 10000
# records each (key prefixes `k` and `l`; each value repeats its key string).

$ kafka-ingest format=avro topic=failpoint key-format=avro key-schema=${keyschema} schema=${schema} repeat=10000
{"f1": "k${kafka-ingest.iteration}"} {"f2": "k${kafka-ingest.iteration}"}

$ kafka-ingest format=avro topic=failpoint key-format=avro key-schema=${keyschema} schema=${schema} repeat=10000
{"f1": "l${kafka-ingest.iteration}"} {"f2": "l${kafka-ingest.iteration}"}

# 100000 pre-existing rows plus the 2 x 10000 rows ingested just above.
> SELECT COUNT(*), COUNT(DISTINCT f1), COUNT(DISTINCT f2) FROM failpoint;
120000 120000 120000


# Validate the output of the sink as well, by reading the sink's topic back
# into a table and counting its rows.
# NOTE(review): the sink writing to 'testdrive-failpoint-sink-${testdrive.seed}'
# is not created in this file -- presumably an earlier script sets it up.

# Schema registry connection used to decode the Avro key and value below.
> CREATE CONNECTION IF NOT EXISTS csr_conn TO CONFLUENT SCHEMA REGISTRY (
    URL '${testdrive.schema-registry-url}'
  );

# Plaintext Kafka connection to the broker supplied by testdrive.
> CREATE CONNECTION kafka_conn
  TO KAFKA (BROKER '${testdrive.kafka-addr}', SECURITY PROTOCOL PLAINTEXT);

# Dedicated cluster that hosts the read-back source.
> CREATE CLUSTER single_replica_cluster SIZE '${arg.default-replica-size}';

# Source over the topic that the sink writes to.
> CREATE SOURCE failpoint_from_sink
  IN CLUSTER single_replica_cluster
  FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-failpoint-sink-${testdrive.seed}')

# Table over that source: Avro key and value, upsert semantics, with the key
# included as column `f1`.
> CREATE TABLE failpoint_from_sink_tbl FROM SOURCE failpoint_from_sink (REFERENCE "testdrive-failpoint-sink-${testdrive.seed}")
  KEY FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  VALUE FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  INCLUDE KEY AS f1
  ENVELOPE UPSERT

# The sink output must contain the same 120000 distinct rows as `failpoint`.
# NOTE(review): `after` comes from the sink's value schema -- presumably a
# Debezium-style envelope; confirm against the sink definition.
> SELECT COUNT(*), COUNT(DISTINCT f1), COUNT(DISTINCT after) FROM failpoint_from_sink_tbl
120000 120000 120000

# Delete some values inserted pre-restart: each record below carries only a key
# and no value, for 10000 keys prefixed `a` and then 10000 keys prefixed `b`.
$ kafka-ingest format=avro topic=failpoint key-format=avro key-schema=${keyschema} schema=${schema} repeat=10000
{"f1": "a${kafka-ingest.iteration}"}

$ kafka-ingest format=avro topic=failpoint key-format=avro key-schema=${keyschema} schema=${schema} repeat=10000
{"f1": "b${kafka-ingest.iteration}"}

# And validate again: 120000 rows minus the 2 x 10000 deleted keys.
> SELECT COUNT(*), COUNT(DISTINCT f1), COUNT(DISTINCT f2) FROM failpoint;
100000 100000 100000

# NOTE(review): the table read back from the sink is still expected to hold
# 120000 rows although `failpoint` dropped to 100000. Presumably deletions reach
# the sink topic as non-tombstone records (e.g. a value whose `after` is null),
# so the upsert table keeps one row per key -- confirm. Also note that this
# assertion already holds before any deletion has propagated to the sink.
> SELECT COUNT(*) FROM failpoint_from_sink_tbl;
120000