# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

$ set-arg-default default-replica-size=1
$ set-arg-default single-replica-cluster=quickstart

# Test creating and dropping various views and sources that depend upon
# one another, and indices on those views and sources.

$ set schema={
    "type": "record",
    "name": "row",
    "fields": [
      {"name": "a", "type": "long"},
      {"name": "b", "type": "long"}
    ]
  }

$ kafka-create-topic topic=data

> CREATE CONNECTION kafka_conn
  TO KAFKA (BROKER '${testdrive.kafka-addr}', SECURITY PROTOCOL PLAINTEXT);

> CREATE CONNECTION IF NOT EXISTS csr_conn TO CONFLUENT SCHEMA REGISTRY (
    URL '${testdrive.schema-registry-url}'
  );

> CREATE SOURCE data
  IN CLUSTER ${arg.single-replica-cluster}
  FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-data-${testdrive.seed}')
  FORMAT AVRO USING SCHEMA '${schema}'

> SELECT * FROM data

> CREATE VIEW data_view as SELECT * from data

> SELECT * FROM data_view

> CREATE MATERIALIZED VIEW test1 AS
  SELECT b, sum(a) FROM data GROUP BY b

> SHOW VIEWS
name       comment
------------------
data_view  ""

> SHOW MATERIALIZED VIEWS
name   cluster     comment
---------------------------
test1  quickstart  ""

> SELECT * FROM test1
b sum
------
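# No rows are expected yet: nothing has been ingested into the topic at this point.
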
$ kafka-ingest format=avro topic=data schema=${schema} timestamp=1
{"a": 1, "b": 1}
{"a": 2, "b": 1}
{"a": 3, "b": 1}
{"a": 1, "b": 2}

> SELECT * FROM test1
b sum
------
1 6
2 1

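# Sanity check on the expected sums: for b = 1 the ingested a values are
# 1, 2 and 3, so sum(a) = 6; for b = 2 the only ingested a value is 1.
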
> SHOW COLUMNS FROM test1
name  nullable  type     comment
------------------------------
b     false     bigint   ""
sum   false     numeric  ""

> SHOW VIEWS LIKE '%data%'
data_view ""

# Materialized view can be built on an unmaterialized view.
> CREATE MATERIALIZED VIEW test2 AS
  SELECT b, 1 + sum(a + 1) FROM data_view GROUP BY b

> SELECT * FROM test2
b ?column?
-----------
1 10
2 3

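# Sanity check on the expected values: for b = 1, sum(a + 1) = 2 + 3 + 4 = 9,
# so 1 + sum(a + 1) = 10; for b = 2, 1 + (1 + 1) = 3.
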
# Materialize data_view.
> CREATE DEFAULT INDEX ON data_view

> SELECT * FROM data_view
a b
----
1 1
2 1
3 1
1 2

> CREATE VIEW test3 AS
  SELECT b, min(a) FROM data_view GROUP BY b

> SELECT * FROM test3
b min
------
1 1
2 1

> CREATE MATERIALIZED VIEW test4 AS
  SELECT b, max(a) FROM data_view GROUP BY b

> SELECT * FROM test4
b max
------
1 3
2 1

# Unmaterialize data_view.
> DROP INDEX data_view_primary_idx

# Can continue to select from a view that depends on the unmaterialized view.
> SELECT * FROM test4
b max
------
1 3
2 1

> SELECT * FROM test4 where b = 2
b max
------
2 1

> SELECT * from data_view
a b
----
1 1
2 1
3 1
1 2

# Cannot create sink from unmaterialized view.
! CREATE SINK not_mat_sink2
  IN CLUSTER ${arg.single-replica-cluster}
  FROM data_view
  INTO KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-data-view2-sink-${testdrive.seed}')
  FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  ENVELOPE DEBEZIUM
contains:data_view is a view, which cannot be exported as a sink

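# (Presumably a sink over a source or a materialized view would be accepted;
# the error above is specific to bare views such as data_view.)
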
# Can create indexed view from unmaterialized view.
> CREATE VIEW test5 AS
  SELECT b, max(a) AS c FROM data_view GROUP BY b

> CREATE DEFAULT INDEX ON test5

# Nor can a sink be created from an indexed view.
! CREATE SINK not_mat_sink2
  IN CLUSTER ${arg.single-replica-cluster}
  FROM test5
  INTO KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-data-view2-sink-${testdrive.seed}')
  FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  ENVELOPE DEBEZIUM
contains:test5 is a view, which cannot be exported as a sink

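# The regex below strips " (uNNN)" id suffixes from subsequent output, so the
# EXPLAIN plans that follow do not depend on catalog ids assigned at runtime.
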
$ set-regex match=(\s\(u\d+\)) replacement=

?[version>=13500] EXPLAIN OPTIMIZED PLAN AS VERBOSE TEXT FOR SELECT * FROM test5;
Explained Query (fast path):
  ReadIndex on=materialize.public.test5 test5_primary_idx=[*** full scan ***]

Used Indexes:
  - materialize.public.test5_primary_idx (*** full scan ***)

Target cluster: quickstart

?[version<13500] EXPLAIN OPTIMIZED PLAN FOR SELECT * FROM test5;
Explained Query (fast path):
  ReadIndex on=materialize.public.test5 test5_primary_idx=[*** full scan ***]

Used Indexes:
  - materialize.public.test5_primary_idx (*** full scan ***)

Target cluster: quickstart

> SELECT * FROM test5
b c
------
1 3
2 1

> SELECT c+b from test5
4
3

> CREATE INDEX idx1 ON test5(c)

! SELECT * FROM idx1
contains:catalog item 'materialize.public.idx1' is an index and so cannot be depended upon

# If there exists a second index, dropping the primary index will not
# unmaterialize the view.
> DROP INDEX test5_primary_idx

?[version>=13500] EXPLAIN OPTIMIZED PLAN AS VERBOSE TEXT FOR SELECT * FROM test5
Explained Query (fast path):
  Project (#1, #0)
    ReadIndex on=materialize.public.test5 idx1=[*** full scan ***]

Used Indexes:
  - materialize.public.idx1 (*** full scan ***)

Target cluster: quickstart

?[version<13500] EXPLAIN OPTIMIZED PLAN FOR SELECT * FROM test5
Explained Query (fast path):
  Project (#1, #0)
    ReadIndex on=materialize.public.test5 idx1=[*** full scan ***]

Used Indexes:
  - materialize.public.idx1 (*** full scan ***)

Target cluster: quickstart

> SELECT * from test5
b c
------
1 3
2 1

> SELECT c-b from test5
2
-1

# Unmaterialize test5.
> DROP INDEX idx1

# Still works.
> SELECT * from test5
b c
------
1 3
2 1

# Test that materialized views can be created even if it requires multiple
# layers of recursing through the AST to find a source.
> CREATE MATERIALIZED VIEW test6 AS SELECT (-c + 2*b) AS d FROM test5

> SELECT * from test6
d
----
-1
3

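# Sanity check on the expected values: d = -c + 2*b, so the row (b=1, c=3)
# yields -3 + 2 = -1 and the row (b=2, c=1) yields -1 + 4 = 3.
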
# Dependencies are still queryable after creating a dependent
# materialized view.
> SELECT * from test5
b c
------
1 3
2 1

> SELECT * from data_view
a b
----
1 1
2 1
3 1
1 2

# Rematerialize data_view by creating an index on it.
> CREATE INDEX data_view_idx on data_view(a)

> SELECT * from data_view
a b
---
1 1
2 1
3 1
1 2

# Existing materialized dependencies can be selected from as normal.
> SELECT * from test6
d
----
-1
3

# Dependencies can be selected from again if they do not depend on any other raw
# source.
> SELECT * from test5
b c
------
1 3
2 1

# Create a suboptimal second index on the same column in data_view.
> CREATE INDEX data_view_idx2 on data_view(a)

> SELECT * from data_view
a b
---
1 1
2 1
3 1
1 2

> SELECT * from test6
d
----
-1
3

> SELECT * from test5
b c
------
1 3
2 1

# Delete the first copy of the same index and ensure everything selects as
# normal.
> DROP INDEX data_view_idx

> SELECT * from data_view
a b
---
1 1
2 1
3 1
1 2

> SELECT * from test6
d
----
-1
3

> SELECT * from test5
b c
------
1 3
2 1

# Materialized sources tests
$ kafka-create-topic topic=mat

> CREATE SOURCE mat_data
  IN CLUSTER ${arg.single-replica-cluster}
  FROM KAFKA CONNECTION kafka_conn (TOPIC 'testdrive-mat-${testdrive.seed}')
  FORMAT AVRO USING SCHEMA '${schema}'

> CREATE DEFAULT INDEX ON mat_data

> SELECT * from mat_data

$ kafka-ingest format=avro topic=mat schema=${schema} timestamp=1
{"a": -1, "b": 0}
{"a": -1, "b": 1}
{"a": 3, "b": 4}
{"a": 1, "b": 2}

> SELECT * from mat_data
a b
----
-1 0
-1 1
3 4
1 2

> SHOW SOURCES
name               type      cluster                          comment
-------------------------------------------------------------------
data               kafka     ${arg.single-replica-cluster}    ""
data_progress      progress  <null>                           ""
mat_data           kafka     ${arg.single-replica-cluster}    ""
mat_data_progress  progress  <null>                           ""

# If there exists another index, dropping the primary index will not
# unmaterialize the source. This also tests creating a default index when the
# default index name is already taken.
> CREATE DEFAULT INDEX ON mat_data

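# The default index name mat_data_primary_idx is already taken, so this second
# default index is expected to be created as mat_data_primary_idx1 (it is
# dropped under that name further below).
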
> DROP INDEX mat_data_primary_idx

> SELECT a+b from mat_data
-1
0
7
3

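# Sanity check: a + b over the four ingested rows is -1 + 0, -1 + 1, 3 + 4
# and 1 + 2, i.e. -1, 0, 7 and 3.
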
# Can create both materialized and unmaterialized views from a materialized
# source.
> CREATE MATERIALIZED VIEW test7 as SELECT count(*) from mat_data

> SELECT * from test7
count
-----
4

> CREATE VIEW test8 as SELECT -b as c, -a as d from mat_data

> SELECT * from test8
c d
-----
0 1
-1 1
-4 -3
-2 -1

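# Sanity check: test8 returns (c, d) = (-b, -a) for each of the four rows
# currently in mat_data.
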
# Unmaterialize source.
> DROP INDEX mat_data_primary_idx1

# Still works.
> SELECT * from mat_data
a b
----
-1 0
-1 1
3 4
1 2

> SELECT * from test7
count
-----
4

> SELECT * from test8
c d
-----
0 1
-1 1
-4 -3
-2 -1

$ kafka-ingest format=avro topic=mat schema=${schema} timestamp=2
{"a": -3, "b": 0}
{"a": -1, "b": 0}
{"a": 0, "b": 4}
{"a": 1, "b": 2}

# Rematerialize source.
> CREATE INDEX mat_data_idx3 on mat_data(b)

> SELECT * from mat_data
a b
----
-1 0
-1 1
3 4
1 2
-3 0
-1 0
0 4
1 2

> SELECT * from test7
count
-----
8

> SELECT * from test8
c d
------
0 1
-1 1
-4 -3
-2 -1
0 3
0 1
-4 0
-2 -1

# Check arrangements; seeing new arrangements can mean a significant increase
# in memory consumption and should be understood before adapting the values.
> SET cluster_replica = r1

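# mz_introspection data is maintained per replica, so the session is pinned to
# replica r1 above before querying the arrangement introspection below.
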
>[version>=15000] SELECT mdod.dataflow_name, mdod.name
  FROM mz_introspection.mz_arrangement_sharing mash
  JOIN mz_introspection.mz_dataflow_operator_dataflows mdod ON mash.operator_id = mdod.id
  JOIN mz_introspection.mz_compute_exports USING (dataflow_id)
  WHERE export_id LIKE 'u%'
"Dataflow: materialize.public.data_view_idx" "ArrangeBy[[Column(0, \"a\")]]"
"Dataflow: materialize.public.data_view_idx" "ArrangeBy[[Column(0, \"a\")]]-errors"
"Dataflow: materialize.public.data_view_primary_idx" "ArrangeBy[[Column(0, \"a\"), Column(1, \"b\")]]"
"Dataflow: materialize.public.data_view_primary_idx" "ArrangeBy[[Column(0, \"a\"), Column(1, \"b\")]]-errors"
"Dataflow: materialize.public.mat_data_idx3" "ArrangeBy[[Column(1, \"b\")]]"
"Dataflow: materialize.public.mat_data_idx3" "ArrangeBy[[Column(1, \"b\")]]-errors"
"Dataflow: materialize.public.mat_data_primary_idx" "ArrangeBy[[Column(0, \"a\"), Column(1, \"b\")]]"
"Dataflow: materialize.public.mat_data_primary_idx" "ArrangeBy[[Column(0, \"a\"), Column(1, \"b\")]]-errors"
"Dataflow: materialize.public.test1" AccumulableErrorCheck
"Dataflow: materialize.public.test1" "ArrangeAccumulable [val: empty]"
"Dataflow: materialize.public.test1" ReduceAccumulable
"Dataflow: materialize.public.test2" AccumulableErrorCheck
"Dataflow: materialize.public.test2" "ArrangeAccumulable [val: empty]"
"Dataflow: materialize.public.test2" ReduceAccumulable
"Dataflow: materialize.public.test4" "ArrangeMonotonic [val: empty]"
"Dataflow: materialize.public.test4" ReduceMonotonic
"Dataflow: materialize.public.test6" "ArrangeMonotonic [val: empty]"
"Dataflow: materialize.public.test6" ReduceMonotonic
"Dataflow: materialize.public.test7" AccumulableErrorCheck
"Dataflow: materialize.public.test7" "ArrangeAccumulable [val: empty]"
"Dataflow: materialize.public.test7" ReduceAccumulable