primary-key-optimizations.td 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. $ set-arg-default single-replica-cluster=quickstart
  10. $ postgres-execute connection=postgres://mz_system:materialize@${testdrive.materialize-internal-sql-addr}
  11. ALTER SYSTEM SET unsafe_enable_table_keys = true
  12. ALTER SYSTEM SET enable_primary_key_not_enforced = true
  13. #
  14. # Test that the knowledge that a given set of fields forms a primary key is used by the optimizer to remove grouping and distinct operations
  15. #
  16. # Remove references to internal table identifiers and "materialize.public" strings
  17. $ set-regex match=(\s\(u\d+\)|materialize\.public\.) replacement=
  18. $ set keyschema-2keys={
  19. "type": "record",
  20. "name": "Key",
  21. "fields": [
  22. {"name": "key1", "type": "string"},
  23. {"name": "key2", "type": "string"}
  24. ]
  25. }
  26. $ set schema={
  27. "type" : "record",
  28. "name" : "test",
  29. "fields" : [
  30. {"name":"nokey", "type":"string"}
  31. ]
  32. }
  33. $ kafka-create-topic topic=t1
  34. $ kafka-ingest format=avro topic=t1 key-format=avro key-schema=${keyschema-2keys} schema=${schema}
  35. > CREATE CONNECTION IF NOT EXISTS csr_conn TO CONFLUENT SCHEMA REGISTRY (
  36. URL '${testdrive.schema-registry-url}'
  37. );
  38. > CREATE CONNECTION kafka_conn
  39. TO KAFKA (BROKER '${testdrive.kafka-addr}', SECURITY PROTOCOL PLAINTEXT);
  40. > CREATE SOURCE t1
  41. IN CLUSTER ${arg.single-replica-cluster}
  42. FROM KAFKA CONNECTION kafka_conn (TOPIC
  43. 'testdrive-t1-${testdrive.seed}');
  44. > CREATE TABLE t1_tbl FROM SOURCE t1 (REFERENCE "testdrive-t1-${testdrive.seed}")
  45. FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION csr_conn
  46. ENVELOPE UPSERT;
  47. > CREATE DEFAULT INDEX ON t1_tbl
  48. # Optimization is possible - no distinct is mentioned in the plan
  49. ? EXPLAIN OPTIMIZED PLAN WITH(no notices) AS VERBOSE TEXT FOR SELECT DISTINCT key1, key2 FROM t1_tbl;
  50. Explained Query (fast path):
  51. Project (#0, #1)
  52. ReadIndex on=t1_tbl t1_tbl_primary_idx=[*** full scan ***]
  53. Used Indexes:
  54. - t1_tbl_primary_idx (*** full scan ***)
  55. Target cluster: quickstart
  56. ? EXPLAIN OPTIMIZED PLAN WITH(no notices) AS VERBOSE TEXT FOR SELECT DISTINCT key2, key1 FROM t1_tbl;
  57. Explained Query (fast path):
  58. Project (#1, #0)
  59. ReadIndex on=t1_tbl t1_tbl_primary_idx=[*** full scan ***]
  60. Used Indexes:
  61. - t1_tbl_primary_idx (*** full scan ***)
  62. Target cluster: quickstart
  63. ? EXPLAIN OPTIMIZED PLAN WITH(no notices) AS VERBOSE TEXT FOR SELECT DISTINCT key2, key1, key2 FROM t1_tbl;
  64. Explained Query (fast path):
  65. Project (#1, #0, #1)
  66. ReadIndex on=t1_tbl t1_tbl_primary_idx=[*** full scan ***]
  67. Used Indexes:
  68. - t1_tbl_primary_idx (*** full scan ***)
  69. Target cluster: quickstart
  70. ? EXPLAIN OPTIMIZED PLAN WITH(no notices) AS VERBOSE TEXT FOR SELECT key2, key1 FROM t1_tbl GROUP BY key1, key2;
  71. Explained Query (fast path):
  72. Project (#1, #0)
  73. ReadIndex on=t1_tbl t1_tbl_primary_idx=[*** full scan ***]
  74. Used Indexes:
  75. - t1_tbl_primary_idx (*** full scan ***)
  76. Target cluster: quickstart
  77. ? EXPLAIN OPTIMIZED PLAN WITH(no notices) AS VERBOSE TEXT FOR SELECT key2, key1 FROM t1_tbl GROUP BY key1, key2, key2 || 'a';
  78. Explained Query (fast path):
  79. Project (#1, #0)
  80. ReadIndex on=t1_tbl t1_tbl_primary_idx=[*** full scan ***]
  81. Used Indexes:
  82. - t1_tbl_primary_idx (*** full scan ***)
  83. Target cluster: quickstart
  84. ? EXPLAIN OPTIMIZED PLAN WITH(no notices) AS VERBOSE TEXT FOR SELECT DISTINCT key1, key2, nokey FROM t1_tbl;
  85. Explained Query (fast path):
  86. ReadIndex on=t1_tbl t1_tbl_primary_idx=[*** full scan ***]
  87. Used Indexes:
  88. - t1_tbl_primary_idx (*** full scan ***)
  89. Target cluster: quickstart
  90. ? EXPLAIN OPTIMIZED PLAN WITH(no notices) AS VERBOSE TEXT FOR SELECT key1, key2, nokey FROM t1_tbl GROUP BY key1, key2, nokey;
  91. Explained Query (fast path):
  92. ReadIndex on=t1_tbl t1_tbl_primary_idx=[*** full scan ***]
  93. Used Indexes:
  94. - t1_tbl_primary_idx (*** full scan ***)
  95. Target cluster: quickstart
  96. ? EXPLAIN OPTIMIZED PLAN WITH(no notices) AS VERBOSE TEXT FOR SELECT key1, key2 FROM t1_tbl GROUP BY key1, key2 HAVING key1 = 'a';
  97. Explained Query (fast path):
  98. Project (#0, #1)
  99. Filter (#0 = "a")
  100. ReadIndex on=t1_tbl t1_tbl_primary_idx=[*** full scan ***]
  101. Used Indexes:
  102. - t1_tbl_primary_idx (*** full scan ***)
  103. Target cluster: quickstart
  104. # Optimization not possible - explicit distinct is present in planFor certain types of tests the 
  105. ? EXPLAIN OPTIMIZED PLAN WITH(no notices) AS VERBOSE TEXT FOR SELECT DISTINCT key1 FROM t1_tbl;
  106. Explained Query:
  107. Distinct project=[#0]
  108. Project (#0)
  109. ReadIndex on=t1_tbl t1_tbl_primary_idx=[*** full scan ***]
  110. Used Indexes:
  111. - t1_tbl_primary_idx (*** full scan ***)
  112. Target cluster: quickstart
  113. ? EXPLAIN OPTIMIZED PLAN WITH(no notices) AS VERBOSE TEXT FOR SELECT DISTINCT key2 FROM t1_tbl;
  114. Explained Query:
  115. Distinct project=[#0]
  116. Project (#1)
  117. ReadIndex on=t1_tbl t1_tbl_primary_idx=[*** full scan ***]
  118. Used Indexes:
  119. - t1_tbl_primary_idx (*** full scan ***)
  120. Target cluster: quickstart
  121. ? EXPLAIN OPTIMIZED PLAN WITH(no notices) AS VERBOSE TEXT FOR SELECT DISTINCT key1, upper(key2) FROM t1_tbl;
  122. Explained Query:
  123. Distinct project=[#0, upper(#1{key2})]
  124. Project (#0, #1)
  125. ReadIndex on=t1_tbl t1_tbl_primary_idx=[*** full scan ***]
  126. Used Indexes:
  127. - t1_tbl_primary_idx (*** full scan ***)
  128. Target cluster: quickstart
  129. ? EXPLAIN OPTIMIZED PLAN WITH(no notices) AS VERBOSE TEXT FOR SELECT DISTINCT key1, key2 || 'a' FROM t1_tbl;
  130. Explained Query:
  131. Distinct project=[#0, (#1{key2} || "a")]
  132. Project (#0, #1)
  133. ReadIndex on=t1_tbl t1_tbl_primary_idx=[*** full scan ***]
  134. Used Indexes:
  135. - t1_tbl_primary_idx (*** full scan ***)
  136. Target cluster: quickstart
  137. ? EXPLAIN OPTIMIZED PLAN WITH(no notices) AS VERBOSE TEXT FOR SELECT key1 FROM t1_tbl GROUP BY key1;
  138. Explained Query:
  139. Distinct project=[#0]
  140. Project (#0)
  141. ReadIndex on=t1_tbl t1_tbl_primary_idx=[*** full scan ***]
  142. Used Indexes:
  143. - t1_tbl_primary_idx (*** full scan ***)
  144. Target cluster: quickstart
  145. ? EXPLAIN OPTIMIZED PLAN WITH(no notices) AS VERBOSE TEXT FOR SELECT key2 FROM t1_tbl GROUP BY key2;
  146. Explained Query:
  147. Distinct project=[#0]
  148. Project (#1)
  149. ReadIndex on=t1_tbl t1_tbl_primary_idx=[*** full scan ***]
  150. Used Indexes:
  151. - t1_tbl_primary_idx (*** full scan ***)
  152. Target cluster: quickstart
  153. ? EXPLAIN OPTIMIZED PLAN WITH(no notices) AS VERBOSE TEXT FOR SELECT COUNT(DISTINCT key1) FROM t1_tbl;
  154. Explained Query:
  155. With
  156. cte l0 =
  157. Reduce aggregates=[count(distinct #0{key1})]
  158. Project (#0)
  159. ReadIndex on=t1_tbl t1_tbl_primary_idx=[*** full scan ***]
  160. Return
  161. Union
  162. Get l0
  163. Map (0)
  164. Union
  165. Negate
  166. Project ()
  167. Get l0
  168. Constant
  169. - ()
  170. Used Indexes:
  171. - t1_tbl_primary_idx (*** full scan ***)
  172. Target cluster: quickstart
  173. # Make sure that primary key information is inherited from the source
  174. > CREATE VIEW v1 AS SELECT * FROM t1_tbl;
  175. ? EXPLAIN OPTIMIZED PLAN WITH(no notices) AS VERBOSE TEXT FOR SELECT DISTINCT key1, key2 FROM v1;
  176. Explained Query (fast path):
  177. Project (#0, #1)
  178. ReadIndex on=t1_tbl t1_tbl_primary_idx=[*** full scan ***]
  179. Used Indexes:
  180. - t1_tbl_primary_idx (*** full scan ***)
  181. Target cluster: quickstart
  182. > CREATE VIEW v2 AS SELECT * FROM t1_tbl;
  183. > CREATE DEFAULT INDEX ON v2;
  184. ? EXPLAIN OPTIMIZED PLAN WITH(no notices) AS VERBOSE TEXT FOR SELECT DISTINCT key1, key2 FROM v2;
  185. Explained Query (fast path):
  186. Project (#0, #1)
  187. ReadIndex on=v2 v2_primary_idx=[*** full scan ***]
  188. Used Indexes:
  189. - v2_primary_idx (*** full scan ***)
  190. Target cluster: quickstart
  191. # Make sure that having a DISTINCT or GROUP BY confers PK semantics on upstream views
  192. > CREATE VIEW distinct_view AS SELECT DISTINCT nokey FROM t1_tbl;
  193. > CREATE DEFAULT INDEX ON distinct_view;
  194. ? EXPLAIN OPTIMIZED PLAN WITH(no notices) AS VERBOSE TEXT FOR SELECT DISTINCT nokey FROM distinct_view
  195. Explained Query (fast path):
  196. ReadIndex on=distinct_view distinct_view_primary_idx=[*** full scan ***]
  197. Used Indexes:
  198. - distinct_view_primary_idx (*** full scan ***)
  199. Target cluster: quickstart
  200. > CREATE VIEW group_by_view AS SELECT nokey || 'a' AS f1 , nokey || 'b' AS f2 FROM t1_tbl GROUP BY nokey || 'a', nokey || 'b';
  201. > CREATE DEFAULT INDEX ON group_by_view;
  202. ? EXPLAIN OPTIMIZED PLAN WITH(no notices) AS VERBOSE TEXT FOR SELECT DISTINCT f1, f2 FROM group_by_view;
  203. Explained Query (fast path):
  204. ReadIndex on=group_by_view group_by_view_primary_idx=[*** full scan ***]
  205. Used Indexes:
  206. - group_by_view_primary_idx (*** full scan ***)
  207. Target cluster: quickstart
  208. # Redundant table is eliminated from an inner join using PK information
  209. ? EXPLAIN OPTIMIZED PLAN WITH(no notices) AS VERBOSE TEXT FOR SELECT a1.* FROM t1_tbl AS a1, t1_tbl AS a2 WHERE a1.key1 = a2.key1 AND a1.key2 = a2.key2;
  210. Explained Query (fast path):
  211. ReadIndex on=t1_tbl t1_tbl_primary_idx=[*** full scan ***]
  212. Used Indexes:
  213. - t1_tbl_primary_idx (*** full scan ***)
  214. Target cluster: quickstart
  215. ? EXPLAIN OPTIMIZED PLAN WITH(no notices) AS VERBOSE TEXT FOR SELECT a1.* FROM v1 AS a1, v1 AS a2 WHERE a1.key1 = a2.key1 AND a1.key2 = a2.key2;
  216. Explained Query (fast path):
  217. ReadIndex on=t1_tbl t1_tbl_primary_idx=[*** full scan ***]
  218. Used Indexes:
  219. - t1_tbl_primary_idx (*** full scan ***)
  220. Target cluster: quickstart
  221. ? EXPLAIN OPTIMIZED PLAN WITH(no notices) AS VERBOSE TEXT FOR SELECT a1.* FROM v2 AS a1, v2 AS a2 WHERE a1.key1 = a2.key1 AND a1.key2 = a2.key2;
  222. Explained Query (fast path):
  223. ReadIndex on=v2 v2_primary_idx=[*** full scan ***]
  224. Used Indexes:
  225. - v2_primary_idx (*** full scan ***)
  226. Target cluster: quickstart
  227. # Declare a key constraint (PRIMARY KEY NOT ENFORCED); otherwise identical tests as above.
  228. $ set schema={
  229. "type" : "record",
  230. "name" : "test",
  231. "fields" : [
  232. {"name":"key1","type":"string"},
  233. {"name":"key2","type":"string"},
  234. {"name":"nokey", "type":"string"}
  235. ]
  236. }