expected_group_size_tuning.td 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. # This test uses introspection queries that need to be targeted to a replica
  10. > SET cluster_replica = r1
  11. # Start from a TPC-H load generator source with small data.
  12. > CREATE CLUSTER lgtpch_cluster_tuning SIZE '1';
  13. > CREATE SOURCE lgtpch IN CLUSTER lgtpch_cluster_tuning FROM LOAD GENERATOR TPCH (SCALE FACTOR 0.0001, TICK INTERVAL 0.1, UP TO 1000);
  14. > CREATE TABLE customer FROM SOURCE lgtpch (REFERENCE customer);
  15. > CREATE TABLE lineitem FROM SOURCE lgtpch (REFERENCE lineitem);
  16. > CREATE TABLE nation FROM SOURCE lgtpch (REFERENCE nation);
  17. > CREATE TABLE orders FROM SOURCE lgtpch (REFERENCE orders);
  18. > CREATE TABLE part FROM SOURCE lgtpch (REFERENCE part);
  19. > CREATE TABLE partsupp FROM SOURCE lgtpch (REFERENCE partsupp);
  20. > CREATE TABLE region FROM SOURCE lgtpch (REFERENCE region);
  21. > CREATE TABLE supplier FROM SOURCE lgtpch (REFERENCE supplier);
  22. # Create a set of materialized views for testing based on the TPC-H schema.
  23. > CREATE MATERIALIZED VIEW lineitem_by_orderkey AS
  24. SELECT l1.l_orderkey,
  25. MAX(l1.l_extendedprice),
  26. (SELECT l2.l_quantity FROM lineitem l2
  27. WHERE l2.l_orderkey = l1.l_orderkey
  28. ORDER BY l2.l_extendedprice DESC
  29. LIMIT 1)
  30. FROM lineitem l1
  31. GROUP BY l1.l_orderkey;
  32. > CREATE MATERIALIZED VIEW lineitem_by_partsuppkey AS
  33. SELECT l1.l_partkey,
  34. l_suppkey,
  35. MAX(l1.l_extendedprice),
  36. (SELECT l2.l_quantity FROM lineitem l2
  37. WHERE l2.l_partkey = l1.l_partkey
  38. AND l2.l_suppkey = l1.l_suppkey
  39. ORDER BY l2.l_extendedprice DESC
  40. LIMIT 1)
  41. FROM lineitem l1
  42. GROUP BY l1.l_partkey, l_suppkey;
  43. > CREATE MATERIALIZED VIEW lineitem_by_partkey AS
  44. SELECT l1.l_partkey,
  45. MAX(l1.l_extendedprice),
  46. (SELECT l2.l_quantity FROM lineitem l2
  47. WHERE l2.l_partkey = l1.l_partkey
  48. ORDER BY l2.l_extendedprice DESC
  49. LIMIT 1)
  50. FROM lineitem l1
  51. GROUP BY l1.l_partkey;
  52. > CREATE MATERIALIZED VIEW lineitem_by_suppkey AS
  53. SELECT l1.l_suppkey,
  54. MAX(l1.l_extendedprice),
  55. (SELECT l2.l_quantity FROM lineitem l2
  56. WHERE l2.l_suppkey = l1.l_suppkey
  57. ORDER BY l2.l_extendedprice DESC
  58. LIMIT 1)
  59. FROM lineitem l1
  60. GROUP BY l1.l_suppkey;
  61. # Check that we know the values of the perfect hints for TPC-H.
  62. > SELECT pow(16, ceil(log(16, MAX(group_size)))) - 1 AS perfect_hint
  63. FROM (
  64. SELECT l_orderkey, COUNT(*) AS group_size
  65. FROM lineitem
  66. GROUP BY l_orderkey
  67. );
  68. 15
  69. > SELECT pow(16, ceil(log(16, MAX(group_size)))) - 1 perfect_hint
  70. FROM (
  71. SELECT l_partkey, l_suppkey, COUNT(*) AS group_size
  72. FROM lineitem
  73. GROUP BY l_partkey, l_suppkey
  74. );
  75. 255
  76. > SELECT pow(16, ceil(log(16, MAX(group_size)))) - 1 perfect_hint
  77. FROM (
  78. SELECT l_partkey, COUNT(*) AS group_size
  79. FROM lineitem
  80. GROUP BY l_partkey
  81. );
  82. 255
  83. > SELECT pow(16, ceil(log(16, MAX(group_size)))) - 1 perfect_hint
  84. FROM (
  85. SELECT l_suppkey, COUNT(*) AS group_size
  86. FROM lineitem
  87. GROUP BY l_suppkey
  88. );
  89. 4095
  90. > SELECT dataflow_name, region_name, levels, to_cut, hint
  91. FROM mz_introspection.mz_expected_group_size_advice
  92. ORDER BY dataflow_name, region_name;
  93. "Dataflow: materialize.public.lineitem_by_orderkey" ReduceHierarchical 8 7 15
  94. "Dataflow: materialize.public.lineitem_by_orderkey" TopK 8 7 15
  95. "Dataflow: materialize.public.lineitem_by_partsuppkey" TopK 8 6 255
  96. "Dataflow: materialize.public.lineitem_by_partsuppkey" ReduceHierarchical 8 6 255
  97. "Dataflow: materialize.public.lineitem_by_partkey" ReduceHierarchical 8 6 255
  98. "Dataflow: materialize.public.lineitem_by_partkey" TopK 8 6 255
  99. "Dataflow: materialize.public.lineitem_by_suppkey" ReduceHierarchical 8 5 4095
  100. "Dataflow: materialize.public.lineitem_by_suppkey" TopK 8 5 4095
  101. # Validate that there are positive memory savings listed for the entries above.
  102. > SELECT COUNT(savings > 0)
  103. FROM mz_introspection.mz_expected_group_size_advice;
  104. 8
  105. # Create partly hinted versions of the views and check that the advice gets revised accordingly.
  106. > DROP MATERIALIZED VIEW lineitem_by_suppkey;
  107. > CREATE MATERIALIZED VIEW lineitem_by_suppkey AS
  108. SELECT l1.l_suppkey,
  109. MAX(l1.l_extendedprice),
  110. (SELECT l2.l_quantity FROM lineitem l2
  111. WHERE l2.l_suppkey = l1.l_suppkey
  112. ORDER BY l2.l_extendedprice DESC
  113. LIMIT 1)
  114. FROM lineitem l1
  115. GROUP BY l1.l_suppkey
  116. OPTIONS (AGGREGATE INPUT GROUP SIZE = 4095);
  117. > DROP MATERIALIZED VIEW IF EXISTS lineitem_by_orderkey;
  118. > CREATE MATERIALIZED VIEW lineitem_by_orderkey AS
  119. SELECT l1.l_orderkey,
  120. MAX(l1.l_extendedprice),
  121. (SELECT l2.l_quantity FROM lineitem l2
  122. WHERE l2.l_orderkey = l1.l_orderkey
  123. OPTIONS (LIMIT INPUT GROUP SIZE = 15)
  124. ORDER BY l2.l_extendedprice DESC
  125. LIMIT 1)
  126. FROM lineitem l1
  127. GROUP BY l1.l_orderkey;
  128. > SELECT dataflow_name, region_name, levels, to_cut, hint
  129. FROM mz_introspection.mz_expected_group_size_advice
  130. ORDER BY dataflow_name, region_name;
  131. "Dataflow: materialize.public.lineitem_by_orderkey" ReduceHierarchical 8 7 15
  132. "Dataflow: materialize.public.lineitem_by_partsuppkey" TopK 8 6 255
  133. "Dataflow: materialize.public.lineitem_by_partsuppkey" ReduceHierarchical 8 6 255
  134. "Dataflow: materialize.public.lineitem_by_partkey" ReduceHierarchical 8 6 255
  135. "Dataflow: materialize.public.lineitem_by_partkey" TopK 8 6 255
  136. "Dataflow: materialize.public.lineitem_by_suppkey" TopK 8 5 4095
  137. # Validate that there are positive memory savings listed for the entries above.
  138. > SELECT COUNT(savings > 0)
  139. FROM mz_introspection.mz_expected_group_size_advice;
  140. 6
  141. > DROP SOURCE lgtpch CASCADE