skew.py 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. from math import floor
  10. from textwrap import dedent
  11. from materialize.feature_benchmark.measurement_source import MeasurementSource, Td
  12. from materialize.feature_benchmark.scenario import Scenario
  13. class SkewedJoin(Scenario):
  14. def benchmark(self) -> MeasurementSource:
  15. scale = self.scale()
  16. count = 10**scale
  17. return Td(
  18. dedent(
  19. f"""
  20. > DROP TABLE IF EXISTS skewed_table CASCADE;
  21. > DROP TABLE IF EXISTS uniform_table CASCADE;
  22. > CREATE TABLE skewed_table(f1 INTEGER);
  23. > CREATE TABLE uniform_table (f1 INTEGER);
  24. > CREATE MATERIALIZED VIEW v1 AS SELECT COUNT(*) > 0 FROM skewed_table JOIN uniform_table USING (f1)
  25. /* A */
  26. > INSERT INTO uniform_table (f1) SELECT generate_series FROM generate_series(0, {count-1}::integer);
  27. # Make sure 0 is overrepresented
  28. > INSERT INTO skewed_table (f1) SELECT 0 FROM generate_series(1, {count}::integer);
  29. """
  30. )
  31. + "\n".join(
  32. [
  33. f"> INSERT INTO skewed_table (f1) SELECT MOD(generate_series, POW(10, {i})) FROM generate_series(1, ({count} / {scale})::integer);"
  34. for i in range(floor(scale))
  35. ]
  36. )
  37. + dedent(
  38. """
  39. > SELECT * FROM v1
  40. /* B */
  41. true
  42. """
  43. )
  44. )