predefined_pg_queries.py 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. from materialize.output_consistency.data_type.data_type import DataType
  10. from materialize.output_consistency.execution.value_storage_layout import (
  11. ValueStorageLayout,
  12. )
  13. from materialize.output_consistency.expression.constant_expression import (
  14. ConstantStringExpression,
  15. )
  16. from materialize.output_consistency.expression.expression import (
  17. Expression,
  18. LeafExpression,
  19. )
  20. from materialize.output_consistency.expression.expression_with_args import (
  21. ExpressionWithArgs,
  22. )
  23. from materialize.output_consistency.input_data.operations.boolean_operations_provider import (
  24. AND_OPERATION,
  25. NOT_OPERATION,
  26. )
  27. from materialize.output_consistency.input_data.operations.set_operations_provider import (
  28. create_in_operation,
  29. )
  30. from materialize.output_consistency.input_data.operations.string_operations_provider import (
  31. LOWER_OPERATION,
  32. REGEXP_REPLACE,
  33. STRING_NOT_LIKE_OPERATION,
  34. )
  35. from materialize.output_consistency.input_data.types.boolean_type_provider import (
  36. BOOLEAN_DATA_TYPE,
  37. )
  38. from materialize.output_consistency.input_data.types.date_time_types_provider import (
  39. INTERVAL_TYPE,
  40. )
  41. from materialize.output_consistency.input_data.types.string_type_provider import (
  42. TEXT_DATA_TYPE,
  43. )
  44. from materialize.output_consistency.query.data_source import DataSource
  45. from materialize.output_consistency.query.query_template import QueryTemplate
  46. from materialize.output_consistency.selection.row_selection import (
  47. ALL_ROWS_SELECTION,
  48. )
  49. def create_custom_pg_consistency_queries() -> list[QueryTemplate]:
  50. return [create_pg_timezone_abbrevs_query(), create_pg_timezone_names_query()]
  51. def create_pg_timezone_abbrevs_query() -> QueryTemplate:
  52. data_source = DataSource(custom_db_object_name="pg_catalog.pg_timezone_abbrevs")
  53. abbrev_col_expr = _create_simple_leaf_expression(
  54. "abbrev", TEXT_DATA_TYPE, data_source
  55. )
  56. pg_timezone_abbrevs_cols: list[Expression] = [
  57. abbrev_col_expr,
  58. _create_simple_leaf_expression("utc_offset", INTERVAL_TYPE, data_source),
  59. _create_simple_leaf_expression("is_dst", BOOLEAN_DATA_TYPE, data_source),
  60. ]
  61. pg_timezone_abbrevs = QueryTemplate(
  62. expect_error=False,
  63. select_expressions=pg_timezone_abbrevs_cols,
  64. where_expression=None,
  65. storage_layout=ValueStorageLayout.VERTICAL,
  66. contains_aggregations=False,
  67. row_selection=ALL_ROWS_SELECTION,
  68. data_source=data_source,
  69. custom_order_expressions=[abbrev_col_expr],
  70. )
  71. return pg_timezone_abbrevs
  72. def create_pg_timezone_names_query() -> QueryTemplate:
  73. data_source = DataSource(custom_db_object_name="pg_catalog.pg_timezone_names")
  74. pg_timezone_name_col_expr = _create_simple_leaf_expression(
  75. "name", TEXT_DATA_TYPE, data_source
  76. )
  77. pg_timezone_abbrev_col_expr = _create_simple_leaf_expression(
  78. "abbrev", TEXT_DATA_TYPE, data_source
  79. )
  80. pg_timezone_names_cols: list[Expression] = [
  81. pg_timezone_name_col_expr,
  82. pg_timezone_abbrev_col_expr,
  83. _create_simple_leaf_expression("utc_offset", INTERVAL_TYPE, data_source),
  84. _create_simple_leaf_expression("is_dst", BOOLEAN_DATA_TYPE, data_source),
  85. ]
  86. no_posix_timezones = ExpressionWithArgs(
  87. operation=STRING_NOT_LIKE_OPERATION,
  88. args=[
  89. pg_timezone_name_col_expr,
  90. ConstantStringExpression("posix/%"),
  91. ],
  92. )
  93. # TODO database-issues#7851: time zones differ
  94. excluded_timezones = [
  95. # abbrev, utc_offset, is_dst differ (as of 2024-04-08)
  96. "America/Godthab",
  97. "America/Nuuk",
  98. "Asia/Gaza",
  99. "Asia/Hebron",
  100. # abbrev, utc_offset, is_dst differ (as of 2024-04-25)
  101. "Africa/Cairo",
  102. "Egypt",
  103. # abbrev differs
  104. "Europe/Kirov",
  105. "Europe/Volgograd",
  106. ]
  107. # further time zones that differ in CI (due to the used libtz version)
  108. excluded_timezones.extend(
  109. [
  110. "America/Scoresbysund",
  111. "Antarctica/Casey",
  112. "Antarctica/Vostok",
  113. "Asia/Almaty",
  114. "Asia/Qostanay",
  115. ]
  116. )
  117. # do not exist in mz
  118. excluded_timezones.extend(
  119. [
  120. "Factory",
  121. "localtime",
  122. "posixrules",
  123. ]
  124. )
  125. # excluded because they cause pain with sorting
  126. excluded_timezones.extend(
  127. [
  128. "Etc/GMT+0",
  129. "Etc/GMT-0",
  130. "GMT+0",
  131. "GMT-0",
  132. ]
  133. )
  134. excluded_timezones_expr = ExpressionWithArgs(
  135. operation=NOT_OPERATION,
  136. args=[
  137. ExpressionWithArgs(
  138. operation=create_in_operation(len(excluded_timezones)),
  139. args=[
  140. pg_timezone_name_col_expr,
  141. *[ConstantStringExpression(tz) for tz in excluded_timezones],
  142. ],
  143. )
  144. ],
  145. )
  146. exclusion_expression = ExpressionWithArgs(
  147. operation=AND_OPERATION,
  148. args=[no_posix_timezones, excluded_timezones_expr],
  149. )
  150. # remove special characters for ordering due to different sort order
  151. order_by_sanitized_name_expr = ExpressionWithArgs(
  152. operation=LOWER_OPERATION,
  153. args=[
  154. ExpressionWithArgs(
  155. operation=REGEXP_REPLACE,
  156. args=[
  157. pg_timezone_name_col_expr,
  158. ConstantStringExpression("[^A-Za-z0-9]"),
  159. ConstantStringExpression(""),
  160. # all occurrences
  161. ConstantStringExpression("g"),
  162. ],
  163. )
  164. ],
  165. )
  166. pg_timezone_names = QueryTemplate(
  167. expect_error=False,
  168. select_expressions=pg_timezone_names_cols,
  169. where_expression=exclusion_expression,
  170. storage_layout=ValueStorageLayout.VERTICAL,
  171. contains_aggregations=False,
  172. row_selection=ALL_ROWS_SELECTION,
  173. data_source=data_source,
  174. custom_order_expressions=[
  175. order_by_sanitized_name_expr,
  176. pg_timezone_abbrev_col_expr,
  177. ],
  178. )
  179. return pg_timezone_names
  180. def _create_simple_leaf_expression(
  181. column_name: str, data_type: DataType, data_source: DataSource
  182. ) -> LeafExpression:
  183. return LeafExpression(
  184. column_name=column_name,
  185. data_type=data_type,
  186. data_source=data_source,
  187. characteristics=set(),
  188. storage_layout=ValueStorageLayout.VERTICAL,
  189. )