123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216 |
- # Copyright Materialize, Inc. and contributors. All rights reserved.
- #
- # Use of this software is governed by the Business Source License
- # included in the LICENSE file at the root of this repository.
- #
- # As of the Change Date specified in that file, in accordance with
- # the Business Source License, use of this software will be governed
- # by the Apache License, Version 2.0.
- import math
- import re
- from decimal import Decimal
- from functools import partial
- from typing import Any
- from materialize.output_consistency.expression.expression import Expression
- from materialize.output_consistency.ignore_filter.expression_matchers import (
- is_operation_tagged,
- is_table_function,
- )
- from materialize.output_consistency.ignore_filter.inconsistency_ignore_filter import (
- GenericInconsistencyIgnoreFilter,
- )
- from materialize.output_consistency.input_data.operations.jsonb_operations_provider import (
- TAG_JSONB_OBJECT_GENERATION,
- )
- from materialize.output_consistency.query.query_result import QueryExecution
- from materialize.output_consistency.validation.error_message_normalizer import (
- ErrorMessageNormalizer,
- )
- from materialize.output_consistency.validation.result_comparator import ResultComparator
- # Examples:
- # * 2038-01-19 03:14:18
- # * 2038-01-19 03:14:18.123
- # * 2038-01-19 03:14:18.123+00
- # * 2038-01-19 03:14:18.123+00 BC
- # * 2038-01-19 03:14:18+00
- # * 2038-01-19 03:14:18-03:00
- # * 2038-01-19T03:14:18+00 (when used in JSONB)
- TIMESTAMP_PATTERN = re.compile(
- r"^\d{4,}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}(\.\d+)?([+-]\d+(:\d+)?)?( (BC|AC))?$"
- )
- # Examples:
- # * NaN
- # * 1
- # * -1.23
- # * 1.23e-3
- # * 1.23e+3
- DECIMAL_PATTERN = re.compile(r"^NaN|[+-]?\d+(\.\d+)?(e[+-]?\d+)?$")
- # Examples:
- # * ["1","2"]
- # * [1,2]
- # * [1, 2]
- # * [1, [1, 2]]
- SIMPLIFIED_ARRAY_PATTERN = re.compile(r"\[(.*),(.*)\]")
- # Examples:
- # * {"a": 1, "c": 3}
- JSON_OBJECT_PATTERN = re.compile(r"\{(.*)[:,](.*)\}")
- class PostgresResultComparator(ResultComparator):
- """Compares the outcome (result or failure) of multiple query executions"""
- def __init__(
- self,
- ignore_filter: GenericInconsistencyIgnoreFilter,
- error_message_normalizer: ErrorMessageNormalizer,
- ):
- super().__init__(ignore_filter, error_message_normalizer)
- self.floating_precision = 1e-03
- def shall_validate_error_message(self, query_execution: QueryExecution) -> bool:
- # do not compare error messages at all
- return False
- def is_value_equal(
- self,
- value1: Any,
- value2: Any,
- expression: Expression,
- is_tolerant: bool = False,
- ) -> bool:
- if super().is_value_equal(value1, value2, expression, is_tolerant=is_tolerant):
- return True
- if isinstance(value1, Decimal):
- if isinstance(value2, Decimal):
- return self.is_decimal_equal(value1, value2)
- if isinstance(value2, float):
- return self.is_decimal_equal(value1, Decimal(value2))
- if isinstance(value1, float):
- if isinstance(value2, float):
- return self.is_float_equal(value1, value2)
- if isinstance(value2, Decimal):
- return self.is_decimal_equal(Decimal(value1), value2)
- if isinstance(value1, str) and isinstance(value2, str):
- return self.is_str_equal(value1, value2, is_tolerant)
- if is_tolerant:
- type1 = type(value1)
- type2 = type(value2)
- if type1 in {int, float, Decimal} and type2 in {int, float, Decimal}:
- # This is needed for imprecise results of floating type operations that are returned as int or float
- # values in dicts of JSONB data.
- return self.is_decimal_equal(Decimal(value1), Decimal(value2))
- return False
- def is_decimal_equal(self, value1: Decimal, value2: Decimal) -> bool:
- if value1.is_nan():
- return value2.is_nan()
- return math.isclose(value1, value2, rel_tol=self.floating_precision)
- def is_float_equal(self, value1: float, value2: float) -> bool:
- if math.isnan(value1):
- return math.isnan(value2)
- return math.isclose(value1, value2, rel_tol=self.floating_precision)
- def is_str_equal(self, value1: str, value2: str, is_tolerant: bool) -> bool:
- if self.is_timestamp(value1) and self.is_timestamp(value2):
- return self.is_timestamp_equal(value1, value2)
- if (
- SIMPLIFIED_ARRAY_PATTERN.search(value1)
- or JSON_OBJECT_PATTERN.search(value1)
- ) and (
- SIMPLIFIED_ARRAY_PATTERN.search(value2)
- or JSON_OBJECT_PATTERN.search(value2)
- ):
- # This is a rather eager pattern to also match concatenated strings.
- # tracked with database-issues#7085
- value1 = value1.replace(", ", ",").replace(": ", ":")
- value2 = value2.replace(", ", ",").replace(": ", ":")
- # Postgres uses 'mon' / 'mons' instead of 'month' / 'months'
- value1 = value1.replace(" month", " mon")
- value2 = value2.replace(" month", " mon")
- if is_tolerant and self.is_decimal(value1) and self.is_decimal(value2):
- try:
- return self.is_decimal_equal(Decimal(value1), Decimal(value2))
- except Exception:
- return True
- return value1 == value2
- def is_decimal(self, value: str):
- return DECIMAL_PATTERN.match(value) is not None
- def is_timestamp(self, value: str) -> bool:
- return TIMESTAMP_PATTERN.match(value) is not None
- def is_timestamp_equal(self, value1: str, value2: str) -> bool:
- # try to match any of these
- last_second_and_milliseconds_regex = r"(\d\.\d+)"
- last_second_before_timezone_regex = r"(?<=:\d)(\d)(?=\+)"
- last_second_at_the_end_regex = r"(?<=:\d)(\d$)"
- last_second_and_milliseconds_pattern = re.compile(
- f"{last_second_and_milliseconds_regex}|{last_second_before_timezone_regex}|{last_second_at_the_end_regex}"
- )
- if last_second_and_milliseconds_pattern.search(
- value1
- ) and last_second_and_milliseconds_pattern.search(value2):
- # drop milliseconds and, if present, trunc last digit of second
- value1 = last_second_and_milliseconds_pattern.sub("0", value1)
- value2 = last_second_and_milliseconds_pattern.sub("0", value2)
- value1 = self._normalize_jsonb_timestamp(value1)
- value2 = self._normalize_jsonb_timestamp(value2)
- assert self.is_timestamp(value1)
- assert self.is_timestamp(value2)
- return value1 == value2
- def _normalize_jsonb_timestamp(self, value: str) -> str:
- # this is due to database-issues#8247
- pattern_for_date = r"\d+-\d+-\d+"
- pattern_for_time = r"\d+:\d+:\d+[+-]\d+"
- pattern_for_value_without_t_sep_and_timezone_mins = (
- rf"({pattern_for_date})T({pattern_for_time}):\d+"
- )
- match = re.match(pattern_for_value_without_t_sep_and_timezone_mins, value)
- if match is None:
- return value
- return match.group(1) + " " + match.group(2)
- def ignore_row_order(self, expression: Expression) -> bool:
- if expression.matches(
- is_table_function,
- True,
- ):
- # inconsistent sort order
- return True
- return False
- def ignore_order_when_comparing_collection(self, expression: Expression) -> bool:
- if expression.matches(
- partial(is_operation_tagged, tag=TAG_JSONB_OBJECT_GENERATION),
- True,
- ):
- # this is because of database-issues#8266
- return True
- return False
|