all_parts_essential.py 2.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. """
  10. Test that all parts of the query are important. This is done by
  11. commenting out parts of the query -- if any part of the query
  12. can be commented out without this affecting the result of the query
  13. this means that the query contains constructs and predicates that
  14. do not contribute to the final result in any way.
  15. On the other hand, if all parts of the query are deemed essential,
  16. the query is such that if any part of it is lost during optimization
  17. or execution, the entire query will start producing a different result.
  18. Such queries are suitable for inclusion in regression tests.
  19. """
  20. from pg8000.dbapi import DatabaseError
  21. from materialize.query_fitness.fitness_function import FitnessFunction
  22. class AllPartsEssential(FitnessFunction):
  23. def _result_checksum(self, query: str) -> str | None:
  24. """Execute the query and return a 'checksum' of the result.
  25. In this implementation, the checksum is simply the serialization of the entire result set
  26. """
  27. try:
  28. self._cur.execute("COMMIT")
  29. self._cur.execute(query)
  30. return str(self._cur.fetchall())
  31. except DatabaseError:
  32. return None
  33. def fitness(self, query: str) -> float:
  34. """Test if all parts of a query are essential to producing the same result. This is done
  35. by commenting out parts of the query and checking if the result is the same. If it is, then
  36. the query contains a non-essential part and is thus rejected (fitness = 0).
  37. """
  38. query = query.strip(" ;\n")
  39. if not query:
  40. return 0
  41. orig_checksum = self._result_checksum(query)
  42. if not orig_checksum:
  43. return 0
  44. tokens = query.split()
  45. l = len(tokens)
  46. for start_token in reversed(range(0, l)):
  47. for end_token in reversed(range(start_token, l)):
  48. new_tokens = [*tokens]
  49. new_tokens.insert(end_token + 1, " */ ")
  50. new_tokens.insert(start_token, " /* ")
  51. new_query = " ".join(new_tokens)
  52. new_checksum = self._result_checksum(new_query)
  53. if new_checksum == orig_checksum:
  54. return 0
  55. return 1