search_utility.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. import re
  10. from materialize.terminal import COLOR_GREEN, STYLE_BOLD, with_formattings
  11. def _search_value_to_pattern(search_value: str, use_regex: bool) -> re.Pattern[str]:
  12. regex_pattern = _search_value_to_regex(search_value, use_regex)
  13. return re.compile(f"({regex_pattern})", re.IGNORECASE | re.DOTALL)
  14. def _search_value_to_regex(search_value: str, use_regex: bool) -> str:
  15. if use_regex:
  16. return search_value
  17. return re.escape(search_value)
  18. def highlight_match(
  19. input: str,
  20. search_value: str,
  21. use_regex: bool,
  22. style: list[str] = [COLOR_GREEN, STYLE_BOLD],
  23. ) -> str:
  24. case_insensitive_pattern = _search_value_to_pattern(search_value, use_regex)
  25. match_replacement = with_formattings(r"\1", style)
  26. return case_insensitive_pattern.sub(match_replacement, input)
  27. def trim_match(
  28. match_text: str,
  29. search_value: str,
  30. use_regex: bool,
  31. one_line_match_presentation: bool,
  32. max_chars_before_match: int = 300,
  33. max_chars_after_match: int = 300,
  34. search_offset: int = 0,
  35. ) -> str:
  36. match_text = match_text.strip()
  37. case_insensitive_pattern = _search_value_to_pattern(search_value, use_regex)
  38. match = case_insensitive_pattern.search(match_text, pos=search_offset)
  39. assert match is not None
  40. match_begin_index = match.start()
  41. match_end_index = match.end()
  42. if one_line_match_presentation:
  43. match_text, (match_begin_index, match_end_index) = _trim_match_to_one_line(
  44. match_text, match_begin_index, match_end_index
  45. )
  46. match_text = _trim_match_to_max_length(
  47. match_text,
  48. match_begin_index,
  49. match_end_index,
  50. max_chars_after_match,
  51. max_chars_before_match,
  52. )
  53. return match_text
  54. def _trim_match_to_one_line(
  55. input: str, match_begin_index: int, match_end_index: int
  56. ) -> tuple[str, tuple[int, int]]:
  57. """
  58. :return: trimmed text, new match_begin_index, new match_end_index
  59. """
  60. cut_off_index_begin = input.rfind("\n", 0, match_begin_index)
  61. if cut_off_index_begin == -1:
  62. cut_off_index_begin = 0
  63. cut_off_index_end = input.find("\n", match_end_index)
  64. if cut_off_index_end == -1:
  65. cut_off_index_end = len(input)
  66. input = input[cut_off_index_begin:cut_off_index_end]
  67. return input, (
  68. match_begin_index - cut_off_index_begin,
  69. match_end_index - cut_off_index_begin,
  70. )
  71. def _trim_match_to_max_length(
  72. input: str,
  73. match_begin_index: int,
  74. match_end_index: int,
  75. max_chars_after_match: int,
  76. max_chars_before_match: int,
  77. ) -> str:
  78. # identify cut-off point before first match
  79. if match_begin_index > max_chars_before_match:
  80. cut_off_index_begin = input.find(
  81. " ", match_begin_index - max_chars_before_match, match_begin_index
  82. )
  83. if cut_off_index_begin == -1:
  84. cut_off_index_begin = match_begin_index - max_chars_before_match
  85. else:
  86. cut_off_index_begin = 0
  87. # identify cut-off point after first match
  88. if len(input) > match_end_index + max_chars_after_match:
  89. cut_off_index_end = input.rfind(
  90. " ", match_end_index, match_end_index + max_chars_after_match
  91. )
  92. if cut_off_index_end == -1:
  93. cut_off_index_end = match_end_index + max_chars_after_match
  94. else:
  95. cut_off_index_end = len(input)
  96. result = input[cut_off_index_begin:cut_off_index_end]
  97. result = result.strip()
  98. if cut_off_index_begin > 0:
  99. result = f"[...] {result}"
  100. if cut_off_index_end != len(input):
  101. result = f"{result} [...]"
  102. return result
  103. def determine_line_number(input: str, position: int) -> int:
  104. return 1 + input[:position].count("\n")
  105. def determine_position_in_line(input: str, position: int) -> int:
  106. position_line_start = input[:position].rfind("\n")
  107. return position - position_line_start