crop_resize.py 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. import cv2
  2. import numpy as np
  def padding_image(img, size=(640, 640)):
      """Pad an image with black pixels on its bottom and right edges.

      The original pixels stay anchored at the top-left corner of the result.
      A dimension that already meets or exceeds the target is left untouched
      rather than producing a negative border width.

      :param img: Image array whose first two axes are (height, width); any
          trailing axes (e.g. channels) are preserved as they are.
      :param size: Target ``(width, height)`` (default 640x640).
      :return: A new array, zero-padded so that its height and width are at
          least the target height and width.
      """
      img_height, img_width = img.shape[:2]
      target_width, target_height = size

      # Clamp at zero so an oversized dimension is simply not padded.
      pad_bottom = max(target_height - img_height, 0)
      pad_right = max(target_width - img_width, 0)

      # Only the two spatial axes are padded; channel axes get (0, 0).
      pad_width = [(0, pad_bottom), (0, pad_right)]
      pad_width += [(0, 0)] * (img.ndim - 2)
      return np.pad(img, pad_width, mode='constant', constant_values=0)
  def is_poly_outside_rect(poly, x, y, w, h):
      """Tell whether a polygon's bounding box lies entirely outside a rectangle.

      :param poly: Sequence of points; column 0 is read as x, column 1 as y.
      :param x: Left edge of the rectangle.
      :param y: Top edge of the rectangle.
      :param w: Rectangle width.
      :param h: Rectangle height.
      :return: ``True`` when the polygon's extent misses the rectangle on
          either axis, ``False`` otherwise (touching edges count as inside).
      """
      pts = np.array(poly)

      # Horizontal test first: completely left of, or right of, the rectangle.
      xs = pts[:, 0]
      if xs.max() < x or xs.min() > x + w:
          return True

      # Vertical test: completely above, or below, the rectangle.
      ys = pts[:, 1]
      return bool(ys.max() < y or ys.min() > y + h)
  def split_regions(axis):
      """Split a sorted index array into runs of consecutive values.

      Every run is returned, including the final one, so a fully contiguous
      axis yields a single region.

      :param axis: 1-D NumPy array of ascending integer indices.
      :return: List of sub-arrays of ``axis``, one per run of consecutive
          values; an empty list when ``axis`` is empty.
      """
      if axis.shape[0] == 0:
          return []
      # A new region starts wherever the step between neighbours is not 1.
      breaks = np.flatnonzero(np.diff(axis) != 1) + 1
      return np.split(axis, breaks)
  def random_select(axis, max_size):
      """Draw two random values from ``axis`` and return them as (low, high).

      :param axis: 1-D array of candidate coordinates to sample from
          (sampling is with replacement, so both values may coincide).
      :param max_size: Exclusive upper bound; results are clipped to
          ``[0, max_size - 1]``.
      :return: Tuple ``(xmin, xmax)`` of the clipped smaller and larger draw.
      """
      picks = np.random.choice(axis, size=2)
      upper = max_size - 1
      low = np.clip(picks.min(), 0, upper)
      high = np.clip(picks.max(), 0, upper)
      return low, high
  def region_wise_random_select(regions, max_size):
      """Pick one random value from each of two randomly chosen regions.

      The two regions are drawn with replacement, so both values may come
      from the same region.

      :param regions: Non-empty list of 1-D arrays of candidate coordinates.
      :param max_size: Unused; kept so the signature mirrors
          ``random_select``.
      :return: Tuple ``(xmin, xmax)`` of plain ``int`` values, smaller first.
      """
      region_indices = np.random.choice(len(regions), 2)
      # Draw a scalar directly: converting a 1-element array with int() is
      # deprecated in recent NumPy releases.
      picks = [int(np.random.choice(regions[idx])) for idx in region_indices]
      return min(picks), max(picks)
  def crop_area(im, text_polys, min_crop_side_ratio, max_tries):
      """Randomly choose a crop rectangle whose borders avoid text boxes.

      Rows and columns covered by the bounding extent of any polygon are
      marked as occupied; crop borders are sampled only from the free rows
      and columns. A candidate is accepted when both sides reach the minimum
      size and at least one polygon is not entirely outside it.

      :param im: Image array whose first two axes are (height, width).
      :param text_polys: Iterable of polygons, each an (N, 2) array-like of
          (x, y) points.
      :param min_crop_side_ratio: Minimum crop side as a fraction of the
          corresponding image side.
      :param max_tries: Maximum number of random candidates to try.
      :return: Tuple ``(x, y, w, h)`` of the chosen crop; the full image
          ``(0, 0, w, h)`` when no free row/column exists or no candidate is
          accepted.
      """
      h, w = im.shape[:2]
      h_array = np.zeros(h, dtype=np.int32)
      w_array = np.zeros(w, dtype=np.int32)
      for points in text_polys:
          points = np.round(points, decimals=0).astype(np.int32)
          # Clamp to the image: a negative bound would otherwise be treated
          # as an index from the end and mark the wrong span (or nothing).
          minx, maxx = np.clip((points[:, 0].min(), points[:, 0].max()), 0, w)
          miny, maxy = np.clip((points[:, 1].min(), points[:, 1].max()), 0, h)
          w_array[minx:maxx] = 1
          h_array[miny:maxy] = 1

      # Candidate crop borders: rows/columns not crossed by any text extent.
      h_axis = np.where(h_array == 0)[0]
      w_axis = np.where(w_array == 0)[0]
      if len(h_axis) == 0 or len(w_axis) == 0:
          return 0, 0, w, h

      h_regions = split_regions(h_axis)
      w_regions = split_regions(w_axis)

      for _ in range(max_tries):
          if len(w_regions) > 1:
              xmin, xmax = region_wise_random_select(w_regions, w)
          else:
              xmin, xmax = random_select(w_axis, w)
          if len(h_regions) > 1:
              ymin, ymax = region_wise_random_select(h_regions, h)
          else:
              ymin, ymax = random_select(h_axis, h)

          crop_w = xmax - xmin
          crop_h = ymax - ymin
          if (crop_w < min_crop_side_ratio * w
                  or crop_h < min_crop_side_ratio * h):
              # Area too small.
              continue

          # Accept only crops that keep at least one polygon in view.
          if any(not is_poly_outside_rect(poly, xmin, ymin, crop_w, crop_h)
                 for poly in text_polys):
              return xmin, ymin, crop_w, crop_h

      return 0, 0, w, h
  class EastRandomCropData(object):
      """Randomly crop an image around text and resize it to a fixed size.

      The crop rectangle comes from ``crop_area``. With ``keep_ratio`` the
      crop is resized uniformly and placed in the top-left of a zero-filled
      canvas; otherwise it is stretched to fill the target size. Polygons are
      transformed with the same scale as the image and those falling
      completely outside the resized crop are dropped together with their
      tags and texts.
      """

      def __init__(
          self,
          size=(640, 640),
          max_tries=10,
          min_crop_side_ratio=0.1,
          keep_ratio=True,
          **kwargs,
      ):
          """Store the crop configuration.

          :param size: Output ``(width, height)``.
          :param max_tries: Number of random crop candidates to try.
          :param min_crop_side_ratio: Minimum crop side as a fraction of the
              image side.
          :param keep_ratio: Preserve the crop's aspect ratio and pad, instead
              of stretching to ``size``.
          :param kwargs: Accepted and ignored.
          """
          self.size = size
          self.max_tries = max_tries
          self.min_crop_side_ratio = min_crop_side_ratio
          self.keep_ratio = keep_ratio

      def __call__(self, data):
          """Apply the random crop to one sample.

          :param data: Dict with keys ``'image'``, ``'polys'``,
              ``'ignore_tags'`` and ``'texts'``; each polygon must support
              NumPy arithmetic (it is shifted and scaled as an array).
          :return: The same dict with those four keys replaced by the cropped
              image, the surviving polygons (as an array) and their tags and
              texts.
          """
          img = data['image']
          text_polys = data['polys']
          ignore_tags = data['ignore_tags']
          texts = data['texts']
          all_care_polys = [
              text_polys[i] for i, tag in enumerate(ignore_tags) if not tag
          ]

          # Compute the crop region.
          crop_x, crop_y, crop_w, crop_h = crop_area(img, all_care_polys,
                                                     self.min_crop_side_ratio,
                                                     self.max_tries)

          # Crop the image and resize it.
          scale_w = self.size[0] / crop_w
          scale_h = self.size[1] / crop_h
          scale = min(scale_w, scale_h)
          # Never let truncation produce a zero-sized resize target.
          h = max(1, int(crop_h * scale))
          w = max(1, int(crop_w * scale))
          cropped = img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w]
          if self.keep_ratio:
              # Uniform scale; the remainder of the canvas stays zero.
              padimg = np.zeros((self.size[1], self.size[0], img.shape[2]),
                                img.dtype)
              padimg[:h, :w] = cv2.resize(cropped, (w, h))
              img = padimg
              poly_scale = scale
              out_w, out_h = w, h
          else:
              # Stretched to the full target, so x and y scale independently
              # and the polygons must follow the same per-axis factors.
              img = cv2.resize(cropped, tuple(self.size))
              poly_scale = (scale_w, scale_h)
              out_w, out_h = self.size[0], self.size[1]

          # Crop the text boxes.
          text_polys_crop = []
          ignore_tags_crop = []
          texts_crop = []
          for poly, text, tag in zip(text_polys, texts, ignore_tags):
              poly = ((poly - (crop_x, crop_y)) * poly_scale).tolist()
              if not is_poly_outside_rect(poly, 0, 0, out_w, out_h):
                  text_polys_crop.append(poly)
                  ignore_tags_crop.append(tag)
                  texts_crop.append(text)

          data['image'] = img
          data['polys'] = np.array(text_polys_crop)
          data['ignore_tags'] = ignore_tags_crop
          data['texts'] = texts_crop
          return data
  class CropResize(object):
      """Pad a small image, or tile a large one, into target-sized crops."""

      def __init__(self, size=(640, 640), interpolation=cv2.INTER_LINEAR):
          """Store the tile configuration.

          :param size: Target ``(width, height)`` of every returned crop.
          :param interpolation: Stored on the instance; ``__call__`` does not
              use it.
          """
          self.size = size
          self.interpolation = interpolation

      def __call__(self, data):
          """Produce target-sized crops of ``data['image']``.

          - If the image fits inside the target in both dimensions it is
            returned as a single crop, padded when smaller than the target.
          - Otherwise any dimension smaller than the target is first padded
            up to it, and the image is tiled with a stride of half the target
            size along each axis; tiles cut short by the image border are
            padded to the target size.

          :param data: Dict whose ``'image'`` entry is an image array with
              (height, width) as its first two axes.
          :return: Tuple ``(crops, positions)``: the list of crops and a
              parallel list of ``[left, top, right, bottom]`` boxes giving the
              unpadded extent of each crop.
          """
          img = data['image']
          img_height, img_width = img.shape[:2]
          target_width, target_height = self.size

          # Image fits inside one tile: return it, padded when needed.
          if img_width <= target_width and img_height <= target_height:
              if img_width == target_width and img_height == target_height:
                  return [img], [[0, 0, img_width, img_height]]
              padded_img = padding_image(img, self.size)
              # The position records the extent of the original image.
              return [padded_img], [[0, 0, img_width, img_height]]

          # Only one dimension is too small: pad it up to the target so the
          # tiling below always has at least one full tile along that axis.
          if img_width < target_width:
              img = cv2.copyMakeBorder(img,
                                       0,
                                       0,
                                       0,
                                       target_width - img_width,
                                       cv2.BORDER_CONSTANT,
                                       value=[0, 0, 0])
          if img_height < target_height:
              img = cv2.copyMakeBorder(img,
                                       0,
                                       target_height - img_height,
                                       0,
                                       0,
                                       cv2.BORDER_CONSTANT,
                                       value=[0, 0, 0])
          img_height, img_width = img.shape[:2]

          # Tile with 50% overlap; each axis uses its own stride so that
          # non-square targets still cover the whole image.
          stride_x = target_width // 2
          stride_y = target_height // 2
          crop_positions = []
          cropped_img_list = []
          for top in range(0, img_height - stride_y, stride_y):
              bottom = min(top + target_height, img_height)
              for left in range(0, img_width - stride_x, stride_x):
                  right = min(left + target_width, img_width)
                  cropped_img = img[top:bottom, left:right]
                  # Border tiles may be cut short; pad them to full size.
                  if (bottom - top < target_height
                          or right - left < target_width):
                      cropped_img = padding_image(cropped_img, self.size)
                  cropped_img_list.append(cropped_img)
                  # Record the position of the cropped image.
                  crop_positions.append([left, top, right, bottom])
          return cropped_img_list, crop_positions