resize.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542
  1. import math
  2. import random
  3. import cv2
  4. import numpy as np
  5. from PIL import Image
  6. class CDistNetResize(object):
  7. def __init__(self, image_shape, **kwargs):
  8. self.image_shape = image_shape
  9. def __call__(self, data):
  10. img = data['image']
  11. _, h, w = self.image_shape
  12. # keep_aspect_ratio = False
  13. image_pil = Image.fromarray(np.uint8(img))
  14. image = image_pil.resize((w, h), Image.LANCZOS)
  15. image = np.array(image)
  16. # rgb2gray = False
  17. image = image.transpose((2, 0, 1))
  18. image = image.astype(np.float32) / 128.0 - 1.0
  19. data['image'] = image
  20. data['valid_ratio'] = 1
  21. return data
  22. class ABINetResize(object):
  23. def __init__(self, image_shape, **kwargs):
  24. self.image_shape = image_shape
  25. def __call__(self, data):
  26. img = data['image']
  27. h, w = img.shape[:2]
  28. norm_img, valid_ratio = resize_norm_img_abinet(img, self.image_shape)
  29. data['image'] = norm_img
  30. data['valid_ratio'] = valid_ratio
  31. r = float(w) / float(h)
  32. data['real_ratio'] = max(1, round(r))
  33. return data
  34. def resize_norm_img_abinet(img, image_shape):
  35. imgC, imgH, imgW = image_shape
  36. resized_image = cv2.resize(img, (imgW, imgH),
  37. interpolation=cv2.INTER_LINEAR)
  38. resized_w = imgW
  39. resized_image = resized_image.astype('float32')
  40. resized_image = resized_image / 255.0
  41. mean = np.array([0.485, 0.456, 0.406])
  42. std = np.array([0.229, 0.224, 0.225])
  43. resized_image = (resized_image - mean[None, None, ...]) / std[None, None,
  44. ...]
  45. resized_image = resized_image.transpose((2, 0, 1))
  46. resized_image = resized_image.astype('float32')
  47. valid_ratio = min(1.0, float(resized_w / imgW))
  48. return resized_image, valid_ratio
  49. class SVTRResize(object):
  50. def __init__(self, image_shape, padding=True, **kwargs):
  51. self.image_shape = image_shape
  52. self.padding = padding
  53. def __call__(self, data):
  54. img = data['image']
  55. h, w = img.shape[:2]
  56. norm_img, valid_ratio = resize_norm_img(img, self.image_shape,
  57. self.padding)
  58. data['image'] = norm_img
  59. data['valid_ratio'] = valid_ratio
  60. r = float(w) / float(h)
  61. data['real_ratio'] = max(1, round(r))
  62. return data
  63. class RecTVResize(object):
  64. def __init__(self, image_shape=[32, 128], padding=True, **kwargs):
  65. from torchvision import transforms as T
  66. from torchvision.transforms import functional as F
  67. self.F = F
  68. self.padding = padding
  69. self.image_shape = image_shape
  70. self.interpolation = T.InterpolationMode.BICUBIC
  71. transforms = []
  72. transforms.extend([
  73. T.ToTensor(),
  74. T.Normalize(0.5, 0.5),
  75. ])
  76. self.transforms = T.Compose(transforms)
  77. def __call__(self, data):
  78. img = data['image']
  79. imgH, imgW = self.image_shape
  80. w, h = img.size
  81. if not self.padding:
  82. resized_w = imgW
  83. else:
  84. ratio = w / float(h)
  85. if math.ceil(imgH * ratio) > imgW:
  86. resized_w = imgW
  87. else:
  88. resized_w = int(math.ceil(imgH * ratio))
  89. resized_image = self.F.resize(img, (imgH, resized_w),
  90. interpolation=self.interpolation)
  91. img = self.transforms(resized_image)
  92. if resized_w < imgW:
  93. img = self.F.pad(img, [0, 0, imgW - resized_w, 0], fill=0.)
  94. valid_ratio = min(1.0, float(resized_w / imgW))
  95. data['image'] = img
  96. data['valid_ratio'] = valid_ratio
  97. r = float(w) / float(h)
  98. data['real_ratio'] = max(1, round(r))
  99. return data
  100. class LongResize(object):
  101. def __init__(self,
  102. base_shape=[[64, 64], [96, 48], [112, 40], [128, 32]],
  103. max_ratio=12,
  104. base_h=32,
  105. padding_rand=False,
  106. padding_bi=False,
  107. padding=True,
  108. **kwargs):
  109. self.base_shape = base_shape
  110. self.max_ratio = max_ratio
  111. self.base_h = base_h
  112. self.padding = padding
  113. self.padding_rand = padding_rand
  114. self.padding_bi = padding_bi
  115. def __call__(self, data):
  116. data = resize_norm_img_long(
  117. data,
  118. self.base_shape,
  119. self.max_ratio,
  120. self.base_h,
  121. self.padding,
  122. self.padding_rand,
  123. self.padding_bi,
  124. )
  125. return data
  126. class SliceResize(object):
  127. def __init__(self, image_shape, padding=True, max_ratio=12, **kwargs):
  128. self.image_shape = image_shape
  129. self.padding = padding
  130. self.max_ratio = max_ratio
  131. def __call__(self, data):
  132. img = data['image']
  133. h, w = img.shape[:2]
  134. w_bi = w // 2
  135. img_list = [
  136. img[:, :w_bi, :], img[:, w_bi:2 * w_bi, :],
  137. img[:, w_bi // 2:(w_bi // 2) + w_bi, :]
  138. ]
  139. img_reshape = []
  140. for img_s in img_list:
  141. norm_img, valid_ratio = resize_norm_img_slice(
  142. img_s, self.image_shape, max_ratio=self.max_ratio)
  143. img_reshape.append(norm_img[None, :, :, :])
  144. data['image'] = np.concatenate(img_reshape, 0)
  145. data['valid_ratio'] = valid_ratio
  146. return data
  147. class SliceTVResize(object):
  148. def __init__(self,
  149. image_shape,
  150. padding=True,
  151. base_shape=[[64, 64], [96, 48], [112, 40], [128, 32]],
  152. max_ratio=12,
  153. base_h=32,
  154. **kwargs):
  155. import torch
  156. from torchvision import transforms as T
  157. from torchvision.transforms import functional as F
  158. self.F = F
  159. self.torch = torch
  160. self.image_shape = image_shape
  161. self.padding = padding
  162. self.max_ratio = max_ratio
  163. self.base_h = base_h
  164. self.interpolation = T.InterpolationMode.BICUBIC
  165. transforms = []
  166. transforms.extend([
  167. T.ToTensor(),
  168. T.Normalize(0.5, 0.5),
  169. ])
  170. self.transforms = T.Compose(transforms)
  171. def __call__(self, data):
  172. img = data['image']
  173. w, h = img.size
  174. w_ratio = ((w // h) // 2) * 2
  175. w_ratio = max(6, w_ratio)
  176. img = self.F.resize(img, (self.base_h, self.base_h * w_ratio),
  177. interpolation=self.interpolation)
  178. img = self.transforms(img)
  179. img_list = []
  180. for i in range(0, w_ratio // 2 - 1):
  181. img_list.append(img[None, :, :,
  182. i * 2 * self.base_h:(i * 2 + 4) * self.base_h])
  183. data['image'] = self.torch.concat(img_list, 0)
  184. data['valid_ratio'] = float(w_ratio) / w
  185. return data
  186. class RecTVResizeRatio(object):
  187. def __init__(self,
  188. image_shape=[32, 128],
  189. padding=True,
  190. base_shape=[[64, 64], [96, 48], [112, 40], [128, 32]],
  191. max_ratio=12,
  192. base_h=32,
  193. **kwargs):
  194. from torchvision import transforms as T
  195. from torchvision.transforms import functional as F
  196. self.F = F
  197. self.padding = padding
  198. self.image_shape = image_shape
  199. self.max_ratio = max_ratio
  200. self.base_shape = base_shape
  201. self.base_h = base_h
  202. self.interpolation = T.InterpolationMode.BICUBIC
  203. transforms = []
  204. transforms.extend([
  205. T.ToTensor(),
  206. T.Normalize(0.5, 0.5),
  207. ])
  208. self.transforms = T.Compose(transforms)
  209. def __call__(self, data):
  210. img = data['image']
  211. imgH, imgW = self.image_shape
  212. w, h = img.size
  213. gen_ratio = round(float(w) / float(h))
  214. ratio_resize = 1 if gen_ratio == 0 else gen_ratio
  215. ratio_resize = min(ratio_resize, self.max_ratio)
  216. imgW, imgH = self.base_shape[ratio_resize -
  217. 1] if ratio_resize <= 4 else [
  218. self.base_h *
  219. ratio_resize, self.base_h
  220. ]
  221. if not self.padding:
  222. resized_w = imgW
  223. else:
  224. ratio = w / float(h)
  225. if math.ceil(imgH * ratio) > imgW:
  226. resized_w = imgW
  227. else:
  228. resized_w = int(math.ceil(imgH * ratio))
  229. resized_image = self.F.resize(img, (imgH, resized_w),
  230. interpolation=self.interpolation)
  231. img = self.transforms(resized_image)
  232. if resized_w < imgW:
  233. img = self.F.pad(img, [0, 0, imgW - resized_w, 0], fill=0.)
  234. valid_ratio = min(1.0, float(resized_w / imgW))
  235. data['image'] = img
  236. data['valid_ratio'] = valid_ratio
  237. return data
  238. class RecDynamicResize(object):
  239. def __init__(self, image_shape=[32, 128], padding=True, **kwargs):
  240. self.padding = padding
  241. self.image_shape = image_shape
  242. self.max_ratio = image_shape[1] * 1.0 / image_shape[0]
  243. def __call__(self, data):
  244. img = data['image']
  245. imgH, imgW = self.image_shape
  246. h, w, imgC = img.shape
  247. ratio = w / float(h)
  248. max_wh_ratio = max(ratio, self.max_ratio)
  249. imgW = int(imgH * max_wh_ratio)
  250. if math.ceil(imgH * ratio) > imgW:
  251. resized_w = imgW
  252. else:
  253. resized_w = int(math.ceil(imgH * ratio))
  254. resized_image = cv2.resize(img, (resized_w, imgH))
  255. resized_image = resized_image.astype('float32')
  256. resized_image = resized_image.transpose((2, 0, 1)) / 255
  257. resized_image -= 0.5
  258. resized_image /= 0.5
  259. padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
  260. padding_im[:, :, 0:resized_w] = resized_image
  261. data['image'] = padding_im
  262. return data
  263. def resize_norm_img_slice(
  264. img,
  265. image_shape,
  266. base_shape=[[64, 64], [96, 48], [112, 40], [128, 32]],
  267. max_ratio=12,
  268. base_h=32,
  269. padding=True,
  270. ):
  271. imgC, imgH, imgW = image_shape
  272. h = img.shape[0]
  273. w = img.shape[1]
  274. gen_ratio = round(float(w) / float(h))
  275. ratio_resize = 1 if gen_ratio == 0 else gen_ratio
  276. ratio_resize = min(ratio_resize, max_ratio)
  277. imgW, imgH = base_shape[ratio_resize - 1] if ratio_resize <= 4 else [
  278. base_h * ratio_resize, base_h
  279. ]
  280. padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
  281. if not padding:
  282. resized_image = cv2.resize(img, (imgW, imgH))
  283. resized_w = imgW
  284. else:
  285. ratio = w / float(h)
  286. if math.ceil(imgH * ratio) > imgW:
  287. resized_w = imgW
  288. else:
  289. resized_w = int(math.ceil(imgH * ratio * (random.random() + 0.5)))
  290. resized_w = min(imgW, resized_w)
  291. resized_image = cv2.resize(img, (resized_w, imgH))
  292. resized_image = resized_image.transpose((2, 0, 1)) / 255
  293. resized_image -= 0.5
  294. resized_image /= 0.5
  295. padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
  296. padding_im[:, :, :resized_w] = resized_image
  297. valid_ratio = min(1.0, float(resized_w / imgW))
  298. return padding_im, valid_ratio
  299. def resize_norm_img(img,
  300. image_shape,
  301. padding=True,
  302. interpolation=cv2.INTER_LINEAR):
  303. imgC, imgH, imgW = image_shape
  304. h = img.shape[0]
  305. w = img.shape[1]
  306. if not padding:
  307. resized_image = cv2.resize(img, (imgW, imgH),
  308. interpolation=interpolation)
  309. resized_w = imgW
  310. else:
  311. ratio = w / float(h)
  312. if math.ceil(imgH * ratio) > imgW:
  313. resized_w = imgW
  314. else:
  315. resized_w = int(math.ceil(imgH * ratio))
  316. resized_image = cv2.resize(img, (resized_w, imgH))
  317. resized_image = resized_image.astype('float32')
  318. if image_shape[0] == 1:
  319. resized_image = resized_image / 255
  320. resized_image = resized_image[np.newaxis, :]
  321. else:
  322. resized_image = resized_image.transpose((2, 0, 1)) / 255
  323. resized_image -= 0.5
  324. resized_image /= 0.5
  325. padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
  326. padding_im[:, :, 0:resized_w] = resized_image
  327. valid_ratio = min(1.0, float(resized_w / imgW))
  328. return padding_im, valid_ratio
  329. def resize_norm_img_long(
  330. data,
  331. base_shape=[[64, 64], [96, 48], [112, 40], [128, 32]],
  332. max_ratio=12,
  333. base_h=32,
  334. padding=True,
  335. padding_rand=False,
  336. padding_bi=False,
  337. ):
  338. img = data['image']
  339. h = img.shape[0]
  340. w = img.shape[1]
  341. gen_ratio = data.get('gen_ratio', 0)
  342. if gen_ratio == 0:
  343. ratio = w / float(h)
  344. gen_ratio = round(ratio) if ratio > 0.5 else 1
  345. gen_ratio = min(data['gen_ratio'], max_ratio)
  346. if padding_rand and random.random() < 0.5:
  347. padding = False if padding else True
  348. imgW, imgH = base_shape[gen_ratio -
  349. 1] if gen_ratio <= len(base_shape) else [
  350. base_h * gen_ratio, base_h
  351. ]
  352. if not padding:
  353. resized_image = cv2.resize(img, (imgW, imgH),
  354. interpolation=cv2.INTER_LINEAR)
  355. resized_w = imgW
  356. else:
  357. ratio = w / float(h)
  358. if math.ceil(imgH * ratio) > imgW:
  359. resized_w = imgW
  360. else:
  361. resized_w = int(math.ceil(imgH * ratio * (random.random() + 0.5)))
  362. resized_w = min(imgW, resized_w)
  363. resized_image = cv2.resize(img, (resized_w, imgH))
  364. resized_image = resized_image.astype('float32')
  365. resized_image = resized_image.transpose((2, 0, 1)) / 255
  366. resized_image -= 0.5
  367. resized_image /= 0.5
  368. padding_im = np.zeros((3, imgH, imgW), dtype=np.float32)
  369. if padding_bi and random.random() < 0.5:
  370. padding_im[:, :, -resized_w:] = resized_image
  371. else:
  372. padding_im[:, :, :resized_w] = resized_image
  373. valid_ratio = min(1.0, float(resized_w / imgW))
  374. data['image'] = padding_im
  375. data['valid_ratio'] = valid_ratio
  376. data['gen_ratio'] = imgW // imgH
  377. data['real_ratio'] = w // h
  378. return data
  379. class VisionLANResize(object):
  380. def __init__(self, image_shape, **kwargs):
  381. self.image_shape = image_shape
  382. def __call__(self, data):
  383. img = data['image']
  384. imgC, imgH, imgW = self.image_shape
  385. resized_image = cv2.resize(img, (imgW, imgH))
  386. resized_image = resized_image.astype('float32')
  387. if imgC == 1:
  388. resized_image = resized_image / 255
  389. norm_img = resized_image[np.newaxis, :]
  390. else:
  391. norm_img = resized_image.transpose((2, 0, 1)) / 255
  392. data['image'] = norm_img
  393. data['valid_ratio'] = 1.0
  394. return data
  395. class RobustScannerRecResizeImg(object):
  396. def __init__(self, image_shape, width_downsample_ratio=0.25, **kwargs):
  397. self.image_shape = image_shape
  398. self.width_downsample_ratio = width_downsample_ratio
  399. def __call__(self, data):
  400. img = data['image']
  401. norm_img, resize_shape, pad_shape, valid_ratio = resize_norm_img_sar(
  402. img, self.image_shape, self.width_downsample_ratio)
  403. data['image'] = norm_img
  404. data['resized_shape'] = resize_shape
  405. data['pad_shape'] = pad_shape
  406. data['valid_ratio'] = valid_ratio
  407. return data
  408. def resize_norm_img_sar(img, image_shape, width_downsample_ratio=0.25):
  409. imgC, imgH, imgW_min, imgW_max = image_shape
  410. h = img.shape[0]
  411. w = img.shape[1]
  412. valid_ratio = 1.0
  413. # make sure new_width is an integral multiple of width_divisor.
  414. width_divisor = int(1 / width_downsample_ratio)
  415. # resize
  416. ratio = w / float(h)
  417. resize_w = math.ceil(imgH * ratio)
  418. if resize_w % width_divisor != 0:
  419. resize_w = round(resize_w / width_divisor) * width_divisor
  420. if imgW_min is not None:
  421. resize_w = max(imgW_min, resize_w)
  422. if imgW_max is not None:
  423. valid_ratio = min(1.0, 1.0 * resize_w / imgW_max)
  424. resize_w = min(imgW_max, resize_w)
  425. resized_image = cv2.resize(img, (resize_w, imgH))
  426. resized_image = resized_image.astype('float32')
  427. # norm
  428. if image_shape[0] == 1:
  429. resized_image = resized_image / 255
  430. resized_image = resized_image[np.newaxis, :]
  431. else:
  432. resized_image = resized_image.transpose((2, 0, 1)) / 255
  433. resized_image -= 0.5
  434. resized_image /= 0.5
  435. resize_shape = resized_image.shape
  436. padding_im = -1.0 * np.ones((imgC, imgH, imgW_max), dtype=np.float32)
  437. padding_im[:, :, 0:resize_w] = resized_image
  438. pad_shape = padding_im.shape
  439. return padding_im, resize_shape, pad_shape, valid_ratio
  440. class SRNRecResizeImg(object):
  441. def __init__(self, image_shape, **kwargs):
  442. self.image_shape = image_shape
  443. def __call__(self, data):
  444. img = data['image']
  445. norm_img = resize_norm_img_srn(img, self.image_shape)
  446. data['image'] = norm_img
  447. return data
  448. def resize_norm_img_srn(img, image_shape):
  449. imgC, imgH, imgW = image_shape
  450. img_black = np.zeros((imgH, imgW))
  451. im_hei = img.shape[0]
  452. im_wid = img.shape[1]
  453. if im_wid <= im_hei * 1:
  454. img_new = cv2.resize(img, (imgH * 1, imgH))
  455. elif im_wid <= im_hei * 2:
  456. img_new = cv2.resize(img, (imgH * 2, imgH))
  457. elif im_wid <= im_hei * 3:
  458. img_new = cv2.resize(img, (imgH * 3, imgH))
  459. else:
  460. img_new = cv2.resize(img, (imgW, imgH))
  461. img_np = np.asarray(img_new)
  462. img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY)
  463. img_black[:, 0:img_np.shape[1]] = img_np
  464. img_black = img_black[:, :, np.newaxis]
  465. row, col, c = img_black.shape
  466. c = 1
  467. return np.reshape(img_black, (c, row, col)).astype(np.float32)