parseq_aug.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
  1. # Scene Text Recognition Model Hub
  2. # Copyright 2022 Darwin Bautista
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # https://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. from functools import partial
  16. import imgaug.augmenters as iaa
  17. import numpy as np
  18. from PIL import Image, ImageFilter
  19. from openrec.preprocess import auto_augment
  20. from openrec.preprocess.auto_augment import _LEVEL_DENOM, LEVEL_TO_ARG, NAME_TO_OP, _randomly_negate, rotate
  21. def rotate_expand(img, degrees, **kwargs):
  22. """Rotate operation with expand=True to avoid cutting off the
  23. characters."""
  24. kwargs['expand'] = True
  25. return rotate(img, degrees, **kwargs)
  26. def _level_to_arg(level, hparams, key, default):
  27. magnitude = hparams.get(key, default)
  28. level = (level / _LEVEL_DENOM) * magnitude
  29. level = _randomly_negate(level)
  30. return level,
  31. def apply():
  32. # Overrides
  33. NAME_TO_OP.update({'Rotate': rotate_expand})
  34. LEVEL_TO_ARG.update({
  35. 'Rotate':
  36. partial(_level_to_arg, key='rotate_deg', default=30.),
  37. 'ShearX':
  38. partial(_level_to_arg, key='shear_x_pct', default=0.3),
  39. 'ShearY':
  40. partial(_level_to_arg, key='shear_y_pct', default=0.3),
  41. 'TranslateXRel':
  42. partial(_level_to_arg, key='translate_x_pct', default=0.45),
  43. 'TranslateYRel':
  44. partial(_level_to_arg, key='translate_y_pct', default=0.45),
  45. })
  46. apply()
  47. _OP_CACHE = {}
  48. def _get_op(key, factory):
  49. try:
  50. op = _OP_CACHE[key]
  51. except KeyError:
  52. op = factory()
  53. _OP_CACHE[key] = op
  54. return op
  55. def _get_param(level, img, max_dim_factor, min_level=1):
  56. max_level = max(min_level, max_dim_factor * max(img.size))
  57. return round(min(level, max_level))
  58. def gaussian_blur(img, radius, **__):
  59. radius = _get_param(radius, img, 0.02)
  60. key = 'gaussian_blur_' + str(radius)
  61. op = _get_op(key, lambda: ImageFilter.GaussianBlur(radius))
  62. return img.filter(op)
  63. def motion_blur(img, k, **__):
  64. k = _get_param(k, img, 0.08, 3) | 1 # bin to odd values
  65. key = 'motion_blur_' + str(k)
  66. op = _get_op(key, lambda: iaa.MotionBlur(k))
  67. return Image.fromarray(op(image=np.asarray(img)))
  68. def gaussian_noise(img, scale, **_):
  69. scale = _get_param(scale, img, 0.25) | 1 # bin to odd values
  70. key = 'gaussian_noise_' + str(scale)
  71. op = _get_op(key, lambda: iaa.AdditiveGaussianNoise(scale=scale))
  72. return Image.fromarray(op(image=np.asarray(img)))
  73. def poisson_noise(img, lam, **_):
  74. lam = _get_param(lam, img, 0.2) | 1 # bin to odd values
  75. key = 'poisson_noise_' + str(lam)
  76. op = _get_op(key, lambda: iaa.AdditivePoissonNoise(lam))
  77. return Image.fromarray(op(image=np.asarray(img)))
  78. def _level_to_arg(level, _hparams, max):
  79. level = max * level / auto_augment._LEVEL_DENOM
  80. return level,
  81. _RAND_TRANSFORMS = auto_augment._RAND_INCREASING_TRANSFORMS.copy()
  82. _RAND_TRANSFORMS.remove(
  83. 'SharpnessIncreasing') # remove, interferes with *blur ops
  84. _RAND_TRANSFORMS.extend([
  85. 'GaussianBlur',
  86. # 'MotionBlur',
  87. # 'GaussianNoise',
  88. 'PoissonNoise'
  89. ])
  90. auto_augment.LEVEL_TO_ARG.update({
  91. 'GaussianBlur':
  92. partial(_level_to_arg, max=4),
  93. 'MotionBlur':
  94. partial(_level_to_arg, max=20),
  95. 'GaussianNoise':
  96. partial(_level_to_arg, max=0.1 * 255),
  97. 'PoissonNoise':
  98. partial(_level_to_arg, max=40)
  99. })
  100. auto_augment.NAME_TO_OP.update({
  101. 'GaussianBlur': gaussian_blur,
  102. 'MotionBlur': motion_blur,
  103. 'GaussianNoise': gaussian_noise,
  104. 'PoissonNoise': poisson_noise
  105. })
  106. def rand_augment_transform(magnitude=5, num_layers=3):
  107. # These are tuned for magnitude=5, which means that effective magnitudes are half of these values.
  108. hparams = {
  109. 'rotate_deg': 30,
  110. 'shear_x_pct': 0.9,
  111. 'shear_y_pct': 0.2,
  112. 'translate_x_pct': 0.10,
  113. 'translate_y_pct': 0.30
  114. }
  115. ra_ops = auto_augment.rand_augment_ops(magnitude,
  116. hparams=hparams,
  117. transforms=_RAND_TRANSFORMS)
  118. # Supply weights to disable replacement in random selection (i.e. avoid applying the same op twice)
  119. choice_weights = [1. / len(ra_ops) for _ in range(len(ra_ops))]
  120. return auto_augment.RandAugment(ra_ops, num_layers, choice_weights)