123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150 |
- # Scene Text Recognition Model Hub
- # Copyright 2022 Darwin Bautista
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # https://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- from functools import partial
- import imgaug.augmenters as iaa
- import numpy as np
- from PIL import Image, ImageFilter
- from openrec.preprocess import auto_augment
- from openrec.preprocess.auto_augment import _LEVEL_DENOM, LEVEL_TO_ARG, NAME_TO_OP, _randomly_negate, rotate
def rotate_expand(img, degrees, **kwargs):
    """Rotate ``img`` by ``degrees`` while always forcing ``expand=True``.

    Any ``expand`` value supplied by the caller is overridden so the rotated
    canvas grows instead of cutting off the characters. All remaining keyword
    arguments are forwarded to ``rotate`` unchanged.
    """
    forwarded = {**kwargs, 'expand': True}
    return rotate(img, degrees, **forwarded)
def _signed_level_to_arg(level, hparams, key, default):
    """Convert a RandAugment level into a one-element argument tuple.

    The level is normalised by ``_LEVEL_DENOM``, scaled by the magnitude
    stored in ``hparams[key]`` (``default`` when the key is missing) and then
    passed through ``_randomly_negate`` (presumably a random sign flip --
    confirm against ``auto_augment``).

    NOTE: this helper is deliberately *not* called ``_level_to_arg``. A
    different function with that name and an incompatible signature is
    defined later in this module; because ``apply()`` resolves its helper by
    global name at call time, sharing the name made every ``apply()`` call
    after import bind the wrong function.

    Args:
        level: Augmentation level supplied by the RandAugment machinery.
        hparams: Mapping of hyper-parameters; only ``key`` is read.
        key: Name of the magnitude entry to look up in ``hparams``.
        default: Magnitude used when ``key`` is absent from ``hparams``.

    Returns:
        A 1-tuple holding the scaled level.
    """
    magnitude = hparams.get(key, default)
    level = (level / _LEVEL_DENOM) * magnitude
    level = _randomly_negate(level)
    return level,


def apply():
    """Install the overrides into the imported ``auto_augment`` tables.

    Replaces the ``Rotate`` op with ``rotate_expand`` and swaps the
    level-to-argument functions of the rotate, shear and relative-translate
    ops for versions whose magnitudes are configurable through ``hparams``.
    Calling it more than once simply re-installs the same overrides.
    """
    # Overrides
    NAME_TO_OP.update({'Rotate': rotate_expand})
    LEVEL_TO_ARG.update({
        'Rotate':
        partial(_signed_level_to_arg, key='rotate_deg', default=30.),
        'ShearX':
        partial(_signed_level_to_arg, key='shear_x_pct', default=0.3),
        'ShearY':
        partial(_signed_level_to_arg, key='shear_y_pct', default=0.3),
        'TranslateXRel':
        partial(_signed_level_to_arg, key='translate_x_pct', default=0.45),
        'TranslateYRel':
        partial(_signed_level_to_arg, key='translate_y_pct', default=0.45),
    })


# Overrides are installed as an import-time side effect.
apply()
- _OP_CACHE = {}
- def _get_op(key, factory):
- try:
- op = _OP_CACHE[key]
- except KeyError:
- op = factory()
- _OP_CACHE[key] = op
- return op
- def _get_param(level, img, max_dim_factor, min_level=1):
- max_level = max(min_level, max_dim_factor * max(img.size))
- return round(min(level, max_level))
def gaussian_blur(img, radius, **__):
    """Blur ``img`` with a Gaussian filter and return the filtered image.

    The requested ``radius`` is first limited by ``_get_param`` using a
    factor of 0.02 of the largest image dimension. One ``GaussianBlur``
    filter per resulting radius is kept in the op cache. Extra keyword
    arguments are accepted and ignored.
    """
    blur_radius = _get_param(radius, img, 0.02)
    blur_filter = _get_op('gaussian_blur_' + str(blur_radius),
                          partial(ImageFilter.GaussianBlur, blur_radius))
    return img.filter(blur_filter)
def motion_blur(img, k, **__):
    """Apply imgaug's ``MotionBlur`` to ``img`` and return a PIL image.

    ``k`` is limited by ``_get_param`` (factor 0.08 of the largest image
    dimension, with the cap never below 3) and then forced to an odd value.
    One augmenter per resulting ``k`` is kept in the op cache. Extra keyword
    arguments are accepted and ignored.
    """
    # Setting the lowest bit bins the kernel size to odd values.
    kernel_size = _get_param(k, img, 0.08, 3) | 1
    augmenter = _get_op('motion_blur_' + str(kernel_size),
                        partial(iaa.MotionBlur, kernel_size))
    blurred = augmenter(image=np.asarray(img))
    return Image.fromarray(blurred)
def gaussian_noise(img, scale, **_):
    """Add imgaug ``AdditiveGaussianNoise`` to ``img``; return a PIL image.

    ``scale`` is limited by ``_get_param`` (factor 0.25 of the largest image
    dimension) and then forced to an odd integer. One augmenter per
    resulting scale is kept in the op cache. Extra keyword arguments are
    accepted and ignored.
    """
    # Setting the lowest bit bins the scale to odd values.
    noise_scale = _get_param(scale, img, 0.25) | 1
    augmenter = _get_op('gaussian_noise_' + str(noise_scale),
                        partial(iaa.AdditiveGaussianNoise, scale=noise_scale))
    noisy = augmenter(image=np.asarray(img))
    return Image.fromarray(noisy)
def poisson_noise(img, lam, **_):
    """Add imgaug ``AdditivePoissonNoise`` to ``img``; return a PIL image.

    ``lam`` is limited by ``_get_param`` (factor 0.2 of the largest image
    dimension) and then forced to an odd integer. One augmenter per
    resulting value is kept in the op cache. Extra keyword arguments are
    accepted and ignored.
    """
    # Setting the lowest bit bins lam to odd values.
    noise_lam = _get_param(lam, img, 0.2) | 1
    augmenter = _get_op('poisson_noise_' + str(noise_lam),
                        partial(iaa.AdditivePoissonNoise, noise_lam))
    noisy = augmenter(image=np.asarray(img))
    return Image.fromarray(noisy)
def _level_to_arg(level, _hparams, max):
    """Scale ``level`` linearly so that ``_LEVEL_DENOM`` maps to ``max``.

    ``_hparams`` is accepted for signature compatibility and is not used.
    The parameter name ``max`` shadows the builtin, but it is bound by
    keyword through ``partial`` and so must keep this name.

    Returns:
        A 1-tuple holding the scaled level.
    """
    return (max * level / auto_augment._LEVEL_DENOM,)
# Op names used by rand_augment_transform(): a copy of auto_augment's
# "increasing" list, adjusted for this module's extra ops.
_RAND_TRANSFORMS = auto_augment._RAND_INCREASING_TRANSFORMS.copy()
# Sharpness is removed because it interferes with the *blur ops.
_RAND_TRANSFORMS.remove('SharpnessIncreasing')
# 'MotionBlur' and 'GaussianNoise' are currently left disabled.
_RAND_TRANSFORMS += ['GaussianBlur', 'PoissonNoise']
# Register the level-to-argument function of each extra op; the number is
# the value passed as ``max`` to ``_level_to_arg``.
auto_augment.LEVEL_TO_ARG.update({
    op_name: partial(_level_to_arg, max=op_max)
    for op_name, op_max in (
        ('GaussianBlur', 4),
        ('MotionBlur', 20),
        ('GaussianNoise', 0.1 * 255),
        ('PoissonNoise', 40),
    )
})
# Register the extra blur/noise ops under the names used in op lists.
auto_augment.NAME_TO_OP.update(
    GaussianBlur=gaussian_blur,
    MotionBlur=motion_blur,
    GaussianNoise=gaussian_noise,
    PoissonNoise=poisson_noise,
)
def rand_augment_transform(magnitude=5, num_layers=3):
    """Build the RandAugment transform over ``_RAND_TRANSFORMS``.

    Args:
        magnitude: Magnitude forwarded to ``auto_augment.rand_augment_ops``.
        num_layers: Forwarded to ``auto_augment.RandAugment`` as its second
            argument.

    Returns:
        The ``auto_augment.RandAugment`` instance built from the ops, with
        one equal weight per op.
    """
    # These are tuned for magnitude=5, which means that effective magnitudes
    # are half of these values.
    hparams = dict(
        rotate_deg=30,
        shear_x_pct=0.9,
        shear_y_pct=0.2,
        translate_x_pct=0.10,
        translate_y_pct=0.30,
    )
    ra_ops = auto_augment.rand_augment_ops(
        magnitude, hparams=hparams, transforms=_RAND_TRANSFORMS)
    # Supply weights to disable replacement in random selection (i.e. avoid
    # applying the same op twice).
    num_ops = len(ra_ops)
    uniform_weights = [1. / num_ops for _ in ra_ops]
    return auto_augment.RandAugment(ra_ops, num_layers, uniform_weights)
|