python
/
OpenOCR


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
							# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/iaa_augment.py
"""
import os

# Prevent automatic updates in Albumentations for stability in augmentation behavior
os.environ['NO_ALBUMENTATIONS_UPDATE'] = '1'

import numpy as np
import albumentations as A
from albumentations.core.transforms_interface import DualTransform
from albumentations.augmentations.geometric import functional as fgeometric
from packaging import version

# Installed Albumentations version, parsed so it can be compared numerically
# rather than as a plain string.
ALBU_VERSION = version.parse(A.__version__)
# True from release 1.4.15 onwards. ImgaugLikeResize.apply uses this flag to
# decide whether fgeometric.resize receives the target size as a single
# (height, width) tuple or as two separate positional arguments.
IS_ALBU_NEW_VERSION = ALBU_VERSION >= version.parse('1.4.15')


# Custom resize transformation mimicking Imgaug's behavior with scaling
class ImgaugLikeResize(DualTransform):
    """Rescale an image and its keypoints by one randomly drawn factor.

    A single factor is sampled uniformly from ``scale_range`` for every call
    and applied to both image axes, so the aspect ratio is kept (up to integer
    truncation of the target size).

    Args:
        scale_range: ``(low, high)`` bounds of the uniform scale distribution.
        interpolation: OpenCV-style interpolation flag forwarded to the resize
            function (1 is ``cv2.INTER_LINEAR``).
        p: Probability of applying the transform.
    """

    def __init__(self, scale_range=(0.5, 3.0), interpolation=1, p=1.0):
        # Pass ``p`` by keyword: the positional parameter order of the base
        # transform constructor has not been the same in every Albumentations
        # release, so a positional ``p`` could be bound to another parameter.
        super(ImgaugLikeResize, self).__init__(p=p)
        self.scale_range = scale_range
        self.interpolation = interpolation

    def apply(self, img, scale=1.0, **params):
        """Return ``img`` resized by ``scale`` along both spatial axes."""
        height, width = img.shape[:2]
        # Truncate to whole pixels, but never below one pixel: a zero-sized
        # target makes the underlying resize call fail on small inputs.
        new_height = max(1, int(height * scale))
        new_width = max(1, int(width * scale))

        if IS_ALBU_NEW_VERSION:
            # Newer releases take the target size as one (height, width) tuple.
            return fgeometric.resize(img, (new_height, new_width),
                                     interpolation=self.interpolation)
        return fgeometric.resize(img,
                                 new_height,
                                 new_width,
                                 interpolation=self.interpolation)

    def apply_to_keypoints(self, keypoints, scale=1.0, **params):
        """Scale the x/y of every keypoint; extra columns pass through.

        Returns:
            A NumPy array with one row per keypoint.
        """
        if len(keypoints) == 0:
            # Nothing to scale. ``asarray`` keeps the column count of an empty
            # 2-D array instead of collapsing it to shape ``(0,)``.
            return np.asarray(keypoints)
        return np.array([(x * scale, y * scale) + tuple(rest)
                         for x, y, *rest in keypoints])

    def get_params(self):
        """Draw the scale factor shared by the image and its keypoints."""
        low, high = self.scale_range[0], self.scale_range[1]
        return {'scale': np.random.uniform(low, high)}


# Builder class to translate custom augmenter arguments into Albumentations-compatible format
class AugmenterBuilder(object):
    """Translate imgaug-style augmenter specs into Albumentations objects.

    A spec is either a list (at root level: a list of specs; nested:
    ``[type, args]``) or a dict of the form ``{'type': ..., 'args': {...}}``.
    """

    def __init__(self):
        # Map common Imgaug transformations to equivalent Albumentations transforms
        self.imgaug_to_albu = {
            'Fliplr': 'HorizontalFlip',
            'Flipud': 'VerticalFlip',
            'Affine': 'Affine',
            # Additional mappings can be added here if needed
        }

    def build(self, args, root=True):
        """Recursively build an augmenter from ``args``.

        Args:
            args: ``None``, a list, or a dict describing the augmenter(s).
            root: When true and ``args`` is a list, the list is treated as a
                sequence of specs and wrapped in ``A.Compose`` with ``xy``
                keypoint handling. When false, a list is read as
                ``[type, args]`` for a single augmenter.

        Returns:
            ``None`` for ``None``/empty input, otherwise the built augmenter.

        Raises:
            RuntimeError: If ``args`` is neither ``None``, a list nor a dict.
        """
        # Only call len() on sized objects so that e.g. an int falls through
        # to the RuntimeError below instead of raising TypeError here.
        if args is None or (hasattr(args, '__len__') and len(args) == 0):
            return None

        if isinstance(args, list):
            if root:
                built = (self.build(value, root=False) for value in args)
                # Empty child specs build to None; Compose must not see them.
                sequence = [aug for aug in built if aug is not None]
                return A.Compose(
                    sequence,
                    keypoint_params=A.KeypointParams(format='xy',
                                                     remove_invisible=False),
                )
            augmenter_type = args[0]
            augmenter_args = args[1] if len(args) > 1 else {}
            return self._create(augmenter_type, augmenter_args)

        if isinstance(args, dict):
            return self._create(args['type'], args.get('args', {}))

        raise RuntimeError('Unknown augmenter arg: ' + str(args))

    def _create(self, augmenter_type, augmenter_args):
        """Instantiate one augmenter from its imgaug-style name and args."""
        mapped_args = self.map_arguments(augmenter_type, augmenter_args)
        mapped_type = self.imgaug_to_albu.get(augmenter_type, augmenter_type)
        if mapped_type == 'Resize':
            # Resize is served by the custom scale-based transform rather
            # than the Albumentations class of the same name.
            return ImgaugLikeResize(**mapped_args)
        cls = getattr(A, mapped_type)
        return cls(**{
            key: self.to_tuple_if_list(value)
            for key, value in mapped_args.items()
        })

    def map_arguments(self, augmenter_type, augmenter_args):
        """Return a new dict of constructor arguments for ``augmenter_type``.

        Args:
            augmenter_type: Imgaug-style augmenter name (before mapping).
            augmenter_args: Mapping of user-supplied arguments; ``None`` is
                treated as empty. The input is never modified.

        Returns:
            The keyword arguments to pass to the augmenter constructor.

        Raises:
            ValueError: If a ``Resize`` ``size`` is not a 2-element
                list/tuple.
        """
        # Work on a copy so the caller's config is left untouched; tolerate
        # None, which is what an empty ``args:`` key in a config yields.
        augmenter_args = (augmenter_args.copy()
                          if augmenter_args is not None else {})

        if augmenter_type == 'Resize':
            size = augmenter_args.get('size')
            if not size:
                # No size given: identity scale.
                return {
                    'scale_range': (1.0, 1.0),
                    'interpolation': 1,
                    'p': 1.0
                }
            if not isinstance(size, (list, tuple)) or len(size) != 2:
                raise ValueError(
                    f"'size' must be a list or tuple of two numbers, but got {size}"
                )
            min_scale, max_scale = size
            return {
                'scale_range': (min_scale, max_scale),
                'interpolation': 1,  # Linear interpolation
                'p': 1.0,
            }

        if augmenter_type == 'Affine':
            # Normalise rotation to a (low, high) tuple and force p=1.0 so
            # the transform is always applied.
            rotate = augmenter_args.get('rotate', 0)
            if isinstance(rotate, list):
                rotate = tuple(rotate)
            elif isinstance(rotate, (int, float)):
                rotate = (float(rotate), float(rotate))
            augmenter_args['rotate'] = rotate
            augmenter_args['p'] = 1.0
            return augmenter_args

        # For other augmenters, make sure a 'p' probability is specified.
        augmenter_args['p'] = augmenter_args.get('p', 1.0)
        return augmenter_args

    def to_tuple_if_list(self, obj):
        """Return ``tuple(obj)`` for lists and ``obj`` unchanged otherwise."""
        return tuple(obj) if isinstance(obj, list) else obj


# Wrapper class for image and polygon transformations using Imgaug-style augmentation
class IaaAugment:
    """Apply an augmentation pipeline to an image and its text polygons.

    Args:
        augmenter_args: Imgaug-style list of augmenter specs handed to
            ``AugmenterBuilder``. When ``None``, a default of horizontal flip
            (p=0.5), rotation in [-10, 10] and rescaling in [0.5, 3] is used.
        **kwargs: Accepted and ignored.
    """

    def __init__(self, augmenter_args=None, **kwargs):
        if augmenter_args is None:
            # Default augmenters if none are specified
            augmenter_args = [
                {
                    'type': 'Fliplr',
                    'args': {
                        'p': 0.5
                    }
                },
                {
                    'type': 'Affine',
                    'args': {
                        'rotate': [-10, 10]
                    }
                },
                {
                    'type': 'Resize',
                    'args': {
                        'size': [0.5, 3]
                    }
                },
            ]
        self.augmenter = AugmenterBuilder().build(augmenter_args)

    @staticmethod
    def _pack_polys(polys):
        """Bundle per-polygon arrays into a single NumPy array.

        Polygons of identical shape stack into one regular array. Polygons
        with differing point counts cannot be stacked (recent NumPy raises
        ``ValueError`` for such ragged input), so they are placed into a 1-D
        object array with one polygon per slot instead.
        """
        if len({poly.shape for poly in polys}) <= 1:
            return np.array(polys)
        packed = np.empty(len(polys), dtype=object)
        for slot, poly in enumerate(polys):
            packed[slot] = poly
        return packed

    def __call__(self, data):
        """Augment ``data['image']`` and ``data['polys']`` in place.

        Args:
            data: Dict with an ``'image'`` entry and a ``'polys'`` entry that
                iterates over polygons, each a sequence of points.

        Returns:
            The same ``data`` dict, with ``'image'`` and ``'polys'`` replaced
            by their augmented versions when an augmenter is configured.
        """
        image = data['image']
        if not self.augmenter:
            return data

        # Flatten polygons to one keypoint list, remembering each polygon's
        # point count so the polygons can be rebuilt afterwards.
        keypoints = []
        keypoints_lengths = []
        for poly in data['polys']:
            keypoints.extend(tuple(point) for point in poly)
            keypoints_lengths.append(len(poly))

        # Apply the augmentation pipeline to image and keypoints together so
        # both receive the same random transformation.
        transformed = self.augmenter(image=image, keypoints=keypoints)
        data['image'] = transformed['image']
        transformed_keypoints = transformed['keypoints']

        # Reassemble polygons from consecutive runs of transformed keypoints,
        # keeping only the x/y of each keypoint.
        new_polys = []
        start = 0
        for length in keypoints_lengths:
            segment = transformed_keypoints[start:start + length]
            new_polys.append(np.array([kp[:2] for kp in segment]))
            start += length
        data['polys'] = self._pack_polys(new_polys)
        return data