iaa_augment.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """
  15. This code is adapted from:
  16. https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/iaa_augment.py
  17. """
  18. import os
  19. # Prevent automatic updates in Albumentations for stability in augmentation behavior
  20. os.environ['NO_ALBUMENTATIONS_UPDATE'] = '1'
  21. import numpy as np
  22. import albumentations as A
  23. from albumentations.core.transforms_interface import DualTransform
  24. from albumentations.augmentations.geometric import functional as fgeometric
  25. from packaging import version
# Installed Albumentations version, parsed so it can be compared as a version
# rather than as a plain string.
ALBU_VERSION = version.parse(A.__version__)
# True when the installed Albumentations is 1.4.15 or newer.
# ImgaugLikeResize.apply uses this flag to choose how it calls
# fgeometric.resize: target size as one (height, width) tuple when True,
# height and width as two separate arguments otherwise.
IS_ALBU_NEW_VERSION = ALBU_VERSION >= version.parse('1.4.15')
  28. # Custom resize transformation mimicking Imgaug's behavior with scaling
  29. class ImgaugLikeResize(DualTransform):
  30. def __init__(self, scale_range=(0.5, 3.0), interpolation=1, p=1.0):
  31. super(ImgaugLikeResize, self).__init__(p)
  32. self.scale_range = scale_range
  33. self.interpolation = interpolation
  34. # Resize the image based on a randomly chosen scale within the scale range
  35. def apply(self, img, scale=1.0, **params):
  36. height, width = img.shape[:2]
  37. new_height = int(height * scale)
  38. new_width = int(width * scale)
  39. if IS_ALBU_NEW_VERSION:
  40. return fgeometric.resize(img, (new_height, new_width),
  41. interpolation=self.interpolation)
  42. return fgeometric.resize(img,
  43. new_height,
  44. new_width,
  45. interpolation=self.interpolation)
  46. # Apply the same scaling transformation to keypoints (e.g., polygon points)
  47. def apply_to_keypoints(self, keypoints, scale=1.0, **params):
  48. return np.array([(x * scale, y * scale) + tuple(rest)
  49. for x, y, *rest in keypoints])
  50. # Get random scale parameter within the specified range
  51. def get_params(self):
  52. scale = np.random.uniform(self.scale_range[0], self.scale_range[1])
  53. return {'scale': scale}
  54. # Builder class to translate custom augmenter arguments into Albumentations-compatible format
  55. class AugmenterBuilder(object):
  56. def __init__(self):
  57. # Map common Imgaug transformations to equivalent Albumentations transforms
  58. self.imgaug_to_albu = {
  59. 'Fliplr': 'HorizontalFlip',
  60. 'Flipud': 'VerticalFlip',
  61. 'Affine': 'Affine',
  62. # Additional mappings can be added here if needed
  63. }
  64. # Recursive method to construct augmentation pipeline based on provided arguments
  65. def build(self, args, root=True):
  66. if args is None or len(args) == 0:
  67. return None
  68. elif isinstance(args, list):
  69. # Build the full augmentation sequence if it's a root-level call
  70. if root:
  71. sequence = [self.build(value, root=False) for value in args]
  72. return A.Compose(
  73. sequence,
  74. keypoint_params=A.KeypointParams(format='xy',
  75. remove_invisible=False),
  76. )
  77. else:
  78. # Build individual augmenters for nested arguments
  79. augmenter_type = args[0]
  80. augmenter_args = args[1] if len(args) > 1 else {}
  81. augmenter_args_mapped = self.map_arguments(
  82. augmenter_type, augmenter_args)
  83. augmenter_type_mapped = self.imgaug_to_albu.get(
  84. augmenter_type, augmenter_type)
  85. if augmenter_type_mapped == 'Resize':
  86. return ImgaugLikeResize(**augmenter_args_mapped)
  87. else:
  88. cls = getattr(A, augmenter_type_mapped)
  89. return cls(
  90. **{
  91. k: self.to_tuple_if_list(v)
  92. for k, v in augmenter_args_mapped.items()
  93. })
  94. elif isinstance(args, dict):
  95. # Process individual transformation specified as dictionary
  96. augmenter_type = args['type']
  97. augmenter_args = args.get('args', {})
  98. augmenter_args_mapped = self.map_arguments(augmenter_type,
  99. augmenter_args)
  100. augmenter_type_mapped = self.imgaug_to_albu.get(
  101. augmenter_type, augmenter_type)
  102. if augmenter_type_mapped == 'Resize':
  103. return ImgaugLikeResize(**augmenter_args_mapped)
  104. else:
  105. cls = getattr(A, augmenter_type_mapped)
  106. return cls(
  107. **{
  108. k: self.to_tuple_if_list(v)
  109. for k, v in augmenter_args_mapped.items()
  110. })
  111. else:
  112. raise RuntimeError('Unknown augmenter arg: ' + str(args))
  113. # Map arguments to expected format for each augmenter type
  114. def map_arguments(self, augmenter_type, augmenter_args):
  115. augmenter_args = augmenter_args.copy(
  116. ) # Avoid modifying the original arguments
  117. if augmenter_type == 'Resize':
  118. # Ensure size is a valid 2-element list or tuple
  119. size = augmenter_args.get('size')
  120. if size:
  121. if not isinstance(size, (list, tuple)) or len(size) != 2:
  122. raise ValueError(
  123. f"'size' must be a list or tuple of two numbers, but got {size}"
  124. )
  125. min_scale, max_scale = size
  126. return {
  127. 'scale_range': (min_scale, max_scale),
  128. 'interpolation': 1, # Linear interpolation
  129. 'p': 1.0,
  130. }
  131. else:
  132. return {
  133. 'scale_range': (1.0, 1.0),
  134. 'interpolation': 1,
  135. 'p': 1.0
  136. }
  137. elif augmenter_type == 'Affine':
  138. # Map rotation to a tuple and ensure p=1.0 to apply transformation
  139. rotate = augmenter_args.get('rotate', 0)
  140. if isinstance(rotate, list):
  141. rotate = tuple(rotate)
  142. elif isinstance(rotate, (int, float)):
  143. rotate = (float(rotate), float(rotate))
  144. augmenter_args['rotate'] = rotate
  145. augmenter_args['p'] = 1.0
  146. return augmenter_args
  147. else:
  148. # For other augmenters, ensure 'p' probability is specified
  149. p = augmenter_args.get('p', 1.0)
  150. augmenter_args['p'] = p
  151. return augmenter_args
  152. # Convert lists to tuples for Albumentations compatibility
  153. def to_tuple_if_list(self, obj):
  154. if isinstance(obj, list):
  155. return tuple(obj)
  156. return obj
  157. # Wrapper class for image and polygon transformations using Imgaug-style augmentation
  158. class IaaAugment:
  159. def __init__(self, augmenter_args=None, **kwargs):
  160. if augmenter_args is None:
  161. # Default augmenters if none are specified
  162. augmenter_args = [
  163. {
  164. 'type': 'Fliplr',
  165. 'args': {
  166. 'p': 0.5
  167. }
  168. },
  169. {
  170. 'type': 'Affine',
  171. 'args': {
  172. 'rotate': [-10, 10]
  173. }
  174. },
  175. {
  176. 'type': 'Resize',
  177. 'args': {
  178. 'size': [0.5, 3]
  179. }
  180. },
  181. ]
  182. self.augmenter = AugmenterBuilder().build(augmenter_args)
  183. # Apply the augmentations to image and polygon data
  184. def __call__(self, data):
  185. image = data['image']
  186. if self.augmenter:
  187. # Flatten polygons to individual keypoints for transformation
  188. keypoints = []
  189. keypoints_lengths = []
  190. for poly in data['polys']:
  191. keypoints.extend([tuple(point) for point in poly])
  192. keypoints_lengths.append(len(poly))
  193. # Apply the augmentation pipeline to image and keypoints
  194. transformed = self.augmenter(image=image, keypoints=keypoints)
  195. data['image'] = transformed['image']
  196. # Extract transformed keypoints and reconstruct polygon structures
  197. transformed_keypoints = transformed['keypoints']
  198. # Reassemble polygons from transformed keypoints
  199. new_polys = []
  200. idx = 0
  201. for length in keypoints_lengths:
  202. new_poly = transformed_keypoints[idx:idx + length]
  203. new_polys.append(np.array([kp[:2] for kp in new_poly]))
  204. idx += length
  205. data['polys'] = np.array(new_polys)
  206. return data