import numpy as np import json import cv2 np.seterr(divide='ignore', invalid='ignore') import pyclipper from shapely.geometry import Polygon import warnings warnings.simplefilter('ignore') class DetLabelEncode(object): def __init__(self, **kwargs): pass def __call__(self, data): label = data['label'] label = json.loads(label) nBox = len(label) boxes, txts, txt_tags = [], [], [] for bno in range(0, nBox): box = label[bno]['points'] txt = label[bno]['transcription'] boxes.append(box) txts.append(txt) if txt in ['*', '###']: txt_tags.append(True) else: txt_tags.append(False) if len(boxes) == 0: return None boxes = self.expand_points_num(boxes) boxes = np.array(boxes, dtype=np.float32) txt_tags = np.array(txt_tags, dtype=np.bool_) data['polys'] = boxes data['texts'] = txts data['ignore_tags'] = txt_tags return data def order_points_clockwise(self, pts): rect = np.zeros((4, 2), dtype='float32') s = pts.sum(axis=1) rect[0] = pts[np.argmin(s)] rect[2] = pts[np.argmax(s)] tmp = np.delete(pts, (np.argmin(s), np.argmax(s)), axis=0) diff = np.diff(np.array(tmp), axis=1) rect[1] = tmp[np.argmin(diff)] rect[3] = tmp[np.argmax(diff)] return rect def expand_points_num(self, boxes): max_points_num = 0 for box in boxes: if len(box) > max_points_num: max_points_num = len(box) ex_boxes = [] for box in boxes: ex_box = box + [box[-1]] * (max_points_num - len(box)) ex_boxes.append(ex_box) return ex_boxes class MakeBorderMap(object): def __init__(self, shrink_ratio=0.4, thresh_min=0.3, thresh_max=0.7, **kwargs): self.shrink_ratio = shrink_ratio self.thresh_min = thresh_min self.thresh_max = thresh_max if 'total_epoch' in kwargs and 'epoch' in kwargs and kwargs[ 'epoch'] != 'None': self.shrink_ratio = self.shrink_ratio + 0.2 * kwargs[ 'epoch'] / float(kwargs['total_epoch']) def __call__(self, data): img = data['image'] text_polys = data['polys'] ignore_tags = data['ignore_tags'] canvas = np.zeros(img.shape[:2], dtype=np.float32) mask = np.zeros(img.shape[:2], dtype=np.float32) for i in range(len(text_polys)): if ignore_tags[i]: continue self.draw_border_map(text_polys[i], canvas, mask=mask) canvas = canvas * (self.thresh_max - self.thresh_min) + self.thresh_min data['threshold_map'] = canvas data['threshold_mask'] = mask return data def draw_border_map(self, polygon, canvas, mask): polygon = np.array(polygon) assert polygon.ndim == 2 assert polygon.shape[1] == 2 polygon_shape = Polygon(polygon) if polygon_shape.area <= 0: return distance = (polygon_shape.area * (1 - np.power(self.shrink_ratio, 2)) / polygon_shape.length) subject = [tuple(l) for l in polygon] padding = pyclipper.PyclipperOffset() padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) padded_polygon = np.array(padding.Execute(distance)[0]) cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0) xmin = padded_polygon[:, 0].min() xmax = padded_polygon[:, 0].max() ymin = padded_polygon[:, 1].min() ymax = padded_polygon[:, 1].max() width = xmax - xmin + 1 height = ymax - ymin + 1 polygon[:, 0] = polygon[:, 0] - xmin polygon[:, 1] = polygon[:, 1] - ymin xs = np.broadcast_to( np.linspace(0, width - 1, num=width).reshape(1, width), (height, width)) ys = np.broadcast_to( np.linspace(0, height - 1, num=height).reshape(height, 1), (height, width)) distance_map = np.zeros((polygon.shape[0], height, width), dtype=np.float32) for i in range(polygon.shape[0]): j = (i + 1) % polygon.shape[0] absolute_distance = self._distance(xs, ys, polygon[i], polygon[j]) distance_map[i] = np.clip(absolute_distance / distance, 0, 1) distance_map = distance_map.min(axis=0) xmin_valid = min(max(0, xmin), canvas.shape[1] - 1) xmax_valid = min(max(0, xmax), canvas.shape[1] - 1) ymin_valid = min(max(0, ymin), canvas.shape[0] - 1) ymax_valid = min(max(0, ymax), canvas.shape[0] - 1) canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1] = np.fmax( 1 - distance_map[ymin_valid - ymin:ymax_valid - ymax + height, xmin_valid - xmin:xmax_valid - xmax + width, ], canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1], ) def _distance(self, xs, ys, point_1, point_2): """ compute the distance from point to a line ys: coordinates in the first axis xs: coordinates in the second axis point_1, point_2: (x, y), the end of the line """ height, width = xs.shape[:2] square_distance_1 = np.square(xs - point_1[0]) + np.square(ys - point_1[1]) square_distance_2 = np.square(xs - point_2[0]) + np.square(ys - point_2[1]) square_distance = np.square(point_1[0] - point_2[0]) + np.square(point_1[1] - point_2[1]) cosin = (square_distance - square_distance_1 - square_distance_2) / ( 2 * np.sqrt(square_distance_1 * square_distance_2)) square_sin = 1 - np.square(cosin) square_sin = np.nan_to_num(square_sin) result = np.sqrt(square_distance_1 * square_distance_2 * square_sin / square_distance) result[cosin < 0] = np.sqrt( np.fmin(square_distance_1, square_distance_2))[cosin < 0] # self.extend_line(point_1, point_2, result) return result def extend_line(self, point_1, point_2, result, shrink_ratio): ex_point_1 = ( int( round(point_1[0] + (point_1[0] - point_2[0]) * (1 + shrink_ratio))), int( round(point_1[1] + (point_1[1] - point_2[1]) * (1 + shrink_ratio))), ) cv2.line( result, tuple(ex_point_1), tuple(point_1), 4096.0, 1, lineType=cv2.LINE_AA, shift=0, ) ex_point_2 = ( int( round(point_2[0] + (point_2[0] - point_1[0]) * (1 + shrink_ratio))), int( round(point_2[1] + (point_2[1] - point_1[1]) * (1 + shrink_ratio))), ) cv2.line( result, tuple(ex_point_2), tuple(point_2), 4096.0, 1, lineType=cv2.LINE_AA, shift=0, ) return ex_point_1, ex_point_2 class MakeShrinkMap(object): r""" Making binary mask from detection data with ICDAR format. Typically following the process of class `MakeICDARData`. """ def __init__(self, min_text_size=8, shrink_ratio=0.4, **kwargs): self.min_text_size = min_text_size self.shrink_ratio = shrink_ratio if 'total_epoch' in kwargs and 'epoch' in kwargs and kwargs[ 'epoch'] != 'None': self.shrink_ratio = self.shrink_ratio + 0.2 * kwargs[ 'epoch'] / float(kwargs['total_epoch']) def __call__(self, data): image = data['image'] text_polys = data['polys'] ignore_tags = data['ignore_tags'] h, w = image.shape[:2] text_polys, ignore_tags = self.validate_polygons( text_polys, ignore_tags, h, w) gt = np.zeros((h, w), dtype=np.float32) mask = np.ones((h, w), dtype=np.float32) for i in range(len(text_polys)): polygon = text_polys[i] height = max(polygon[:, 1]) - min(polygon[:, 1]) width = max(polygon[:, 0]) - min(polygon[:, 0]) if ignore_tags[i] or min(height, width) < self.min_text_size: cv2.fillPoly(mask, polygon.astype(np.int32)[np.newaxis, :, :], 0) ignore_tags[i] = True else: polygon_shape = Polygon(polygon) subject = [tuple(l) for l in polygon] padding = pyclipper.PyclipperOffset() padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) shrunk = [] # Increase the shrink ratio every time we get multiple polygon returned back possible_ratios = np.arange(self.shrink_ratio, 1, self.shrink_ratio) np.append(possible_ratios, 1) # print(possible_ratios) for ratio in possible_ratios: # print(f"Change shrink ratio to {ratio}") distance = (polygon_shape.area * (1 - np.power(ratio, 2)) / polygon_shape.length) shrunk = padding.Execute(-distance) if len(shrunk) == 1: break if shrunk == []: cv2.fillPoly(mask, polygon.astype(np.int32)[np.newaxis, :, :], 0) ignore_tags[i] = True continue for each_shrink in shrunk: shrink = np.array(each_shrink).reshape(-1, 2) cv2.fillPoly(gt, [shrink.astype(np.int32)], 1) data['shrink_map'] = gt data['shrink_mask'] = mask return data def validate_polygons(self, polygons, ignore_tags, h, w): """ polygons (numpy.array, required): of shape (num_instances, num_points, 2) """ if len(polygons) == 0: return polygons, ignore_tags assert len(polygons) == len(ignore_tags) for polygon in polygons: polygon[:, 0] = np.clip(polygon[:, 0], 0, w - 1) polygon[:, 1] = np.clip(polygon[:, 1], 0, h - 1) for i in range(len(polygons)): area = self.polygon_area(polygons[i]) if abs(area) < 1: ignore_tags[i] = True if area > 0: polygons[i] = polygons[i][::-1, :] return polygons, ignore_tags def polygon_area(self, polygon): """ compute polygon area """ area = 0 q = polygon[-1] for p in polygon: area += p[0] * q[1] - p[1] * q[0] q = p return area / 2.0