123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313 |
- import numpy as np
- import json
- import cv2
- np.seterr(divide='ignore', invalid='ignore')
- import pyclipper
- from shapely.geometry import Polygon
- import warnings
- warnings.simplefilter('ignore')
class DetLabelEncode(object):
    """Decode a JSON detection label into polygon, text and ignore-flag data."""

    def __init__(self, **kwargs):
        # Nothing to configure; extra keyword arguments are accepted and ignored.
        pass

    def __call__(self, data):
        """Parse ``data['label']`` and store the decoded annotations in ``data``.

        ``data['label']`` must be a JSON string holding a sequence of objects,
        each with a ``'points'`` list and a ``'transcription'`` string.

        On success the same ``data`` dict is returned with three new keys:
        ``'polys'`` (float32 array of the point lists, padded to a common
        length), ``'texts'`` (list of transcriptions) and ``'ignore_tags'``
        (bool array, True where the transcription is ``'*'`` or ``'###'``).
        Returns ``None`` when the label contains no annotations.
        """
        annotations = json.loads(data['label'])

        boxes, txts, txt_tags = [], [], []
        for entry in annotations:
            points = entry['points']
            text = entry['transcription']
            boxes.append(points)
            txts.append(text)
            # '*' and '###' are the markers treated as "ignore this region".
            txt_tags.append(text in ('*', '###'))

        if not boxes:
            return None

        padded_boxes = self.expand_points_num(boxes)
        polys = np.array(padded_boxes, dtype=np.float32)
        ignore_flags = np.array(txt_tags, dtype=np.bool_)

        data['polys'] = polys
        data['texts'] = txts
        data['ignore_tags'] = ignore_flags
        return data

    def order_points_clockwise(self, pts):
        """Arrange four 2-D points into a fixed corner order.

        Slot 0 receives the point with the smallest ``x + y`` and slot 2 the
        point with the largest ``x + y``. Of the remaining points, slot 1
        receives the one with the smallest ``y - x`` and slot 3 the one with
        the largest ``y - x``. The result is a new (4, 2) float32 array.
        """
        coord_sum = pts.sum(axis=1)
        lowest, highest = np.argmin(coord_sum), np.argmax(coord_sum)

        ordered = np.zeros((4, 2), dtype='float32')
        ordered[0] = pts[lowest]
        ordered[2] = pts[highest]

        # Drop the two corners already placed and split the rest by y - x.
        remaining = np.delete(pts, (lowest, highest), axis=0)
        delta = np.diff(np.array(remaining), axis=1)
        ordered[1] = remaining[np.argmin(delta)]
        ordered[3] = remaining[np.argmax(delta)]
        return ordered

    def expand_points_num(self, boxes):
        """Pad every point list to the longest length by repeating its last point.

        Returns a new list of new lists; the input lists are not modified.
        """
        target_len = max((len(box) for box in boxes), default=0)
        return [box + [box[-1]] * (target_len - len(box)) for box in boxes]
class MakeBorderMap(object):
    """Build the threshold (border) map and its mask from text polygons.

    Reads ``data['image']``, ``data['polys']`` and ``data['ignore_tags']`` and
    writes ``data['threshold_map']`` and ``data['threshold_mask']``.
    """

    def __init__(self,
                 shrink_ratio=0.4,
                 thresh_min=0.3,
                 thresh_max=0.7,
                 **kwargs):
        """Store the map parameters.

        Args:
            shrink_ratio: ratio used to derive the polygon offset distance.
            thresh_min: value the threshold map takes where the raw map is 0.
            thresh_max: value the threshold map takes where the raw map is 1.
            **kwargs: when both ``epoch`` and ``total_epoch`` are present and
                ``epoch`` is not the string ``'None'``, ``shrink_ratio`` is
                increased by ``0.2 * epoch / total_epoch``.
        """
        self.shrink_ratio = shrink_ratio
        self.thresh_min = thresh_min
        self.thresh_max = thresh_max
        if 'total_epoch' in kwargs and 'epoch' in kwargs and kwargs[
                'epoch'] != 'None':
            self.shrink_ratio = self.shrink_ratio + 0.2 * kwargs[
                'epoch'] / float(kwargs['total_epoch'])

    def __call__(self, data):
        """Draw every non-ignored polygon and attach the resulting maps.

        Returns the same ``data`` dict with ``'threshold_map'`` (raw border
        map rescaled into ``[thresh_min, thresh_max]``) and
        ``'threshold_mask'`` (1.0 inside the padded polygons, 0 elsewhere).
        """
        img = data['image']
        text_polys = data['polys']
        ignore_tags = data['ignore_tags']

        canvas = np.zeros(img.shape[:2], dtype=np.float32)
        mask = np.zeros(img.shape[:2], dtype=np.float32)

        for i, polygon in enumerate(text_polys):
            if ignore_tags[i]:
                continue
            self.draw_border_map(polygon, canvas, mask=mask)

        # Rescale the raw [0, 1] border strength into [thresh_min, thresh_max].
        canvas = canvas * (self.thresh_max - self.thresh_min) + self.thresh_min

        data['threshold_map'] = canvas
        data['threshold_mask'] = mask
        return data

    def draw_border_map(self, polygon, canvas, mask):
        """Accumulate one polygon's border map into ``canvas`` and ``mask``.

        The polygon is offset by ``area * (1 - shrink_ratio**2) / perimeter``
        with pyclipper; the offset polygon is filled with 1.0 in ``mask``.
        Inside the offset polygon's bounding box, each pixel gets
        ``1 - clip(distance_to_nearest_edge / offset, 0, 1)`` and ``canvas``
        keeps the element-wise maximum of that and its previous content.
        Both ``canvas`` and ``mask`` are modified in place; nothing is
        returned. Polygons with non-positive area are skipped.
        """
        # np.array copies, so shifting the vertices below never touches the
        # caller's polygon.
        polygon = np.array(polygon)
        assert polygon.ndim == 2
        assert polygon.shape[1] == 2

        polygon_shape = Polygon(polygon)
        if polygon_shape.area <= 0:
            return
        distance = (polygon_shape.area * (1 - np.power(self.shrink_ratio, 2)) /
                    polygon_shape.length)
        subject = [tuple(vertex) for vertex in polygon]
        padding = pyclipper.PyclipperOffset()
        padding.AddPath(subject, pyclipper.JT_ROUND,
                        pyclipper.ET_CLOSEDPOLYGON)

        padded_polygon = np.array(padding.Execute(distance)[0])
        cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0)

        xmin = padded_polygon[:, 0].min()
        xmax = padded_polygon[:, 0].max()
        ymin = padded_polygon[:, 1].min()
        ymax = padded_polygon[:, 1].max()
        width = xmax - xmin + 1
        height = ymax - ymin + 1

        # Work in the local frame of the padded polygon's bounding box.
        polygon[:, 0] = polygon[:, 0] - xmin
        polygon[:, 1] = polygon[:, 1] - ymin

        xs = np.broadcast_to(
            np.linspace(0, width - 1, num=width).reshape(1, width),
            (height, width))
        ys = np.broadcast_to(
            np.linspace(0, height - 1, num=height).reshape(height, 1),
            (height, width))

        num_points = polygon.shape[0]
        distance_map = np.zeros((num_points, height, width), dtype=np.float32)
        for i in range(num_points):
            j = (i + 1) % num_points  # wrap around to close the polygon
            absolute_distance = self._distance(xs, ys, polygon[i], polygon[j])
            distance_map[i] = np.clip(absolute_distance / distance, 0, 1)
        distance_map = distance_map.min(axis=0)

        # Clamp the bounding box to the canvas before pasting.
        xmin_valid = min(max(0, xmin), canvas.shape[1] - 1)
        xmax_valid = min(max(0, xmax), canvas.shape[1] - 1)
        ymin_valid = min(max(0, ymin), canvas.shape[0] - 1)
        ymax_valid = min(max(0, ymax), canvas.shape[0] - 1)
        canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1] = np.fmax(
            1 - distance_map[ymin_valid - ymin:ymax_valid - ymax + height,
                             xmin_valid - xmin:xmax_valid - xmax + width, ],
            canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1],
        )

    def _distance(self, xs, ys, point_1, point_2):
        """
        compute the distance from point to a line
        ys: coordinates in the first axis
        xs: coordinates in the second axis
        point_1, point_2: (x, y), the end of the line

        Returns an array shaped like ``xs``. Where ``cosin`` (computed below)
        is negative, the distance to the nearer end point is used instead of
        the distance to the line. A zero-length edge (``point_1`` equal to
        ``point_2``) yields the plain distance to that point.
        """
        # 0/0 and x/0 are expected at pixels lying on an end point; silence
        # them locally instead of relying on a global np.seterr.
        with np.errstate(divide='ignore', invalid='ignore'):
            square_distance_1 = (np.square(xs - point_1[0]) +
                                 np.square(ys - point_1[1]))
            square_distance_2 = (np.square(xs - point_2[0]) +
                                 np.square(ys - point_2[1]))
            square_distance = (np.square(point_1[0] - point_2[0]) +
                               np.square(point_1[1] - point_2[1]))

            if square_distance == 0:
                # Degenerate edge (e.g. a repeated padding vertex): dividing
                # by its length would give NaN at the vertex pixel, which
                # then leaves a hole in the border map.
                return np.sqrt(square_distance_1)

            cosin = (square_distance - square_distance_1 - square_distance_2) / (
                2 * np.sqrt(square_distance_1 * square_distance_2))
            # NaN -> 0, and clamp tiny negatives caused by rounding so the
            # square root below cannot produce NaN.
            square_sin = np.clip(np.nan_to_num(1 - np.square(cosin)), 0, None)
            result = np.sqrt(square_distance_1 * square_distance_2 * square_sin /
                             square_distance)

            use_end_point = cosin < 0
            result[use_end_point] = np.sqrt(
                np.fmin(square_distance_1, square_distance_2))[use_end_point]
        return result

    def extend_line(self, point_1, point_2, result, shrink_ratio):
        """Draw short extensions beyond both ends of a segment onto ``result``.

        Each end point is pushed away from the other end by
        ``(1 + shrink_ratio)`` times the segment vector, rounded to integers,
        and a 1-pixel anti-aliased line of value 4096.0 is drawn from the
        extended point back to the original end point. ``result`` is modified
        in place. Returns the two extended points as ``(x, y)`` int tuples.
        """
        ex_point_1 = (
            int(
                round(point_1[0] + (point_1[0] - point_2[0]) *
                      (1 + shrink_ratio))),
            int(
                round(point_1[1] + (point_1[1] - point_2[1]) *
                      (1 + shrink_ratio))),
        )
        cv2.line(
            result,
            tuple(ex_point_1),
            tuple(point_1),
            4096.0,
            1,
            lineType=cv2.LINE_AA,
            shift=0,
        )
        ex_point_2 = (
            int(
                round(point_2[0] + (point_2[0] - point_1[0]) *
                      (1 + shrink_ratio))),
            int(
                round(point_2[1] + (point_2[1] - point_1[1]) *
                      (1 + shrink_ratio))),
        )
        cv2.line(
            result,
            tuple(ex_point_2),
            tuple(point_2),
            4096.0,
            1,
            lineType=cv2.LINE_AA,
            shift=0,
        )
        return ex_point_1, ex_point_2
class MakeShrinkMap(object):
    r"""
    Making binary mask from detection data with ICDAR format.
    Typically following the process of class `MakeICDARData`.

    Reads ``data['image']``, ``data['polys']`` and ``data['ignore_tags']`` and
    writes ``data['shrink_map']`` and ``data['shrink_mask']``.
    """

    def __init__(self, min_text_size=8, shrink_ratio=0.4, **kwargs):
        """Store the map parameters.

        Args:
            min_text_size: polygons whose bounding-box height or width is
                below this value are treated as ignored.
            shrink_ratio: first (smallest) shrink ratio to try.
            **kwargs: when both ``epoch`` and ``total_epoch`` are present and
                ``epoch`` is not the string ``'None'``, ``shrink_ratio`` is
                increased by ``0.2 * epoch / total_epoch``.
        """
        self.min_text_size = min_text_size
        self.shrink_ratio = shrink_ratio
        if 'total_epoch' in kwargs and 'epoch' in kwargs and kwargs[
                'epoch'] != 'None':
            self.shrink_ratio = self.shrink_ratio + 0.2 * kwargs[
                'epoch'] / float(kwargs['total_epoch'])

    def __call__(self, data):
        """Build the shrink map and its mask.

        ``data['shrink_map']`` is 1 inside each shrunk polygon and 0
        elsewhere. ``data['shrink_mask']`` is 1 everywhere except inside
        ignored polygons, where it is 0. Polygons are clipped/re-oriented in
        place by ``validate_polygons`` and ``ignore_tags`` is updated in place
        for polygons that end up ignored. Returns the same ``data`` dict.
        """
        image = data['image']
        text_polys = data['polys']
        ignore_tags = data['ignore_tags']

        h, w = image.shape[:2]
        text_polys, ignore_tags = self.validate_polygons(
            text_polys, ignore_tags, h, w)
        gt = np.zeros((h, w), dtype=np.float32)
        mask = np.ones((h, w), dtype=np.float32)

        # The candidate ratios do not depend on the polygon; build them once.
        possible_ratios = self._shrink_ratio_candidates()

        for i, polygon in enumerate(text_polys):
            height = max(polygon[:, 1]) - min(polygon[:, 1])
            width = max(polygon[:, 0]) - min(polygon[:, 0])
            if ignore_tags[i] or min(height, width) < self.min_text_size:
                cv2.fillPoly(mask,
                             polygon.astype(np.int32)[np.newaxis, :, :], 0)
                ignore_tags[i] = True
                continue

            polygon_shape = Polygon(polygon)
            subject = [tuple(vertex) for vertex in polygon]
            padding = pyclipper.PyclipperOffset()
            padding.AddPath(subject, pyclipper.JT_ROUND,
                            pyclipper.ET_CLOSEDPOLYGON)

            # Increase the shrink ratio (i.e. shrink less) until the offset
            # yields exactly one polygon; the last candidate is 1, which
            # corresponds to a zero offset.
            shrunk = []
            for ratio in possible_ratios:
                distance = (polygon_shape.area * (1 - np.power(ratio, 2)) /
                            polygon_shape.length)
                shrunk = padding.Execute(-distance)
                if len(shrunk) == 1:
                    break

            if not shrunk:
                # Nothing left after shrinking: treat the region as ignored.
                cv2.fillPoly(mask,
                             polygon.astype(np.int32)[np.newaxis, :, :], 0)
                ignore_tags[i] = True
                continue

            for each_shrink in shrunk:
                shrink = np.array(each_shrink).reshape(-1, 2)
                cv2.fillPoly(gt, [shrink.astype(np.int32)], 1)

        data['shrink_map'] = gt
        data['shrink_mask'] = mask
        return data

    def _shrink_ratio_candidates(self):
        """Return the shrink ratios to try: multiples of ``shrink_ratio`` below 1, then 1.

        ``np.append`` returns a new array rather than modifying its argument,
        so its result must be kept for the final ratio to be included.
        """
        ratios = np.arange(self.shrink_ratio, 1, self.shrink_ratio)
        return np.append(ratios, 1)

    def validate_polygons(self, polygons, ignore_tags, h, w):
        """
        polygons (numpy.array, required): of shape (num_instances, num_points, 2)

        Clips every polygon to ``[0, w - 1] x [0, h - 1]`` in place, marks
        polygons whose absolute signed area is below 1 as ignored, and
        reverses the point order of polygons whose signed area (as computed
        by ``polygon_area``) is positive. Returns ``(polygons, ignore_tags)``,
        the same objects that were passed in.
        """
        if len(polygons) == 0:
            return polygons, ignore_tags
        assert len(polygons) == len(ignore_tags)

        for i, polygon in enumerate(polygons):
            polygon[:, 0] = np.clip(polygon[:, 0], 0, w - 1)
            polygon[:, 1] = np.clip(polygon[:, 1], 0, h - 1)

            area = self.polygon_area(polygon)
            if abs(area) < 1:
                ignore_tags[i] = True
            if area > 0:
                # Normalise orientation so every kept polygon has area <= 0.
                polygons[i] = polygon[::-1, :]
        return polygons, ignore_tags

    def polygon_area(self, polygon):
        """
        compute polygon area

        Returns the signed area from the shoelace sum
        ``sum(p.x * q.y - p.y * q.x) / 2`` where ``q`` is the point preceding
        ``p`` (the last point precedes the first).
        """
        area = 0
        previous = polygon[-1]
        for current in polygon:
            area += current[0] * previous[1] - current[1] * previous[0]
            previous = current
        return area / 2.0
|