# Part of the implementation is borrowed and modified from SegLink, # publicly available at https://github.com/bgshih/seglink import math import os import shutil import sys import uuid import absl.flags as absl_flags import cv2 import numpy as np import tensorflow as tf from . import utils if tf.__version__ >= '2.0': tf = tf.compat.v1 # test # skip parse sys.argv in tf, so fix bug: # absl.flags._exceptions.UnrecognizedFlagError: # Unknown command line flag 'OCRDetectionPipeline: Unknown command line flag absl_flags.FLAGS(sys.argv, known_only=True) FLAGS = tf.app.flags.FLAGS tf.app.flags.DEFINE_string('weight_init_method', 'xavier', 'Weight initialization method') # constants OFFSET_DIM = 6 RBOX_DIM = 5 N_LOCAL_LINKS = 8 N_CROSS_LINKS = 4 N_SEG_CLASSES = 2 N_LNK_CLASSES = 4 MATCH_STATUS_POS = 1 MATCH_STATUS_NEG = -1 MATCH_STATUS_IGNORE = 0 MUT_LABEL = 3 POS_LABEL = 1 NEG_LABEL = 0 N_DET_LAYERS = 6 def load_oplib(lib_name): """ Load TensorFlow operator library. """ # use absolute path so that ops.py can be called from other directory lib_path = os.path.join( os.path.dirname(os.path.realpath(__file__)), 'lib{0}.so'.format(lib_name)) # duplicate library with a random new name so that # a running program will not be interrupted when the original library is updated lib_copy_path = '/tmp/lib{0}_{1}.so'.format( str(uuid.uuid4())[:8], LIB_NAME) shutil.copyfile(lib_path, lib_copy_path) oplib = tf.load_op_library(lib_copy_path) return oplib def _nn_variable(name, shape, init_method, collection=None, **kwargs): """ Create or reuse a variable ARGS name: variable name shape: variable shape init_method: 'zero', 'kaiming', 'xavier', or (mean, std) collection: if not none, add variable to this collection kwargs: extra parameters passed to tf.get_variable RETURN var: a new or existing variable """ if init_method == 'zero': initializer = tf.constant_initializer(0.0) elif init_method == 'kaiming': if len(shape) == 4: # convolutional filters kh, kw, n_in = shape[:3] init_std = math.sqrt(2.0 / (kh * kw * n_in)) elif len(shape) == 2: # linear weights n_in, n_out = shape init_std = math.sqrt(1.0 / n_out) else: raise 'Unsupported shape' initializer = tf.truncated_normal_initializer(0.0, init_std) elif init_method == 'xavier': if len(shape) == 4: initializer = tf.keras.initializers.glorot_normal() else: initializer = tf.keras.initializers.glorot_normal() elif isinstance(init_method, tuple): assert (len(init_method) == 2) initializer = tf.truncated_normal_initializer(init_method[0], init_method[1]) else: raise 'Unsupported weight initialization method: ' + init_method var = tf.get_variable(name, shape=shape, initializer=initializer) if collection is not None: tf.add_to_collection(collection, var) return var def conv2d(x, n_in, n_out, ksize, stride=1, padding='SAME', weight_init=None, bias=True, relu=False, scope=None, **kwargs): weight_init = weight_init or FLAGS.weight_init_method trainable = kwargs.get('trainable', True) # input_dim = n_in if (padding == 'SAME'): in_height = x.get_shape()[1] in_width = x.get_shape()[2] if (in_height % stride == 0): pad_along_height = max(ksize - stride, 0) else: pad_along_height = max(ksize - (in_height % stride), 0) if (in_width % stride == 0): pad_along_width = max(ksize - stride, 0) else: pad_along_width = max(ksize - (in_width % stride), 0) pad_bottom = pad_along_height // 2 pad_top = pad_along_height - pad_bottom pad_right = pad_along_width // 2 pad_left = pad_along_width - pad_right paddings = tf.constant([[0, 0], [pad_top, pad_bottom], [pad_left, pad_right], [0, 0]]) input_padded = tf.pad(x, paddings, 'CONSTANT') else: input_padded = x with tf.variable_scope(scope or 'conv2d'): # convolution kernel = _nn_variable( 'weight', [ksize, ksize, n_in, n_out], weight_init, collection='weights' if trainable else None, **kwargs) yc = tf.nn.conv2d( input_padded, kernel, [1, stride, stride, 1], padding='VALID') # add bias if bias is True: bias = _nn_variable( 'bias', [n_out], 'zero', collection='biases' if trainable else None, **kwargs) yb = tf.nn.bias_add(yc, bias) # apply ReLU y = yb if relu is True: y = tf.nn.relu(yb) return yb, y def group_conv2d_relu(x, n_in, n_out, ksize, stride=1, group=4, padding='SAME', weight_init=None, bias=True, relu=False, name='group_conv2d', **kwargs): group_axis = len(x.get_shape()) - 1 splits = tf.split(x, [int(n_in / group)] * group, group_axis) conv_list = [] for i in range(group): conv_split, relu_split = conv2d( splits[i], n_in / group, n_out / group, ksize=ksize, stride=stride, padding=padding, weight_init=weight_init, bias=bias, relu=relu, scope='%s_%d' % (name, i)) conv_list.append(conv_split) conv = tf.concat(values=conv_list, axis=group_axis, name=name + '_concat') relu = tf.nn.relu(conv) return conv, relu def group_conv2d_bn_relu(x, n_in, n_out, ksize, stride=1, group=4, padding='SAME', weight_init=None, bias=True, relu=False, name='group_conv2d', **kwargs): group_axis = len(x.get_shape()) - 1 splits = tf.split(x, [int(n_in / group)] * group, group_axis) conv_list = [] for i in range(group): conv_split, relu_split = conv2d( splits[i], n_in / group, n_out / group, ksize=ksize, stride=stride, padding=padding, weight_init=weight_init, bias=bias, relu=relu, scope='%s_%d' % (name, i)) conv_list.append(conv_split) conv = tf.concat(values=conv_list, axis=group_axis, name=name + '_concat') with tf.variable_scope(name + '_bn'): bn = tf.layers.batch_normalization( conv, momentum=0.9, epsilon=1e-5, scale=True, training=True) relu = tf.nn.relu(bn) return conv, relu def next_conv(x, n_in, n_out, ksize, stride=1, group=4, padding='SAME', weight_init=None, bias=True, relu=False, name='next_conv2d', **kwargs): conv_a, relu_a = conv_relu( x, n_in, n_in / 2, ksize=1, stride=1, padding=padding, weight_init=weight_init, bias=bias, relu=relu, scope=name + '_a', **kwargs) conv_b, relu_b = group_conv2d_relu( relu_a, n_in / 2, n_out / 2, ksize=ksize, stride=stride, group=group, padding=padding, weight_init=weight_init, bias=bias, relu=relu, name=name + '_b', **kwargs) conv_c, relu_c = conv_relu( relu_b, n_out / 2, n_out, ksize=1, stride=1, padding=padding, weight_init=weight_init, bias=bias, relu=relu, scope=name + '_c', **kwargs) return conv_c, relu_c def next_conv_bn(x, n_in, n_out, ksize, stride=1, group=4, padding='SAME', weight_init=None, bias=True, relu=False, name='next_conv2d', **kwargs): conv_a, relu_a = conv_bn_relu( x, n_in, n_in / 2, ksize=1, stride=1, padding=padding, weight_init=weight_init, bias=bias, relu=relu, scope=name + '_a', **kwargs) conv_b, relu_b = group_conv2d_bn_relu( relu_a, n_in / 2, n_out / 2, ksize=ksize, stride=stride, group=group, padding=padding, weight_init=weight_init, bias=bias, relu=relu, name=name + '_b', **kwargs) conv_c, relu_c = conv_bn_relu( relu_b, n_out / 2, n_out, ksize=1, stride=1, padding=padding, weight_init=weight_init, bias=bias, relu=relu, scope=name + '_c', **kwargs) return conv_c, relu_c def conv2d_ori(x, n_in, n_out, ksize, stride=1, padding='SAME', weight_init=None, bias=True, relu=False, scope=None, **kwargs): weight_init = weight_init or FLAGS.weight_init_method trainable = kwargs.get('trainable', True) with tf.variable_scope(scope or 'conv2d'): # convolution kernel = _nn_variable( 'weight', [ksize, ksize, n_in, n_out], weight_init, collection='weights' if trainable else None, **kwargs) y = tf.nn.conv2d(x, kernel, [1, stride, stride, 1], padding=padding) # add bias if bias is True: bias = _nn_variable( 'bias', [n_out], 'zero', collection='biases' if trainable else None, **kwargs) y = tf.nn.bias_add(y, bias) # apply ReLU if relu is True: y = tf.nn.relu(y) return y def conv_relu(*args, **kwargs): kwargs['relu'] = True if 'scope' not in kwargs: kwargs['scope'] = 'conv_relu' return conv2d(*args, **kwargs) def conv_bn_relu(*args, **kwargs): kwargs['relu'] = True if 'scope' not in kwargs: kwargs['scope'] = 'conv_relu' conv, relu = conv2d(*args, **kwargs) with tf.variable_scope(kwargs['scope'] + '_bn'): bn = tf.layers.batch_normalization( conv, momentum=0.9, epsilon=1e-5, scale=True, training=True) bn_relu = tf.nn.relu(bn) return bn, bn_relu def conv_relu_ori(*args, **kwargs): kwargs['relu'] = True if 'scope' not in kwargs: kwargs['scope'] = 'conv_relu' return conv2d_ori(*args, **kwargs) def atrous_conv2d(x, n_in, n_out, ksize, dilation, padding='SAME', weight_init=None, bias=True, relu=False, scope=None, **kwargs): weight_init = weight_init or FLAGS.weight_init_method trainable = kwargs.get('trainable', True) with tf.variable_scope(scope or 'atrous_conv2d'): # atrous convolution kernel = _nn_variable( 'weight', [ksize, ksize, n_in, n_out], weight_init, collection='weights' if trainable else None, **kwargs) y = tf.nn.atrous_conv2d(x, kernel, dilation, padding=padding) # add bias if bias is True: bias = _nn_variable( 'bias', [n_out], 'zero', collection='biases' if trainable else None, **kwargs) y = tf.nn.bias_add(y, bias) # apply ReLU if relu is True: y = tf.nn.relu(y) return y def avg_pool(x, ksize, stride, padding='SAME', scope=None): with tf.variable_scope(scope or 'avg_pool'): y = tf.nn.avg_pool(x, [1, ksize, ksize, 1], [1, stride, stride, 1], padding) return y def max_pool(x, ksize, stride, padding='SAME', scope=None): with tf.variable_scope(scope or 'max_pool'): y = tf.nn.max_pool(x, [1, ksize, ksize, 1], [1, stride, stride, 1], padding) return y def score_loss(gt_labels, match_scores, n_classes): """ Classification loss ARGS gt_labels: int32 [n] match_scores: [n, n_classes] RETURN loss """ embeddings = tf.one_hot(tf.cast(gt_labels, tf.int64), n_classes, 1.0, 0.0) losses = tf.nn.softmax_cross_entropy_with_logits(match_scores, embeddings) return tf.reduce_sum(losses) def smooth_l1_loss(offsets, gt_offsets, scope=None): """ Smooth L1 loss between offsets and encoded_gt ARGS offsets: [m?, 5], predicted offsets for one example gt_offsets: [m?, 5], corresponding groundtruth offsets RETURN loss: scalar """ with tf.variable_scope(scope or 'smooth_l1_loss'): gt_offsets = tf.stop_gradient(gt_offsets) diff = tf.abs(offsets - gt_offsets) lesser_mask = tf.cast(tf.less(diff, 1.0), tf.float32) larger_mask = 1.0 - lesser_mask losses1 = (0.5 * tf.square(diff)) * lesser_mask losses2 = (diff - 0.5) * larger_mask return tf.reduce_sum(losses1 + losses2, 1) def polygon_to_rboxe(polygon): x1 = polygon[0] y1 = polygon[1] x2 = polygon[2] y2 = polygon[3] x3 = polygon[4] y3 = polygon[5] x4 = polygon[6] y4 = polygon[7] c_x = (x1 + x2 + x3 + x4) / 4 c_y = (y1 + y2 + y3 + y4) / 4 w1 = point_dist(x1, y1, x2, y2) w2 = point_dist(x3, y3, x4, y4) h1 = point_line_dist(c_x, c_y, x1, y1, x2, y2) h2 = point_line_dist(c_x, c_y, x3, y3, x4, y4) h = h1 + h2 w = (w1 + w2) / 2 theta1 = np.arctan2(y2 - y1, x2 - x1) theta2 = np.arctan2(y3 - y4, x3 - x4) theta = (theta1 + theta2) / 2 return np.array([c_x, c_y, w, h, theta]) def point_dist(x1, y1, x2, y2): return np.sqrt((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1)) def point_line_dist(px, py, x1, y1, x2, y2): eps = 1e-6 dx = x2 - x1 dy = y2 - y1 div = np.sqrt(dx * dx + dy * dy) + eps dist = np.abs(px * dy - py * dx + x2 * y1 - y2 * x1) / div return dist def get_combined_polygon(rboxes, resize_size): image_w = resize_size[1] image_h = resize_size[0] img = np.zeros((image_h, image_w, 3), np.uint8) for i in range(rboxes.shape[0]): segment = np.reshape( np.array(utils.rboxes_to_polygons(rboxes)[i, :], np.int32), (-1, 1, 2)) cv2.drawContours(img, [segment], 0, (255, 255, 255), -1) img2gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(img2gray, 127, 255, cv2.THRESH_BINARY) im2, contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) if len(contours) > 0: cnt = contours[0] max_area = cv2.contourArea(cnt) # get max_area for cont in contours: if cv2.contourArea(cont) > max_area: cnt = cont max_area = cv2.contourArea(cont) rect = cv2.minAreaRect(cnt) combined_polygon = np.array(cv2.boxPoints(rect)).reshape(-1) else: combined_polygon = np.array([0, 0, 0, 0, 0, 0, 0, 0]) return combined_polygon def combine_segs(segs): segs = np.asarray(segs) assert segs.ndim == 2, 'invalid segs ndim' assert segs.shape[-1] == 6, 'invalid segs shape' if len(segs) == 1: cx = segs[0, 0] cy = segs[0, 1] w = segs[0, 2] h = segs[0, 3] theta_sin = segs[0, 4] theta_cos = segs[0, 5] theta = np.arctan2(theta_sin, theta_cos) return np.array([cx, cy, w, h, theta]) # find the best straight line fitting all center points: y = kx + b cxs = segs[:, 0] cys = segs[:, 1] theta_coss = segs[:, 4] theta_sins = segs[:, 5] bar_theta = np.arctan2(theta_sins.sum(), theta_coss.sum()) k = np.tan(bar_theta) b = np.mean(cys - k * cxs) proj_xs = (k * cys + cxs - k * b) / (k**2 + 1) proj_ys = (k * k * cys + k * cxs + b) / (k**2 + 1) proj_points = np.stack((proj_xs, proj_ys), -1) # find the max distance max_dist = -1 idx1 = -1 idx2 = -1 for i in range(len(proj_points)): point1 = proj_points[i, :] for j in range(i + 1, len(proj_points)): point2 = proj_points[j, :] dist = np.sqrt(np.sum((point1 - point2)**2)) if dist > max_dist: idx1 = i idx2 = j max_dist = dist assert idx1 >= 0 and idx2 >= 0 # the bbox: bcx, bcy, bw, bh, average_theta seg1 = segs[idx1, :] seg2 = segs[idx2, :] bcx, bcy = (seg1[:2] + seg2[:2]) / 2.0 bh = np.mean(segs[:, 3]) bw = max_dist + (seg1[2] + seg2[2]) / 2.0 return bcx, bcy, bw, bh, bar_theta def combine_segments_batch(segments_batch, group_indices_batch, segment_counts_batch): batch_size = 1 combined_rboxes_batch = [] combined_counts_batch = [] for image_id in range(batch_size): group_count = segment_counts_batch[image_id] segments = segments_batch[image_id, :, :] group_indices = group_indices_batch[image_id, :] combined_rboxes = [] for i in range(group_count): segments_group = segments[np.where(group_indices == i)[0], :] if segments_group.shape[0] > 0: combined_rbox = combine_segs(segments_group) combined_rboxes.append(combined_rbox) combined_rboxes_batch.append(combined_rboxes) combined_counts_batch.append(len(combined_rboxes)) max_count = np.max(combined_counts_batch) for image_id in range(batch_size): if not combined_counts_batch[image_id] == max_count: combined_rboxes_pad = (max_count - combined_counts_batch[image_id] ) * [RBOX_DIM * [0.0]] combined_rboxes_batch[image_id] = np.vstack( (combined_rboxes_batch[image_id], np.array(combined_rboxes_pad))) return np.asarray(combined_rboxes_batch, np.float32), np.asarray(combined_counts_batch, np.int32) # combine_segments rewrite in python version def combine_segments_python(segments, group_indices, segment_counts): combined_rboxes, combined_counts = tf.py_func( combine_segments_batch, [segments, group_indices, segment_counts], [tf.float32, tf.int32]) return combined_rboxes, combined_counts # decode_segments_links rewrite in python version def get_coord(offsets, map_size, offsets_defaults): if offsets < offsets_defaults[1][0]: l_idx = 0 x = offsets % map_size[0][1] y = offsets // map_size[0][1] elif offsets < offsets_defaults[2][0]: l_idx = 1 x = (offsets - offsets_defaults[1][0]) % map_size[1][1] y = (offsets - offsets_defaults[1][0]) // map_size[1][1] elif offsets < offsets_defaults[3][0]: l_idx = 2 x = (offsets - offsets_defaults[2][0]) % map_size[2][1] y = (offsets - offsets_defaults[2][0]) // map_size[2][1] elif offsets < offsets_defaults[4][0]: l_idx = 3 x = (offsets - offsets_defaults[3][0]) % map_size[3][1] y = (offsets - offsets_defaults[3][0]) // map_size[3][1] elif offsets < offsets_defaults[5][0]: l_idx = 4 x = (offsets - offsets_defaults[4][0]) % map_size[4][1] y = (offsets - offsets_defaults[4][0]) // map_size[4][1] else: l_idx = 5 x = (offsets - offsets_defaults[5][0]) % map_size[5][1] y = (offsets - offsets_defaults[5][0]) // map_size[5][1] return l_idx, x, y def get_coord_link(offsets, map_size, offsets_defaults): if offsets < offsets_defaults[1][1]: offsets_node = offsets // N_LOCAL_LINKS link_idx = offsets % N_LOCAL_LINKS else: offsets_node = (offsets - offsets_defaults[1][1]) // ( N_LOCAL_LINKS + N_CROSS_LINKS) + offsets_defaults[1][0] link_idx = (offsets - offsets_defaults[1][1]) % ( N_LOCAL_LINKS + N_CROSS_LINKS) l_idx, x, y = get_coord(offsets_node, map_size, offsets_defaults) return l_idx, x, y, link_idx def is_valid_coord(l_idx, x, y, map_size): w = map_size[l_idx][1] h = map_size[l_idx][0] return x >= 0 and x < w and y >= 0 and y < h def get_neighbours(l_idx, x, y, map_size, offsets_defaults): if l_idx == 0: coord = [(0, x - 1, y - 1), (0, x, y - 1), (0, x + 1, y - 1), (0, x - 1, y), (0, x + 1, y), (0, x - 1, y + 1), (0, x, y + 1), (0, x + 1, y + 1)] else: coord = [(l_idx, x - 1, y - 1), (l_idx, x, y - 1), (l_idx, x + 1, y - 1), (l_idx, x - 1, y), (l_idx, x + 1, y), (l_idx, x - 1, y + 1), (l_idx, x, y + 1), (l_idx, x + 1, y + 1), (l_idx - 1, 2 * x, 2 * y), (l_idx - 1, 2 * x + 1, 2 * y), (l_idx - 1, 2 * x, 2 * y + 1), (l_idx - 1, 2 * x + 1, 2 * y + 1)] neighbours_offsets = [] link_idx = 0 for nl_idx, nx, ny in coord: if is_valid_coord(nl_idx, nx, ny, map_size): neighbours_offset_node = offsets_defaults[nl_idx][ 0] + map_size[nl_idx][1] * ny + nx if l_idx == 0: neighbours_offset_link = offsets_defaults[l_idx][1] + ( map_size[l_idx][1] * y + x) * N_LOCAL_LINKS + link_idx else: off_tmp = (map_size[l_idx][1] * y + x) * ( N_LOCAL_LINKS + N_CROSS_LINKS) neighbours_offset_link = offsets_defaults[l_idx][ 1] + off_tmp + link_idx neighbours_offsets.append( [neighbours_offset_node, neighbours_offset_link, link_idx]) link_idx += 1 # [node_offsets, link_offsets, link_idx(0-7/11)] return neighbours_offsets def decode_segments_links_python(image_size, all_nodes, all_links, all_reg, anchor_sizes): batch_size = 1 # FLAGS.test_batch_size # offsets = 12285 #768 all_nodes_flat = tf.concat( [tf.reshape(o, [batch_size, -1, N_SEG_CLASSES]) for o in all_nodes], axis=1) all_links_flat = tf.concat( [tf.reshape(o, [batch_size, -1, N_LNK_CLASSES]) for o in all_links], axis=1) all_reg_flat = tf.concat( [tf.reshape(o, [batch_size, -1, OFFSET_DIM]) for o in all_reg], axis=1) segments, group_indices, segment_counts, group_indices_all = tf.py_func( decode_batch, [ all_nodes_flat, all_links_flat, all_reg_flat, image_size, tf.constant(anchor_sizes) ], [tf.float32, tf.int32, tf.int32, tf.int32]) return segments, group_indices, segment_counts, group_indices_all def decode_segments_links_train(image_size, all_nodes, all_links, all_reg, anchor_sizes): batch_size = FLAGS.train_batch_size # offsets = 12285 #768 all_nodes_flat = tf.concat( [tf.reshape(o, [batch_size, -1, N_SEG_CLASSES]) for o in all_nodes], axis=1) all_links_flat = tf.concat( [tf.reshape(o, [batch_size, -1, N_LNK_CLASSES]) for o in all_links], axis=1) all_reg_flat = tf.concat( [tf.reshape(o, [batch_size, -1, OFFSET_DIM]) for o in all_reg], axis=1) segments, group_indices, segment_counts, group_indices_all = tf.py_func( decode_batch, [ all_nodes_flat, all_links_flat, all_reg_flat, image_size, tf.constant(anchor_sizes) ], [tf.float32, tf.int32, tf.int32, tf.int32]) return segments, group_indices, segment_counts, group_indices_all def decode_batch(all_nodes, all_links, all_reg, image_size, anchor_sizes): batch_size = all_nodes.shape[0] batch_segments = [] batch_group_indices = [] batch_segments_counts = [] batch_group_indices_all = [] for image_id in range(batch_size): image_node_scores = all_nodes[image_id, :, :] image_link_scores = all_links[image_id, :, :] image_reg = all_reg[image_id, :, :] image_segments, image_group_indices, image_segments_counts, image_group_indices_all = decode_image( image_node_scores, image_link_scores, image_reg, image_size, anchor_sizes) batch_segments.append(image_segments) batch_group_indices.append(image_group_indices) batch_segments_counts.append(image_segments_counts) batch_group_indices_all.append(image_group_indices_all) max_count = np.max(batch_segments_counts) for image_id in range(batch_size): if not batch_segments_counts[image_id] == max_count: batch_segments_pad = (max_count - batch_segments_counts[image_id] ) * [OFFSET_DIM * [0.0]] batch_segments[image_id] = np.vstack( (batch_segments[image_id], np.array(batch_segments_pad))) batch_group_indices[image_id] = np.hstack( (batch_group_indices[image_id], np.array( (max_count - batch_segments_counts[image_id]) * [-1]))) return np.asarray(batch_segments, np.float32), np.asarray( batch_group_indices, np.int32), np.asarray(batch_segments_counts, np.int32), np.asarray(batch_group_indices_all, np.int32) def decode_image(image_node_scores, image_link_scores, image_reg, image_size, anchor_sizes): map_size = [] offsets_defaults = [] offsets_default_node = 0 offsets_default_link = 0 for i in range(N_DET_LAYERS): offsets_defaults.append([offsets_default_node, offsets_default_link]) map_size.append(image_size // (2**(2 + i))) offsets_default_node += map_size[i][0] * map_size[i][1] if i == 0: offsets_default_link += map_size[i][0] * map_size[i][ 1] * N_LOCAL_LINKS else: offsets_default_link += map_size[i][0] * map_size[i][1] * ( N_LOCAL_LINKS + N_CROSS_LINKS) image_group_indices_all = decode_image_by_join(image_node_scores, image_link_scores, FLAGS.node_threshold, FLAGS.link_threshold, map_size, offsets_defaults) image_group_indices_all -= 1 image_group_indices = image_group_indices_all[np.where( image_group_indices_all >= 0)[0]] image_segments_counts = len(image_group_indices) # convert image_reg to segments with scores(OFFSET_DIM+1) image_segments = np.zeros((image_segments_counts, OFFSET_DIM), dtype=np.float32) for i, offsets in enumerate(np.where(image_group_indices_all >= 0)[0]): encoded_cx = image_reg[offsets, 0] encoded_cy = image_reg[offsets, 1] encoded_width = image_reg[offsets, 2] encoded_height = image_reg[offsets, 3] encoded_theta_cos = image_reg[offsets, 4] encoded_theta_sin = image_reg[offsets, 5] l_idx, x, y = get_coord(offsets, map_size, offsets_defaults) rs = anchor_sizes[l_idx] eps = 1e-6 image_segments[i, 0] = encoded_cx * rs + (2**(2 + l_idx)) * (x + 0.5) image_segments[i, 1] = encoded_cy * rs + (2**(2 + l_idx)) * (y + 0.5) image_segments[i, 2] = np.exp(encoded_width) * rs - eps image_segments[i, 3] = np.exp(encoded_height) * rs - eps image_segments[i, 4] = encoded_theta_cos image_segments[i, 5] = encoded_theta_sin return image_segments, image_group_indices, image_segments_counts, image_group_indices_all def decode_image_by_join(node_scores, link_scores, node_threshold, link_threshold, map_size, offsets_defaults): node_mask = node_scores[:, POS_LABEL] >= node_threshold link_mask = link_scores[:, POS_LABEL] >= link_threshold group_mask = np.zeros_like(node_mask, np.int32) - 1 offsets_pos = np.where(node_mask == 1)[0] def find_parent(point): return group_mask[point] def set_parent(point, parent): group_mask[point] = parent def is_root(point): return find_parent(point) == -1 def find_root(point): root = point update_parent = False while not is_root(root): root = find_parent(root) update_parent = True # for acceleration of find_root if update_parent: set_parent(point, root) return root def join(p1, p2): root1 = find_root(p1) root2 = find_root(p2) if root1 != root2: set_parent(root1, root2) def get_all(): root_map = {} def get_index(root): if root not in root_map: root_map[root] = len(root_map) + 1 return root_map[root] mask = np.zeros_like(node_mask, dtype=np.int32) for i, point in enumerate(offsets_pos): point_root = find_root(point) bbox_idx = get_index(point_root) mask[point] = bbox_idx return mask # join by link pos_link = 0 for i, offsets in enumerate(offsets_pos): l_idx, x, y = get_coord(offsets, map_size, offsets_defaults) neighbours = get_neighbours(l_idx, x, y, map_size, offsets_defaults) for n_idx, noffsets in enumerate(neighbours): link_value = link_mask[noffsets[1]] node_cls = node_mask[noffsets[0]] if link_value and node_cls: pos_link += 1 join(offsets, noffsets[0]) # print(pos_link) mask = get_all() return mask def get_link_mask(node_mask, offsets_defaults, link_max): link_mask = np.zeros_like(link_max) link_mask[0:offsets_defaults[1][1]] = np.tile( node_mask[0:offsets_defaults[1][0]], (N_LOCAL_LINKS, 1)).transpose().reshape(offsets_defaults[1][1]) link_mask[offsets_defaults[1][1]:offsets_defaults[2][1]] = np.tile( node_mask[offsets_defaults[1][0]:offsets_defaults[2][0]], (N_LOCAL_LINKS + N_CROSS_LINKS, 1)).transpose().reshape( (offsets_defaults[2][1] - offsets_defaults[1][1])) link_mask[offsets_defaults[2][1]:offsets_defaults[3][1]] = np.tile( node_mask[offsets_defaults[2][0]:offsets_defaults[3][0]], (N_LOCAL_LINKS + N_CROSS_LINKS, 1)).transpose().reshape( (offsets_defaults[3][1] - offsets_defaults[2][1])) link_mask[offsets_defaults[3][1]:offsets_defaults[4][1]] = np.tile( node_mask[offsets_defaults[3][0]:offsets_defaults[4][0]], (N_LOCAL_LINKS + N_CROSS_LINKS, 1)).transpose().reshape( (offsets_defaults[4][1] - offsets_defaults[3][1])) link_mask[offsets_defaults[4][1]:offsets_defaults[5][1]] = np.tile( node_mask[offsets_defaults[4][0]:offsets_defaults[5][0]], (N_LOCAL_LINKS + N_CROSS_LINKS, 1)).transpose().reshape( (offsets_defaults[5][1] - offsets_defaults[4][1])) link_mask[offsets_defaults[5][1]:] = np.tile( node_mask[offsets_defaults[5][0]:], (N_LOCAL_LINKS + N_CROSS_LINKS, 1)).transpose().reshape( (len(link_mask) - offsets_defaults[5][1])) return link_mask def get_link8(link_scores_raw, map_size): # link[i-1] -local- start -16- end -cross- link[i] link8_mask = np.zeros((link_scores_raw.shape[0])) for i in range(N_DET_LAYERS): if i == 0: offsets_start = map_size[i][0] * map_size[i][1] * N_LOCAL_LINKS offsets_end = map_size[i][0] * map_size[i][1] * ( N_LOCAL_LINKS + 16) offsets_link = map_size[i][0] * map_size[i][1] * ( N_LOCAL_LINKS + 16) link8_mask[:offsets_start] = 1 else: offsets_start = offsets_link + map_size[i][0] * map_size[i][ 1] * N_LOCAL_LINKS offsets_end = offsets_link + map_size[i][0] * map_size[i][1] * ( N_LOCAL_LINKS + 16) offsets_link_pre = offsets_link offsets_link += map_size[i][0] * map_size[i][1] * ( N_LOCAL_LINKS + 16 + N_CROSS_LINKS) link8_mask[offsets_link_pre:offsets_start] = 1 link8_mask[offsets_end:offsets_link] = 1 return link_scores_raw[np.where(link8_mask > 0)[0], :] def decode_image_by_mutex(node_scores, link_scores, node_threshold, link_threshold, map_size, offsets_defaults): node_mask = node_scores[:, POS_LABEL] >= node_threshold link_pos = link_scores[:, POS_LABEL] link_mut = link_scores[:, MUT_LABEL] link_max = np.max(np.vstack((link_pos, link_mut)), axis=0) offsets_pos_list = np.where(node_mask == 1)[0].tolist() link_mask_th = link_max >= link_threshold link_mask = get_link_mask(node_mask, offsets_defaults, link_max) offsets_link_max = np.argsort(-(link_max * link_mask * link_mask_th)) offsets_link_max = offsets_link_max[:len(offsets_pos_list) * 8] group_mask = np.zeros_like(node_mask, dtype=np.int32) - 1 mutex_mask = len(node_mask) * [[]] def find_parent(point): return group_mask[point] def set_parent(point, parent): group_mask[point] = parent def set_mutex_constraint(point, mutex_point_list): mutex_mask[point] = mutex_point_list def find_mutex_constraint(point): mutex_point_list = mutex_mask[point] # update mutex_point_list mutex_point_list_new = [] if not mutex_point_list == []: for mutex_point in mutex_point_list: if not is_root(mutex_point): mutex_point = find_root(mutex_point) if mutex_point not in mutex_point_list_new: mutex_point_list_new.append(mutex_point) set_mutex_constraint(point, mutex_point_list_new) return mutex_point_list_new def combine_mutex_constraint(point, parent): mutex_point_list = find_mutex_constraint(point) mutex_parent_list = find_mutex_constraint(parent) for mutex_point in mutex_point_list: if not is_root(mutex_point): mutex_point = find_root(mutex_point) if mutex_point not in mutex_parent_list: mutex_parent_list.append(mutex_point) set_mutex_constraint(parent, mutex_parent_list) def add_mutex_constraint(p1, p2): mutex_point_list1 = find_mutex_constraint(p1) mutex_point_list2 = find_mutex_constraint(p2) if p1 not in mutex_point_list2: mutex_point_list2.append(p1) if p2 not in mutex_point_list1: mutex_point_list1.append(p2) set_mutex_constraint(p1, mutex_point_list1) set_mutex_constraint(p2, mutex_point_list2) def is_root(point): return find_parent(point) == -1 def find_root(point): root = point update_parent = False while not is_root(root): root = find_parent(root) update_parent = True # for acceleration of find_root if update_parent: set_parent(point, root) return root def join(p1, p2): root1 = find_root(p1) root2 = find_root(p2) if root1 != root2 and (root1 not in find_mutex_constraint(root2)): set_parent(root1, root2) combine_mutex_constraint(root1, root2) def disjoin(p1, p2): root1 = find_root(p1) root2 = find_root(p2) if root1 != root2: add_mutex_constraint(root1, root2) def get_all(): root_map = {} def get_index(root): if root not in root_map: root_map[root] = len(root_map) + 1 return root_map[root] mask = np.zeros_like(node_mask, dtype=np.int32) for _, point in enumerate(offsets_pos_list): point_root = find_root(point) bbox_idx = get_index(point_root) mask[point] = bbox_idx return mask # join by link pos_link = 0 mut_link = 0 for _, offsets_link in enumerate(offsets_link_max): l_idx, x, y, link_idx = get_coord_link(offsets_link, map_size, offsets_defaults) offsets = offsets_defaults[l_idx][0] + map_size[l_idx][1] * y + x if offsets in offsets_pos_list: neighbours = get_neighbours(l_idx, x, y, map_size, offsets_defaults) if not len(np.where(np.array(neighbours)[:, 2] == link_idx)[0]) == 0: noffsets = neighbours[np.where( np.array(neighbours)[:, 2] == link_idx)[0][0]] link_pos_value = link_pos[noffsets[1]] link_mut_value = link_mut[noffsets[1]] node_cls = node_mask[noffsets[0]] if node_cls and (link_pos_value > link_mut_value): pos_link += 1 join(offsets, noffsets[0]) elif node_cls and (link_pos_value < link_mut_value): mut_link += 1 disjoin(offsets, noffsets[0]) mask = get_all() return mask