| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
7037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108 |
- # Part of the implementation is borrowed and modified from SegLink,
- # publicly available at https://github.com/bgshih/seglink
- import math
- import os
- import shutil
- import sys
- import uuid
- import absl.flags as absl_flags
- import cv2
- import numpy as np
- import tensorflow as tf
- from . import utils
# TF 2.x exposes the graph-mode API used throughout this file under
# tf.compat.v1.  Compare the numeric major version: a plain string
# comparison would misorder versions such as '10.0' and '2.0'.
if int(tf.__version__.split('.')[0]) >= 2:
    tf = tf.compat.v1
# test
# skip parse sys.argv in tf, so fix bug:
# absl.flags._exceptions.UnrecognizedFlagError:
# Unknown command line flag 'OCRDetectionPipeline: Unknown command line flag
absl_flags.FLAGS(sys.argv, known_only=True)
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('weight_init_method', 'xavier',
                           'Weight initialization method')

# constants
# values regressed per segment (decode_image reads columns 0..5)
OFFSET_DIM = 6
# values per combined rotated box (cx, cy, w, h, theta in combine_segs)
RBOX_DIM = 5
# links from a node to its 8 neighbours on the same layer
N_LOCAL_LINKS = 8
# links from a node to the 2x2 cells of the previous layer
N_CROSS_LINKS = 4
# last dimension of the node / link score tensors
N_SEG_CLASSES = 2
N_LNK_CLASSES = 4
MATCH_STATUS_POS = 1
MATCH_STATUS_NEG = -1
MATCH_STATUS_IGNORE = 0
# score columns read by the decoders in this file
MUT_LABEL = 3
POS_LABEL = 1
NEG_LABEL = 0
# number of detection layers walked by the decoders
N_DET_LAYERS = 6
def load_oplib(lib_name):
    """
    Load TensorFlow operator library.
    ARGS
      lib_name: library base name; the file 'lib<lib_name>.so' is looked up
        next to this source file
    RETURN
      oplib: the module returned by tf.load_op_library
    """
    # use absolute path so that ops.py can be called from other directory
    lib_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'lib{0}.so'.format(lib_name))
    # duplicate library with a random new name so that
    # a running program will not be interrupted when the original library
    # is updated
    # (the original referenced an undefined LIB_NAME here -> NameError)
    lib_copy_path = '/tmp/lib{0}_{1}.so'.format(
        str(uuid.uuid4())[:8], lib_name)
    shutil.copyfile(lib_path, lib_copy_path)
    oplib = tf.load_op_library(lib_copy_path)
    return oplib
- def _nn_variable(name, shape, init_method, collection=None, **kwargs):
- """
- Create or reuse a variable
- ARGS
- name: variable name
- shape: variable shape
- init_method: 'zero', 'kaiming', 'xavier', or (mean, std)
- collection: if not none, add variable to this collection
- kwargs: extra parameters passed to tf.get_variable
- RETURN
- var: a new or existing variable
- """
- if init_method == 'zero':
- initializer = tf.constant_initializer(0.0)
- elif init_method == 'kaiming':
- if len(shape) == 4: # convolutional filters
- kh, kw, n_in = shape[:3]
- init_std = math.sqrt(2.0 / (kh * kw * n_in))
- elif len(shape) == 2: # linear weights
- n_in, n_out = shape
- init_std = math.sqrt(1.0 / n_out)
- else:
- raise 'Unsupported shape'
- initializer = tf.truncated_normal_initializer(0.0, init_std)
- elif init_method == 'xavier':
- if len(shape) == 4:
- initializer = tf.keras.initializers.glorot_normal()
- else:
- initializer = tf.keras.initializers.glorot_normal()
- elif isinstance(init_method, tuple):
- assert (len(init_method) == 2)
- initializer = tf.truncated_normal_initializer(init_method[0],
- init_method[1])
- else:
- raise 'Unsupported weight initialization method: ' + init_method
- var = tf.get_variable(name, shape=shape, initializer=initializer)
- if collection is not None:
- tf.add_to_collection(collection, var)
- return var
def conv2d(x,
           n_in,
           n_out,
           ksize,
           stride=1,
           padding='SAME',
           weight_init=None,
           bias=True,
           relu=False,
           scope=None,
           **kwargs):
    """
    2-D convolution with explicit padding, optional bias and ReLU
    ARGS
      x: input tensor; dimensions 1 and 2 are read as height and width
      n_in: number of input channels
      n_out: number of output channels
      ksize: square kernel size
      stride: stride applied on both spatial axes
      padding: 'SAME' pads the input explicitly (see below) before a
        'VALID' convolution; any other value convolves x unpadded
      weight_init: init method for the kernel, defaults to
        FLAGS.weight_init_method
      bias: add a zero-initialised bias only when exactly True
      relu: apply ReLU to the second output only when exactly True
      scope: variable scope name, defaults to 'conv2d'
      kwargs: 'trainable' selects whether variables are added to the
        'weights'/'biases' collections; all kwargs go to _nn_variable
    RETURN
      yb: convolution output (with bias when enabled), before ReLU
      y: yb after ReLU when relu is True, otherwise yb itself
    """
    weight_init = weight_init or FLAGS.weight_init_method
    trainable = kwargs.get('trainable', True)
    if padding == 'SAME':
        in_height = x.get_shape()[1]
        in_width = x.get_shape()[2]
        if in_height % stride == 0:
            pad_along_height = max(ksize - stride, 0)
        else:
            pad_along_height = max(ksize - (in_height % stride), 0)
        if in_width % stride == 0:
            pad_along_width = max(ksize - stride, 0)
        else:
            pad_along_width = max(ksize - (in_width % stride), 0)
        # when the total padding is odd, the extra pixel goes to the
        # top / left side
        pad_bottom = pad_along_height // 2
        pad_top = pad_along_height - pad_bottom
        pad_right = pad_along_width // 2
        pad_left = pad_along_width - pad_right
        paddings = tf.constant([[0, 0], [pad_top, pad_bottom],
                                [pad_left, pad_right], [0, 0]])
        input_padded = tf.pad(x, paddings, 'CONSTANT')
    else:
        input_padded = x
    with tf.variable_scope(scope or 'conv2d'):
        # convolution
        kernel = _nn_variable(
            'weight', [ksize, ksize, n_in, n_out],
            weight_init,
            collection='weights' if trainable else None,
            **kwargs)
        yc = tf.nn.conv2d(
            input_padded, kernel, [1, stride, stride, 1], padding='VALID')
        # add bias; without a bias the raw convolution is the output
        # (previously `yb` was left unbound in that case)
        yb = yc
        if bias is True:
            bias_var = _nn_variable(
                'bias', [n_out],
                'zero',
                collection='biases' if trainable else None,
                **kwargs)
            yb = tf.nn.bias_add(yc, bias_var)
        # apply ReLU
        y = yb
        if relu is True:
            y = tf.nn.relu(yb)
    return yb, y
def group_conv2d_relu(x,
                      n_in,
                      n_out,
                      ksize,
                      stride=1,
                      group=4,
                      padding='SAME',
                      weight_init=None,
                      bias=True,
                      relu=False,
                      name='group_conv2d',
                      **kwargs):
    """
    Grouped convolution followed by ReLU
    The input is split into `group` equal channel slices along the last
    axis, each slice is convolved by its own conv2d, and the pre-activation
    outputs are concatenated.
    ARGS
      x: input tensor, channels on the last axis
      n_in, n_out: total input / output channels (split evenly over groups)
      ksize, stride, padding, weight_init, bias: forwarded to conv2d
      group: number of groups
      relu: forwarded to conv2d; only conv2d's pre-activation output is
        used here, so it does not affect the result
      name: prefix for the per-group scopes and the concat op
      kwargs: accepted but not forwarded
    RETURN
      conv: concatenated pre-activation output
      relu: tf.nn.relu(conv)
    """
    group_axis = len(x.get_shape()) - 1
    # integer division: channel counts end up in variable shapes and must
    # stay ints (true division produced floats)
    in_per_group = n_in // group
    out_per_group = n_out // group
    splits = tf.split(x, [in_per_group] * group, group_axis)
    conv_list = []
    for i in range(group):
        conv_split, _ = conv2d(
            splits[i],
            in_per_group,
            out_per_group,
            ksize=ksize,
            stride=stride,
            padding=padding,
            weight_init=weight_init,
            bias=bias,
            relu=relu,
            scope='%s_%d' % (name, i))
        conv_list.append(conv_split)
    conv = tf.concat(values=conv_list, axis=group_axis, name=name + '_concat')
    relu = tf.nn.relu(conv)
    return conv, relu
def group_conv2d_bn_relu(x,
                         n_in,
                         n_out,
                         ksize,
                         stride=1,
                         group=4,
                         padding='SAME',
                         weight_init=None,
                         bias=True,
                         relu=False,
                         name='group_conv2d',
                         **kwargs):
    """
    Grouped convolution followed by batch normalisation and ReLU
    ARGS
      x: input tensor, channels on the last axis
      n_in, n_out: total input / output channels (split evenly over groups)
      ksize, stride, padding, weight_init, bias: forwarded to conv2d
      group: number of groups
      relu: forwarded to conv2d; only conv2d's pre-activation output is
        used here, so it does not affect the result
      name: prefix for the per-group scopes, the concat op and the BN scope
      kwargs: accepted but not forwarded
    RETURN
      conv: concatenated pre-activation output (before batch norm)
      relu: ReLU of the batch-normalised output
    """
    group_axis = len(x.get_shape()) - 1
    # integer division: channel counts end up in variable shapes and must
    # stay ints (true division produced floats)
    in_per_group = n_in // group
    out_per_group = n_out // group
    splits = tf.split(x, [in_per_group] * group, group_axis)
    conv_list = []
    for i in range(group):
        conv_split, _ = conv2d(
            splits[i],
            in_per_group,
            out_per_group,
            ksize=ksize,
            stride=stride,
            padding=padding,
            weight_init=weight_init,
            bias=bias,
            relu=relu,
            scope='%s_%d' % (name, i))
        conv_list.append(conv_split)
    conv = tf.concat(values=conv_list, axis=group_axis, name=name + '_concat')
    with tf.variable_scope(name + '_bn'):
        # NOTE: batch norm is always built with training=True
        bn = tf.layers.batch_normalization(
            conv, momentum=0.9, epsilon=1e-5, scale=True, training=True)
        relu = tf.nn.relu(bn)
    return conv, relu
def next_conv(x,
              n_in,
              n_out,
              ksize,
              stride=1,
              group=4,
              padding='SAME',
              weight_init=None,
              bias=True,
              relu=False,
              name='next_conv2d',
              **kwargs):
    """
    Three-stage block: 1x1 conv -> grouped ksize conv -> 1x1 conv
    Stage a maps n_in to n_in // 2 channels, stage b maps that to
    n_out // 2 with a grouped convolution, and stage c maps to n_out.
    ARGS
      x: input tensor
      n_in, n_out: input / output channels of the whole block
      ksize, stride, group: kernel size, stride and group count of stage b
      padding, weight_init, bias, relu, kwargs: forwarded to each stage
        (conv_relu forces relu=True for stages a and c)
      name: scope prefix; stages use name + '_a' / '_b' / '_c'
    RETURN
      conv_c, relu_c: the two outputs of the last conv_relu stage
    """
    # integer division keeps channel counts usable as variable shapes
    mid_in = n_in // 2
    mid_out = n_out // 2
    conv_a, relu_a = conv_relu(
        x,
        n_in,
        mid_in,
        ksize=1,
        stride=1,
        padding=padding,
        weight_init=weight_init,
        bias=bias,
        relu=relu,
        scope=name + '_a',
        **kwargs)
    conv_b, relu_b = group_conv2d_relu(
        relu_a,
        mid_in,
        mid_out,
        ksize=ksize,
        stride=stride,
        group=group,
        padding=padding,
        weight_init=weight_init,
        bias=bias,
        relu=relu,
        name=name + '_b',
        **kwargs)
    conv_c, relu_c = conv_relu(
        relu_b,
        mid_out,
        n_out,
        ksize=1,
        stride=1,
        padding=padding,
        weight_init=weight_init,
        bias=bias,
        relu=relu,
        scope=name + '_c',
        **kwargs)
    return conv_c, relu_c
def next_conv_bn(x,
                 n_in,
                 n_out,
                 ksize,
                 stride=1,
                 group=4,
                 padding='SAME',
                 weight_init=None,
                 bias=True,
                 relu=False,
                 name='next_conv2d',
                 **kwargs):
    """
    Batch-normalised three-stage block: 1x1 -> grouped ksize -> 1x1
    Same layout as next_conv, but stages a and c use conv_bn_relu and
    stage b uses group_conv2d_bn_relu.
    ARGS
      x: input tensor
      n_in, n_out: input / output channels of the whole block
      ksize, stride, group: kernel size, stride and group count of stage b
      padding, weight_init, bias, relu, kwargs: forwarded to each stage
      name: scope prefix; stages use name + '_a' / '_b' / '_c'
    RETURN
      conv_c, relu_c: the two outputs of the last conv_bn_relu stage
    """
    # integer division keeps channel counts usable as variable shapes
    mid_in = n_in // 2
    mid_out = n_out // 2
    conv_a, relu_a = conv_bn_relu(
        x,
        n_in,
        mid_in,
        ksize=1,
        stride=1,
        padding=padding,
        weight_init=weight_init,
        bias=bias,
        relu=relu,
        scope=name + '_a',
        **kwargs)
    conv_b, relu_b = group_conv2d_bn_relu(
        relu_a,
        mid_in,
        mid_out,
        ksize=ksize,
        stride=stride,
        group=group,
        padding=padding,
        weight_init=weight_init,
        bias=bias,
        relu=relu,
        name=name + '_b',
        **kwargs)
    conv_c, relu_c = conv_bn_relu(
        relu_b,
        mid_out,
        n_out,
        ksize=1,
        stride=1,
        padding=padding,
        weight_init=weight_init,
        bias=bias,
        relu=relu,
        scope=name + '_c',
        **kwargs)
    return conv_c, relu_c
def conv2d_ori(x,
               n_in,
               n_out,
               ksize,
               stride=1,
               padding='SAME',
               weight_init=None,
               bias=True,
               relu=False,
               scope=None,
               **kwargs):
    """
    Plain 2-D convolution using TensorFlow's own padding mode
    ARGS
      x: input tensor
      n_in, n_out: input / output channels
      ksize: square kernel size
      stride: stride on both spatial axes
      padding: passed straight to tf.nn.conv2d
      weight_init: kernel init method, defaults to FLAGS.weight_init_method
      bias: add a zero-initialised bias only when exactly True
      relu: apply ReLU only when exactly True
      scope: variable scope name, defaults to 'conv2d'
      kwargs: 'trainable' controls collection membership; all kwargs are
        passed to _nn_variable
    RETURN
      y: the single output tensor
    """
    init_method = weight_init or FLAGS.weight_init_method
    is_trainable = kwargs.get('trainable', True)
    strides = [1, stride, stride, 1]
    with tf.variable_scope(scope or 'conv2d'):
        # convolution
        filters = _nn_variable(
            'weight', [ksize, ksize, n_in, n_out],
            init_method,
            collection='weights' if is_trainable else None,
            **kwargs)
        out = tf.nn.conv2d(x, filters, strides, padding=padding)
        # optional bias
        if bias is True:
            offset = _nn_variable(
                'bias', [n_out],
                'zero',
                collection='biases' if is_trainable else None,
                **kwargs)
            out = tf.nn.bias_add(out, offset)
        # optional activation
        if relu is True:
            out = tf.nn.relu(out)
    return out
def conv_relu(*args, **kwargs):
    """
    conv2d with ReLU forced on
    Sets relu=True and defaults the scope to 'conv_relu'; everything else
    is passed through to conv2d unchanged.
    RETURN
      the (pre-activation, activated) pair returned by conv2d
    """
    kwargs['relu'] = True
    kwargs.setdefault('scope', 'conv_relu')
    return conv2d(*args, **kwargs)
def conv_bn_relu(*args, **kwargs):
    """
    conv2d followed by batch normalisation and ReLU
    Sets relu=True and defaults the scope to 'conv_relu' before calling
    conv2d; batch norm is applied to conv2d's pre-activation output inside
    the scope '<scope>_bn' and is always built with training=True.
    RETURN
      bn: batch-normalised convolution output
      bn_relu: tf.nn.relu(bn)
    """
    kwargs['relu'] = True
    kwargs.setdefault('scope', 'conv_relu')
    pre_activation, _ = conv2d(*args, **kwargs)
    bn_scope = kwargs['scope'] + '_bn'
    with tf.variable_scope(bn_scope):
        bn = tf.layers.batch_normalization(
            pre_activation,
            momentum=0.9,
            epsilon=1e-5,
            scale=True,
            training=True)
        bn_relu = tf.nn.relu(bn)
    return bn, bn_relu
def conv_relu_ori(*args, **kwargs):
    """
    conv2d_ori with ReLU forced on
    Sets relu=True and defaults the scope to 'conv_relu'; everything else
    is passed through to conv2d_ori unchanged.
    RETURN
      the tensor returned by conv2d_ori
    """
    kwargs['relu'] = True
    kwargs.setdefault('scope', 'conv_relu')
    return conv2d_ori(*args, **kwargs)
def atrous_conv2d(x,
                  n_in,
                  n_out,
                  ksize,
                  dilation,
                  padding='SAME',
                  weight_init=None,
                  bias=True,
                  relu=False,
                  scope=None,
                  **kwargs):
    """
    Atrous (dilated) 2-D convolution with optional bias and ReLU
    ARGS
      x: input tensor
      n_in, n_out: input / output channels
      ksize: square kernel size
      dilation: rate passed to tf.nn.atrous_conv2d
      padding: passed straight to tf.nn.atrous_conv2d
      weight_init: kernel init method, defaults to FLAGS.weight_init_method
      bias: add a zero-initialised bias only when exactly True
      relu: apply ReLU only when exactly True
      scope: variable scope name, defaults to 'atrous_conv2d'
      kwargs: 'trainable' controls collection membership; all kwargs are
        passed to _nn_variable
    RETURN
      y: the single output tensor
    """
    init_method = weight_init or FLAGS.weight_init_method
    is_trainable = kwargs.get('trainable', True)
    with tf.variable_scope(scope or 'atrous_conv2d'):
        # atrous convolution
        filters = _nn_variable(
            'weight', [ksize, ksize, n_in, n_out],
            init_method,
            collection='weights' if is_trainable else None,
            **kwargs)
        out = tf.nn.atrous_conv2d(x, filters, dilation, padding=padding)
        # optional bias
        if bias is True:
            offset = _nn_variable(
                'bias', [n_out],
                'zero',
                collection='biases' if is_trainable else None,
                **kwargs)
            out = tf.nn.bias_add(out, offset)
        # optional activation
        if relu is True:
            out = tf.nn.relu(out)
    return out
def avg_pool(x, ksize, stride, padding='SAME', scope=None):
    """
    Average pooling with a square window
    ARGS
      x: input tensor
      ksize: window size on both spatial axes
      stride: stride on both spatial axes
      padding: passed to tf.nn.avg_pool
      scope: variable scope name, defaults to 'avg_pool'
    RETURN
      y: pooled tensor
    """
    window = [1, ksize, ksize, 1]
    strides = [1, stride, stride, 1]
    with tf.variable_scope(scope or 'avg_pool'):
        pooled = tf.nn.avg_pool(x, window, strides, padding)
    return pooled
def max_pool(x, ksize, stride, padding='SAME', scope=None):
    """
    Max pooling with a square window
    ARGS
      x: input tensor
      ksize: window size on both spatial axes
      stride: stride on both spatial axes
      padding: passed to tf.nn.max_pool
      scope: variable scope name, defaults to 'max_pool'
    RETURN
      y: pooled tensor
    """
    window = [1, ksize, ksize, 1]
    strides = [1, stride, stride, 1]
    with tf.variable_scope(scope or 'max_pool'):
        pooled = tf.nn.max_pool(x, window, strides, padding)
    return pooled
def score_loss(gt_labels, match_scores, n_classes):
    """
    Classification loss
    ARGS
      gt_labels: int32 [n]
      match_scores: [n, n_classes]
      n_classes: depth of the one-hot encoding of gt_labels
    RETURN
      loss: sum of the per-row softmax cross-entropy losses
    """
    embeddings = tf.one_hot(tf.cast(gt_labels, tf.int64), n_classes, 1.0, 0.0)
    # TF >= 1.0 rejects positional arguments here; name them explicitly
    # (scores are the logits, the one-hot rows are the labels)
    losses = tf.nn.softmax_cross_entropy_with_logits(
        labels=embeddings, logits=match_scores)
    return tf.reduce_sum(losses)
def smooth_l1_loss(offsets, gt_offsets, scope=None):
    """
    Smooth L1 loss between offsets and encoded_gt
    Elements with absolute error below 1 contribute 0.5 * err^2, the
    others contribute err - 0.5.  No gradient flows into gt_offsets.
    ARGS
      offsets: [m?, 5], predicted offsets for one example
      gt_offsets: [m?, 5], corresponding groundtruth offsets
      scope: variable scope name, defaults to 'smooth_l1_loss'
    RETURN
      loss: per-row loss, summed over axis 1
    """
    with tf.variable_scope(scope or 'smooth_l1_loss'):
        target = tf.stop_gradient(gt_offsets)
        abs_err = tf.abs(offsets - target)
        is_small = tf.cast(tf.less(abs_err, 1.0), tf.float32)
        is_large = 1.0 - is_small
        quadratic_part = (0.5 * tf.square(abs_err)) * is_small
        linear_part = (abs_err - 0.5) * is_large
        return tf.reduce_sum(quadratic_part + linear_part, 1)
def polygon_to_rboxe(polygon):
    """
    Convert a 4-point polygon to a rotated box
    ARGS
      polygon: indexable of 8 values [x1, y1, x2, y2, x3, y3, x4, y4]
    RETURN
      np.array([c_x, c_y, w, h, theta]) where the centre is the mean of
      the four points, w is the mean length of edges 1-2 and 3-4, h is the
      sum of the centre's distances to those two edges, and theta is the
      mean direction of edge 1->2 and edge 4->3
    """
    x1, y1, x2, y2, x3, y3, x4, y4 = polygon[:8]
    c_x = (x1 + x2 + x3 + x4) / 4
    c_y = (y1 + y2 + y3 + y4) / 4
    # width: average length of the two opposite edges
    w = (point_dist(x1, y1, x2, y2) + point_dist(x3, y3, x4, y4)) / 2
    # height: centre-to-edge distance on both sides
    h = (point_line_dist(c_x, c_y, x1, y1, x2, y2)
         + point_line_dist(c_x, c_y, x3, y3, x4, y4))
    # orientation: both edges measured in the same direction
    theta_top = np.arctan2(y2 - y1, x2 - x1)
    theta_bottom = np.arctan2(y3 - y4, x3 - x4)
    theta = (theta_top + theta_bottom) / 2
    return np.array([c_x, c_y, w, h, theta])
def point_dist(x1, y1, x2, y2):
    """Return the Euclidean distance between (x1, y1) and (x2, y2)."""
    dx = x2 - x1
    dy = y2 - y1
    return np.sqrt(dx * dx + dy * dy)
def point_line_dist(px, py, x1, y1, x2, y2):
    """
    Distance from point (px, py) to the line through (x1, y1), (x2, y2)
    A small epsilon is added to the segment length so that coincident end
    points do not cause a division by zero.
    """
    eps = 1e-6
    dx, dy = x2 - x1, y2 - y1
    length = np.sqrt(dx * dx + dy * dy) + eps
    cross = np.abs(px * dy - py * dx + x2 * y1 - y2 * x1)
    return cross / length
def get_combined_polygon(rboxes, resize_size):
    """
    Merge several rotated boxes into one enclosing quadrilateral
    All boxes are drawn as filled polygons on a blank canvas; the contour
    with the largest area is then wrapped in its minimum-area rectangle.
    ARGS
      rboxes: array of rotated boxes, one per row, in the format expected
        by utils.rboxes_to_polygons
      resize_size: (height, width) of the canvas
    RETURN
      combined_polygon: 8 values, the flattened corners of the rectangle;
        all zeros when no contour is found
    """
    image_w = resize_size[1]
    image_h = resize_size[0]
    img = np.zeros((image_h, image_w, 3), np.uint8)
    n_boxes = rboxes.shape[0]
    if n_boxes > 0:
        # convert once instead of once per box
        # (assumes rboxes_to_polygons has no side effects - TODO confirm)
        polygons = utils.rboxes_to_polygons(rboxes)
        for i in range(n_boxes):
            segment = np.reshape(
                np.array(polygons[i, :], np.int32), (-1, 1, 2))
            cv2.drawContours(img, [segment], 0, (255, 255, 255), -1)
    img2gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(img2gray, 127, 255, cv2.THRESH_BINARY)
    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x / 4.x
    # return (contours, hierarchy); contours is second-to-last in both
    contours = cv2.findContours(thresh, cv2.RETR_TREE,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) > 0:
        # first contour with the largest area
        cnt = max(contours, key=cv2.contourArea)
        rect = cv2.minAreaRect(cnt)
        combined_polygon = np.array(cv2.boxPoints(rect)).reshape(-1)
    else:
        combined_polygon = np.array([0, 0, 0, 0, 0, 0, 0, 0])
    return combined_polygon
def combine_segs(segs):
    """
    Combine the segments of one group into a single rotated box
    ARGS
      segs: [n, 6] rows of (cx, cy, w, h, col4, col5), n >= 1
    RETURN
      n == 1: np.array([cx, cy, w, h, arctan2(col4, col5)])
      n >= 2: tuple (bcx, bcy, bw, bh, bar_theta) built from the two
        segments whose centres, projected on the fitted line, are the
        farthest apart
    """
    segs = np.asarray(segs)
    assert segs.ndim == 2, 'invalid segs ndim'
    assert segs.shape[-1] == 6, 'invalid segs shape'

    # NOTE(review): this branch treats column 4 as sin and column 5 as
    # cos, while the multi-segment branch below treats column 4 as cos
    # and column 5 as sin - confirm which layout the model produces.
    if len(segs) == 1:
        cx, cy, w, h, col4, col5 = segs[0]
        theta = np.arctan2(col4, col5)
        return np.array([cx, cy, w, h, theta])

    # fit a line y = kx + b through the centres, with the slope taken
    # from the summed orientation components
    centers_x = segs[:, 0]
    centers_y = segs[:, 1]
    cos_sum = segs[:, 4].sum()
    sin_sum = segs[:, 5].sum()
    bar_theta = np.arctan2(sin_sum, cos_sum)
    k = np.tan(bar_theta)
    b = np.mean(centers_y - k * centers_x)

    # orthogonal projection of every centre onto that line
    denom = k**2 + 1
    proj_xs = (k * centers_y + centers_x - k * b) / denom
    proj_ys = (k * k * centers_y + k * centers_x + b) / denom
    proj_points = np.stack((proj_xs, proj_ys), -1)

    # first pair of projected points with the largest distance
    n_points = len(proj_points)
    max_dist = -1
    idx1 = idx2 = -1
    for first in range(n_points):
        for second in range(first + 1, n_points):
            delta = proj_points[first, :] - proj_points[second, :]
            dist = np.sqrt(np.sum(delta**2))
            if dist > max_dist:
                idx1, idx2, max_dist = first, second, dist
    assert idx1 >= 0 and idx2 >= 0

    # the bbox: bcx, bcy, bw, bh, average_theta
    end_a = segs[idx1, :]
    end_b = segs[idx2, :]
    bcx, bcy = (end_a[:2] + end_b[:2]) / 2.0
    bh = np.mean(segs[:, 3])
    bw = max_dist + (end_a[2] + end_b[2]) / 2.0
    return bcx, bcy, bw, bh, bar_theta
def combine_segments_batch(segments_batch, group_indices_batch,
                           segment_counts_batch):
    """
    Combine grouped segments into rotated boxes for every image
    ARGS
      segments_batch: [batch, n, 6] decoded segments
      group_indices_batch: [batch, n] group id of every segment
      segment_counts_batch: [batch] upper bound on the group ids to scan
    RETURN
      combined_rboxes: float32 [batch, max_count, RBOX_DIM], zero padded
      combined_counts: int32 [batch], number of boxes per image
    NOTE
      Only the first image is processed: batch_size is fixed to 1.
    """
    batch_size = 1
    combined_rboxes_batch = []
    combined_counts_batch = []
    for image_id in range(batch_size):
        group_count = segment_counts_batch[image_id]
        segments = segments_batch[image_id, :, :]
        group_indices = group_indices_batch[image_id, :]
        combined_rboxes = []
        for i in range(group_count):
            segments_group = segments[np.where(group_indices == i)[0], :]
            if segments_group.shape[0] > 0:
                combined_rboxes.append(combine_segs(segments_group))
        # reshape so that an image without boxes is (0, RBOX_DIM) and the
        # result keeps three dimensions instead of collapsing to (1, 0)
        combined_rboxes_batch.append(
            np.asarray(combined_rboxes, np.float32).reshape(-1, RBOX_DIM))
        combined_counts_batch.append(len(combined_rboxes))
    max_count = np.max(combined_counts_batch)
    # zero-pad every image up to the largest box count
    padded = np.zeros((batch_size, max_count, RBOX_DIM), np.float32)
    for image_id, rboxes in enumerate(combined_rboxes_batch):
        padded[image_id, :rboxes.shape[0], :] = rboxes
    return padded, np.asarray(combined_counts_batch, np.int32)
# combine_segments rewrite in python version
def combine_segments_python(segments, group_indices, segment_counts):
    """
    Graph wrapper around combine_segments_batch
    Runs combine_segments_batch through tf.py_func.
    RETURN
      combined_rboxes: float32 tensor
      combined_counts: int32 tensor
    """
    py_inputs = [segments, group_indices, segment_counts]
    py_output_types = [tf.float32, tf.int32]
    combined_rboxes, combined_counts = tf.py_func(combine_segments_batch,
                                                  py_inputs,
                                                  py_output_types)
    return combined_rboxes, combined_counts
# decode_segments_links rewrite in python version
def get_coord(offsets, map_size, offsets_defaults):
    """
    Map a flat node offset to (layer index, x, y)
    ARGS
      offsets: flat node index over all six layers
      map_size: per layer (height, width)
      offsets_defaults: per layer [first node offset, first link offset]
    RETURN
      l_idx: layer that contains the node (0..5)
      x, y: column and row of the node inside that layer
    """
    # the node belongs to the first layer whose successor starts beyond
    # it; anything past the last boundary falls into layer 5
    l_idx = 5
    for layer in range(5):
        if offsets < offsets_defaults[layer + 1][0]:
            l_idx = layer
            break
    # layer 0 is addressed directly, the others relative to their start
    if l_idx == 0:
        local = offsets
    else:
        local = offsets - offsets_defaults[l_idx][0]
    width = map_size[l_idx][1]
    x = local % width
    y = local // width
    return l_idx, x, y
def get_coord_link(offsets, map_size, offsets_defaults):
    """
    Map a flat link offset to (layer index, x, y, link index)
    Links of layer 0 come first with N_LOCAL_LINKS per node; every later
    node owns N_LOCAL_LINKS + N_CROSS_LINKS consecutive links.
    ARGS
      offsets: flat link index
      map_size, offsets_defaults: layer layout, as in get_coord
    RETURN
      l_idx, x, y: position of the node that owns the link
      link_idx: index of the link within that node
    """
    first_layer_links = offsets_defaults[1][1]
    if offsets < first_layer_links:
        offsets_node, link_idx = divmod(offsets, N_LOCAL_LINKS)
    else:
        links_per_node = N_LOCAL_LINKS + N_CROSS_LINKS
        nodes_past_first, link_idx = divmod(offsets - first_layer_links,
                                            links_per_node)
        offsets_node = nodes_past_first + offsets_defaults[1][0]
    l_idx, x, y = get_coord(offsets_node, map_size, offsets_defaults)
    return l_idx, x, y, link_idx
def is_valid_coord(l_idx, x, y, map_size):
    """Tell whether (x, y) lies inside the feature map of layer l_idx."""
    height, width = map_size[l_idx][0], map_size[l_idx][1]
    return 0 <= x < width and 0 <= y < height
def get_neighbours(l_idx, x, y, map_size, offsets_defaults):
    """
    List the valid neighbours of node (l_idx, x, y)
    Candidates are the 8 surrounding cells on the same layer and, for
    layers above 0, the 2x2 block of cells on layer l_idx - 1.  The link
    index counts every candidate, valid or not.
    RETURN
      list of [node_offset, link_offset, link_idx] for each candidate
      that lies inside its feature map
    """
    # same-layer 8-neighbourhood, row by row, centre excluded
    coord = [(l_idx, x + dx, y + dy)
             for dy in (-1, 0, 1)
             for dx in (-1, 0, 1)
             if not (dx == 0 and dy == 0)]
    if l_idx != 0:
        # cross-layer links to the finer layer below
        coord += [(l_idx - 1, 2 * x + dx, 2 * y + dy)
                  for dy in (0, 1)
                  for dx in (0, 1)]

    neighbours_offsets = []
    for link_idx, (nl_idx, nx, ny) in enumerate(coord):
        if not is_valid_coord(nl_idx, nx, ny, map_size):
            continue
        node_offset = (offsets_defaults[nl_idx][0]
                       + map_size[nl_idx][1] * ny + nx)
        if l_idx == 0:
            links_per_node = N_LOCAL_LINKS
        else:
            links_per_node = N_LOCAL_LINKS + N_CROSS_LINKS
        # links are stored per owning node, in candidate order
        link_offset = (offsets_defaults[l_idx][1]
                       + (map_size[l_idx][1] * y + x) * links_per_node
                       + link_idx)
        neighbours_offsets.append([node_offset, link_offset, link_idx])
    # [node_offsets, link_offsets, link_idx(0-7/11)]
    return neighbours_offsets
def decode_segments_links_python(image_size, all_nodes, all_links, all_reg,
                                 anchor_sizes):
    """
    Decode segments and links with a fixed batch size of 1
    Every per-layer tensor is flattened to [1, -1, depth], the layers are
    concatenated along axis 1, and decode_batch is run via tf.py_func.
    RETURN
      segments (float32), group_indices (int32), segment_counts (int32),
      group_indices_all (int32)
    """
    batch_size = 1  # FLAGS.test_batch_size

    def _flatten_layers(layers, depth):
        flat = [tf.reshape(o, [batch_size, -1, depth]) for o in layers]
        return tf.concat(flat, axis=1)

    all_nodes_flat = _flatten_layers(all_nodes, N_SEG_CLASSES)
    all_links_flat = _flatten_layers(all_links, N_LNK_CLASSES)
    all_reg_flat = _flatten_layers(all_reg, OFFSET_DIM)
    py_inputs = [
        all_nodes_flat, all_links_flat, all_reg_flat, image_size,
        tf.constant(anchor_sizes)
    ]
    py_output_types = [tf.float32, tf.int32, tf.int32, tf.int32]
    segments, group_indices, segment_counts, group_indices_all = tf.py_func(
        decode_batch, py_inputs, py_output_types)
    return segments, group_indices, segment_counts, group_indices_all
def decode_segments_links_train(image_size, all_nodes, all_links, all_reg,
                                anchor_sizes):
    """
    Decode segments and links using FLAGS.train_batch_size
    Every per-layer tensor is flattened to [batch, -1, depth], the layers
    are concatenated along axis 1, and decode_batch is run via tf.py_func.
    RETURN
      segments (float32), group_indices (int32), segment_counts (int32),
      group_indices_all (int32)
    """
    batch_size = FLAGS.train_batch_size

    def _flatten_layers(layers, depth):
        flat = [tf.reshape(o, [batch_size, -1, depth]) for o in layers]
        return tf.concat(flat, axis=1)

    all_nodes_flat = _flatten_layers(all_nodes, N_SEG_CLASSES)
    all_links_flat = _flatten_layers(all_links, N_LNK_CLASSES)
    all_reg_flat = _flatten_layers(all_reg, OFFSET_DIM)
    py_inputs = [
        all_nodes_flat, all_links_flat, all_reg_flat, image_size,
        tf.constant(anchor_sizes)
    ]
    py_output_types = [tf.float32, tf.int32, tf.int32, tf.int32]
    segments, group_indices, segment_counts, group_indices_all = tf.py_func(
        decode_batch, py_inputs, py_output_types)
    return segments, group_indices, segment_counts, group_indices_all
def decode_batch(all_nodes, all_links, all_reg, image_size, anchor_sizes):
    """
    Decode every image of a batch and pad the results to a common length
    ARGS
      all_nodes: [batch, n_nodes, n_seg_classes] node scores
      all_links: [batch, n_links, n_lnk_classes] link scores
      all_reg: [batch, n_nodes, OFFSET_DIM] regression outputs
      image_size, anchor_sizes: forwarded to decode_image
    RETURN
      segments: float32 [batch, max_count, OFFSET_DIM], zero padded
      group_indices: int32 [batch, max_count], padded with -1
      segment_counts: int32 [batch]
      group_indices_all: int32, per-node group ids stacked over the batch
    """
    batch_size = all_nodes.shape[0]
    batch_segments = []
    batch_group_indices = []
    batch_segments_counts = []
    batch_group_indices_all = []
    for image_id in range(batch_size):
        decoded = decode_image(all_nodes[image_id, :, :],
                               all_links[image_id, :, :],
                               all_reg[image_id, :, :], image_size,
                               anchor_sizes)
        segments, group_indices, count, group_indices_all = decoded
        batch_segments.append(segments)
        batch_group_indices.append(group_indices)
        batch_segments_counts.append(count)
        batch_group_indices_all.append(group_indices_all)

    # pad shorter images so that everything stacks into one array
    max_count = np.max(batch_segments_counts)
    for image_id in range(batch_size):
        missing = max_count - batch_segments_counts[image_id]
        if missing == 0:
            continue
        batch_segments[image_id] = np.vstack(
            (batch_segments[image_id], np.zeros((missing, OFFSET_DIM))))
        batch_group_indices[image_id] = np.hstack(
            (batch_group_indices[image_id], np.full(missing, -1)))

    return (np.asarray(batch_segments, np.float32),
            np.asarray(batch_group_indices, np.int32),
            np.asarray(batch_segments_counts, np.int32),
            np.asarray(batch_group_indices_all, np.int32))
def decode_image(image_node_scores, image_link_scores, image_reg, image_size,
                 anchor_sizes):
    """
    Decode the segments of one image and assign them to groups
    ARGS
      image_node_scores: [n_nodes, n_seg_classes]
      image_link_scores: [n_links, n_lnk_classes]
      image_reg: [n_nodes, OFFSET_DIM] encoded
        (cx, cy, width, height, theta_cos, theta_sin)
      image_size: (height, width); layer i has size image_size // 2**(2+i)
      anchor_sizes: per-layer scale applied to the regression outputs
    RETURN
      image_segments: float32 [count, OFFSET_DIM] decoded segments
      image_group_indices: group id of every kept segment
      image_segments_counts: number of kept segments
      image_group_indices_all: group id for every node, -1 when not kept
    """
    # layout of the flat node / link arrays, layer by layer
    map_size = []
    offsets_defaults = []
    node_cursor = 0
    link_cursor = 0
    for i in range(N_DET_LAYERS):
        offsets_defaults.append([node_cursor, link_cursor])
        map_size.append(image_size // (2**(2 + i)))
        n_cells = map_size[i][0] * map_size[i][1]
        node_cursor += n_cells
        if i == 0:
            link_cursor += n_cells * N_LOCAL_LINKS
        else:
            link_cursor += n_cells * (N_LOCAL_LINKS + N_CROSS_LINKS)

    image_group_indices_all = decode_image_by_join(
        image_node_scores, image_link_scores, FLAGS.node_threshold,
        FLAGS.link_threshold, map_size, offsets_defaults)
    # group ids come back 1-based with 0 for "no group"; shift to 0-based
    image_group_indices_all -= 1
    kept_nodes = np.where(image_group_indices_all >= 0)[0]
    image_group_indices = image_group_indices_all[kept_nodes]
    image_segments_counts = len(image_group_indices)

    # convert image_reg to segments with scores(OFFSET_DIM+1)
    image_segments = np.zeros((image_segments_counts, OFFSET_DIM),
                              dtype=np.float32)
    eps = 1e-6
    for row, offsets in enumerate(kept_nodes):
        (encoded_cx, encoded_cy, encoded_width, encoded_height,
         encoded_theta_cos, encoded_theta_sin) = image_reg[offsets, :6]
        l_idx, x, y = get_coord(offsets, map_size, offsets_defaults)
        rs = anchor_sizes[l_idx]
        # centres are offsets from the cell centre in image pixels
        image_segments[row, 0] = encoded_cx * rs + (2**(2 + l_idx)) * (x + 0.5)
        image_segments[row, 1] = encoded_cy * rs + (2**(2 + l_idx)) * (y + 0.5)
        image_segments[row, 2] = np.exp(encoded_width) * rs - eps
        image_segments[row, 3] = np.exp(encoded_height) * rs - eps
        image_segments[row, 4] = encoded_theta_cos
        image_segments[row, 5] = encoded_theta_sin
    return (image_segments, image_group_indices, image_segments_counts,
            image_group_indices_all)
def decode_image_by_join(node_scores, link_scores, node_threshold,
                         link_threshold, map_size, offsets_defaults):
    """
    Group positive nodes that are connected by positive links
    A union-find structure is kept in `parents` (-1 marks a root).  Two
    positive nodes are merged when the link from one to the other is
    positive.
    ARGS
      node_scores: [n_nodes, classes]; column POS_LABEL is thresholded
      link_scores: [n_links, classes]; column POS_LABEL is thresholded
      node_threshold, link_threshold: inclusive score thresholds
      map_size, offsets_defaults: layer layout, as in get_coord
    RETURN
      mask: int32 [n_nodes], 0 for non-positive nodes and a 1-based group
        id, in order of first appearance, for positive nodes
    """
    node_mask = node_scores[:, POS_LABEL] >= node_threshold
    link_mask = link_scores[:, POS_LABEL] >= link_threshold
    parents = np.zeros_like(node_mask, np.int32) - 1
    offsets_pos = np.where(node_mask == 1)[0]

    def find_root(point):
        root = point
        while parents[root] != -1:
            root = parents[root]
        # for acceleration of find_root: shortcut the starting point
        if root != point:
            parents[point] = root
        return root

    def join(p1, p2):
        root1 = find_root(p1)
        root2 = find_root(p2)
        if root1 != root2:
            parents[root1] = root2

    # join by link
    for offsets in offsets_pos:
        l_idx, x, y = get_coord(offsets, map_size, offsets_defaults)
        neighbours = get_neighbours(l_idx, x, y, map_size, offsets_defaults)
        for node_offset, link_offset, _ in neighbours:
            link_value = link_mask[link_offset]
            node_cls = node_mask[node_offset]
            if link_value and node_cls:
                join(offsets, node_offset)

    # number the roots 1, 2, ... in the order they are first met
    root_ids = {}
    mask = np.zeros_like(node_mask, dtype=np.int32)
    for point in offsets_pos:
        point_root = find_root(point)
        if point_root not in root_ids:
            root_ids[point_root] = len(root_ids) + 1
        mask[point] = root_ids[point_root]
    return mask
def get_link_mask(node_mask, offsets_defaults, link_max):
    """
    Expand a per-node mask to a per-link mask
    Every link receives the mask value of the node that owns it: nodes of
    layer 0 own N_LOCAL_LINKS consecutive links, nodes of layers 1..5 own
    N_LOCAL_LINKS + N_CROSS_LINKS.
    ARGS
      node_mask: [n_nodes] per-node mask
      offsets_defaults: per layer [first node offset, first link offset]
      link_max: [n_links] array; only its shape and dtype are used
    RETURN
      link_mask: array shaped and typed like link_max
    """
    link_mask = np.zeros_like(link_max)
    last_layer = 5
    for layer in range(last_layer + 1):
        # layer 0 starts at 0; the last layer runs to the end of the arrays
        node_lo = 0 if layer == 0 else offsets_defaults[layer][0]
        link_lo = 0 if layer == 0 else offsets_defaults[layer][1]
        if layer < last_layer:
            node_hi = offsets_defaults[layer + 1][0]
            link_hi = offsets_defaults[layer + 1][1]
        else:
            node_hi = None
            link_hi = len(link_mask)
        if layer == 0:
            links_per_node = N_LOCAL_LINKS
        else:
            links_per_node = N_LOCAL_LINKS + N_CROSS_LINKS
        # tile + transpose repeats each node value links_per_node times
        repeated = np.tile(node_mask[node_lo:node_hi],
                           (links_per_node, 1)).transpose()
        link_mask[link_lo:link_hi] = repeated.reshape(link_hi - link_lo)
    return link_mask
def get_link8(link_scores_raw, map_size):
    """
    Drop the 16 middle links of every node block from raw link scores
    The raw layout per layer is: N_LOCAL_LINKS links per cell, then 16
    links per cell that are skipped, then (for layers above 0)
    N_CROSS_LINKS links per cell.
    ARGS
      link_scores_raw: [n_raw_links, classes]
      map_size: per layer (height, width)
    RETURN
      the rows of link_scores_raw that belong to local or cross links
    """
    # link[i-1] -local- start -16- end -cross- link[i]
    link8_mask = np.zeros((link_scores_raw.shape[0]))
    layer_start = 0
    for i in range(N_DET_LAYERS):
        n_cells = map_size[i][0] * map_size[i][1]
        local_end = layer_start + n_cells * N_LOCAL_LINKS
        skipped_end = layer_start + n_cells * (N_LOCAL_LINKS + 16)
        link8_mask[layer_start:local_end] = 1
        if i == 0:
            # the first layer has no cross links
            layer_start = skipped_end
        else:
            layer_end = layer_start + n_cells * (
                N_LOCAL_LINKS + 16 + N_CROSS_LINKS)
            link8_mask[skipped_end:layer_end] = 1
            layer_start = layer_end
    return link_scores_raw[np.where(link8_mask > 0)[0], :]
def decode_image_by_mutex(node_scores, link_scores, node_threshold,
                          link_threshold, map_size, offsets_defaults):
    """
    Group positive nodes using positive links and mutex constraints
    Links are visited in descending order of max(pos score, mut score).
    A link whose pos score is larger joins the two groups unless a mutex
    constraint forbids it; a link whose mut score is larger records a
    mutex constraint between the two groups.
    ARGS
      node_scores: [n_nodes, classes]; column POS_LABEL is thresholded
      link_scores: [n_links, classes]; columns POS_LABEL and MUT_LABEL
      node_threshold, link_threshold: inclusive score thresholds
      map_size, offsets_defaults: layer layout, as in get_coord
    RETURN
      mask: int32 [n_nodes], 0 for non-positive nodes and a 1-based group
        id, in order of first appearance, for positive nodes
    """
    node_mask = node_scores[:, POS_LABEL] >= node_threshold
    link_pos = link_scores[:, POS_LABEL]
    link_mut = link_scores[:, MUT_LABEL]
    link_max = np.max(np.vstack((link_pos, link_mut)), axis=0)
    offsets_pos_list = np.where(node_mask == 1)[0].tolist()
    # set for O(1) membership tests in the main loop
    positive_nodes = set(offsets_pos_list)
    link_mask_th = link_max >= link_threshold
    link_mask = get_link_mask(node_mask, offsets_defaults, link_max)
    # strongest links first; links below threshold or owned by a
    # non-positive node score 0
    offsets_link_max = np.argsort(-(link_max * link_mask * link_mask_th))
    offsets_link_max = offsets_link_max[:len(offsets_pos_list) * 8]
    group_mask = np.zeros_like(node_mask, dtype=np.int32) - 1
    # one independent list per node (n * [[]] would alias a single list)
    mutex_mask = [[] for _ in range(len(node_mask))]

    def find_parent(point):
        return group_mask[point]

    def set_parent(point, parent):
        group_mask[point] = parent

    def is_root(point):
        return find_parent(point) == -1

    def find_root(point):
        root = point
        update_parent = False
        while not is_root(root):
            root = find_parent(root)
            update_parent = True
        # for acceleration of find_root
        if update_parent:
            set_parent(point, root)
        return root

    def set_mutex_constraint(point, mutex_point_list):
        mutex_mask[point] = mutex_point_list

    def find_mutex_constraint(point):
        # refresh the stored constraints so they refer to current roots,
        # without duplicates
        mutex_point_list_new = []
        for mutex_point in mutex_mask[point]:
            if not is_root(mutex_point):
                mutex_point = find_root(mutex_point)
            if mutex_point not in mutex_point_list_new:
                mutex_point_list_new.append(mutex_point)
        set_mutex_constraint(point, mutex_point_list_new)
        return mutex_point_list_new

    def combine_mutex_constraint(point, parent):
        # the merged group inherits the constraints of both sides
        mutex_point_list = find_mutex_constraint(point)
        mutex_parent_list = find_mutex_constraint(parent)
        for mutex_point in mutex_point_list:
            if not is_root(mutex_point):
                mutex_point = find_root(mutex_point)
            if mutex_point not in mutex_parent_list:
                mutex_parent_list.append(mutex_point)
        set_mutex_constraint(parent, mutex_parent_list)

    def add_mutex_constraint(p1, p2):
        mutex_point_list1 = find_mutex_constraint(p1)
        mutex_point_list2 = find_mutex_constraint(p2)
        if p1 not in mutex_point_list2:
            mutex_point_list2.append(p1)
        if p2 not in mutex_point_list1:
            mutex_point_list1.append(p2)
        set_mutex_constraint(p1, mutex_point_list1)
        set_mutex_constraint(p2, mutex_point_list2)

    def join(p1, p2):
        root1 = find_root(p1)
        root2 = find_root(p2)
        if root1 != root2 and (root1 not in find_mutex_constraint(root2)):
            set_parent(root1, root2)
            combine_mutex_constraint(root1, root2)

    def disjoin(p1, p2):
        root1 = find_root(p1)
        root2 = find_root(p2)
        if root1 != root2:
            add_mutex_constraint(root1, root2)

    def get_all():
        root_map = {}
        mask = np.zeros_like(node_mask, dtype=np.int32)
        for point in offsets_pos_list:
            point_root = find_root(point)
            if point_root not in root_map:
                root_map[point_root] = len(root_map) + 1
            mask[point] = root_map[point_root]
        return mask

    # join by link
    for offsets_link in offsets_link_max:
        l_idx, x, y, link_idx = get_coord_link(offsets_link, map_size,
                                               offsets_defaults)
        offsets = offsets_defaults[l_idx][0] + map_size[l_idx][1] * y + x
        if offsets not in positive_nodes:
            continue
        neighbours = get_neighbours(l_idx, x, y, map_size, offsets_defaults)
        # the neighbour reached through this link, if it is inside the
        # map; this also copes with an empty neighbour list
        noffsets = next((n for n in neighbours if n[2] == link_idx), None)
        if noffsets is None:
            continue
        link_pos_value = link_pos[noffsets[1]]
        link_mut_value = link_mut[noffsets[1]]
        node_cls = node_mask[noffsets[0]]
        if node_cls and (link_pos_value > link_mut_value):
            join(offsets, noffsets[0])
        elif node_cls and (link_pos_value < link_mut_value):
            disjoin(offsets, noffsets[0])
    mask = get_all()
    return mask
|