# predict_cls.py
  1. import cv2
  2. import copy
  3. import numpy as np
  4. import math
  5. from .cls_postprocess import ClsPostProcess
  6. from .predict_base import PredictBase
  7. class TextClassifier(PredictBase):
  8. def __init__(self, args):
  9. self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")]
  10. self.cls_batch_num = args.cls_batch_num
  11. self.cls_thresh = args.cls_thresh
  12. self.postprocess_op = ClsPostProcess(label_list=args.label_list)
  13. # 初始化模型
  14. self.cls_onnx_session = self.get_onnx_session(args.cls_model_dir, args.use_gpu)
  15. self.cls_input_name = self.get_input_name(self.cls_onnx_session)
  16. self.cls_output_name = self.get_output_name(self.cls_onnx_session)
  17. def resize_norm_img(self, img):
  18. imgC, imgH, imgW = self.cls_image_shape
  19. h = img.shape[0]
  20. w = img.shape[1]
  21. ratio = w / float(h)
  22. if math.ceil(imgH * ratio) > imgW:
  23. resized_w = imgW
  24. else:
  25. resized_w = int(math.ceil(imgH * ratio))
  26. resized_image = cv2.resize(img, (resized_w, imgH))
  27. resized_image = resized_image.astype("float32")
  28. if self.cls_image_shape[0] == 1:
  29. resized_image = resized_image / 255
  30. resized_image = resized_image[np.newaxis, :]
  31. else:
  32. resized_image = resized_image.transpose((2, 0, 1)) / 255
  33. resized_image -= 0.5
  34. resized_image /= 0.5
  35. padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
  36. padding_im[:, :, 0:resized_w] = resized_image
  37. return padding_im
  38. def __call__(self, img_list):
  39. img_list = copy.deepcopy(img_list)
  40. img_num = len(img_list)
  41. # Calculate the aspect ratio of all text bars
  42. width_list = []
  43. for img in img_list:
  44. width_list.append(img.shape[1] / float(img.shape[0]))
  45. # Sorting can speed up the cls process
  46. indices = np.argsort(np.array(width_list))
  47. cls_res = [["", 0.0]] * img_num
  48. batch_num = self.cls_batch_num
  49. for beg_img_no in range(0, img_num, batch_num):
  50. end_img_no = min(img_num, beg_img_no + batch_num)
  51. norm_img_batch = []
  52. max_wh_ratio = 0
  53. for ino in range(beg_img_no, end_img_no):
  54. h, w = img_list[indices[ino]].shape[0:2]
  55. wh_ratio = w * 1.0 / h
  56. max_wh_ratio = max(max_wh_ratio, wh_ratio)
  57. for ino in range(beg_img_no, end_img_no):
  58. norm_img = self.resize_norm_img(img_list[indices[ino]])
  59. norm_img = norm_img[np.newaxis, :]
  60. norm_img_batch.append(norm_img)
  61. norm_img_batch = np.concatenate(norm_img_batch)
  62. norm_img_batch = norm_img_batch.copy()
  63. input_feed = self.get_input_feed(self.cls_input_name, norm_img_batch)
  64. outputs = self.cls_onnx_session.run(
  65. self.cls_output_name, input_feed=input_feed
  66. )
  67. prob_out = outputs[0]
  68. cls_result = self.postprocess_op(prob_out)
  69. for rno in range(len(cls_result)):
  70. label, score = cls_result[rno]
  71. cls_res[indices[beg_img_no + rno]] = [label, score]
  72. if "180" in label and score > self.cls_thresh:
  73. img_list[indices[beg_img_no + rno]] = cv2.rotate(
  74. img_list[indices[beg_img_no + rno]], 1
  75. )
  76. return img_list, cls_res