eval_end2end.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. import re
  16. import sys
  17. import shapely
  18. from shapely.geometry import Polygon
  19. import numpy as np
  20. from collections import defaultdict
  21. import operator
  22. import editdistance
  23. def strQ2B(ustring):
  24. rstring = ""
  25. for uchar in ustring:
  26. inside_code = ord(uchar)
  27. if inside_code == 12288:
  28. inside_code = 32
  29. elif inside_code >= 65281 and inside_code <= 65374:
  30. inside_code -= 65248
  31. rstring += chr(inside_code)
  32. return rstring
  33. def polygon_from_str(polygon_points):
  34. """
  35. Create a shapely polygon object from gt or dt line.
  36. """
  37. polygon_points = np.array(polygon_points).reshape(4, 2)
  38. polygon = Polygon(polygon_points).convex_hull
  39. return polygon
  40. def polygon_iou(poly1, poly2):
  41. """
  42. Intersection over union between two shapely polygons.
  43. """
  44. if not poly1.intersects(poly2): # this test is fast and can accelerate calculation
  45. iou = 0
  46. else:
  47. try:
  48. inter_area = poly1.intersection(poly2).area
  49. union_area = poly1.area + poly2.area - inter_area
  50. iou = float(inter_area) / union_area
  51. except shapely.geos.TopologicalError:
  52. # except Exception as e:
  53. # print(e)
  54. print("shapely.geos.TopologicalError occurred, iou set to 0")
  55. iou = 0
  56. return iou
  57. def ed(str1, str2):
  58. return editdistance.eval(str1, str2)
  59. def e2e_eval(gt_dir, res_dir, ignore_blank=False):
  60. print("start testing...")
  61. iou_thresh = 0.5
  62. val_names = os.listdir(gt_dir)
  63. num_gt_chars = 0
  64. gt_count = 0
  65. dt_count = 0
  66. hit = 0
  67. ed_sum = 0
  68. for i, val_name in enumerate(val_names):
  69. with open(os.path.join(gt_dir, val_name), encoding="utf-8") as f:
  70. gt_lines = [o.strip() for o in f.readlines()]
  71. gts = []
  72. ignore_masks = []
  73. for line in gt_lines:
  74. parts = line.strip().split("\t")
  75. # ignore illegal data
  76. if len(parts) < 9:
  77. continue
  78. assert len(parts) < 11
  79. if len(parts) == 9:
  80. gts.append(parts[:8] + [""])
  81. else:
  82. gts.append(parts[:8] + [parts[-1]])
  83. ignore_masks.append(parts[8])
  84. val_path = os.path.join(res_dir, val_name)
  85. if not os.path.exists(val_path):
  86. dt_lines = []
  87. else:
  88. with open(val_path, encoding="utf-8") as f:
  89. dt_lines = [o.strip() for o in f.readlines()]
  90. dts = []
  91. for line in dt_lines:
  92. # print(line)
  93. parts = line.strip().split("\t")
  94. assert len(parts) < 10, "line error: {}".format(line)
  95. if len(parts) == 8:
  96. dts.append(parts + [""])
  97. else:
  98. dts.append(parts)
  99. dt_match = [False] * len(dts)
  100. gt_match = [False] * len(gts)
  101. all_ious = defaultdict(tuple)
  102. for index_gt, gt in enumerate(gts):
  103. gt_coors = [float(gt_coor) for gt_coor in gt[0:8]]
  104. gt_poly = polygon_from_str(gt_coors)
  105. for index_dt, dt in enumerate(dts):
  106. dt_coors = [float(dt_coor) for dt_coor in dt[0:8]]
  107. dt_poly = polygon_from_str(dt_coors)
  108. iou = polygon_iou(dt_poly, gt_poly)
  109. if iou >= iou_thresh:
  110. all_ious[(index_gt, index_dt)] = iou
  111. sorted_ious = sorted(all_ious.items(), key=operator.itemgetter(1), reverse=True)
  112. sorted_gt_dt_pairs = [item[0] for item in sorted_ious]
  113. # matched gt and dt
  114. for gt_dt_pair in sorted_gt_dt_pairs:
  115. index_gt, index_dt = gt_dt_pair
  116. if gt_match[index_gt] == False and dt_match[index_dt] == False:
  117. gt_match[index_gt] = True
  118. dt_match[index_dt] = True
  119. if ignore_blank:
  120. gt_str = strQ2B(gts[index_gt][8]).replace(" ", "")
  121. dt_str = strQ2B(dts[index_dt][8]).replace(" ", "")
  122. else:
  123. gt_str = strQ2B(gts[index_gt][8])
  124. dt_str = strQ2B(dts[index_dt][8])
  125. if ignore_masks[index_gt] == "0":
  126. ed_sum += ed(gt_str, dt_str)
  127. num_gt_chars += len(gt_str)
  128. if gt_str == dt_str:
  129. hit += 1
  130. gt_count += 1
  131. dt_count += 1
  132. # unmatched dt
  133. for tindex, dt_match_flag in enumerate(dt_match):
  134. if dt_match_flag == False:
  135. dt_str = dts[tindex][8]
  136. gt_str = ""
  137. ed_sum += ed(dt_str, gt_str)
  138. dt_count += 1
  139. # unmatched gt
  140. for tindex, gt_match_flag in enumerate(gt_match):
  141. if gt_match_flag == False and ignore_masks[tindex] == "0":
  142. dt_str = ""
  143. gt_str = gts[tindex][8]
  144. ed_sum += ed(gt_str, dt_str)
  145. num_gt_chars += len(gt_str)
  146. gt_count += 1
  147. eps = 1e-9
  148. print("hit, dt_count, gt_count", hit, dt_count, gt_count)
  149. precision = hit / (dt_count + eps)
  150. recall = hit / (gt_count + eps)
  151. fmeasure = 2.0 * precision * recall / (precision + recall + eps)
  152. avg_edit_dist_img = ed_sum / len(val_names)
  153. avg_edit_dist_field = ed_sum / (gt_count + eps)
  154. character_acc = 1 - ed_sum / (num_gt_chars + eps)
  155. print("character_acc: %.2f" % (character_acc * 100) + "%")
  156. print("avg_edit_dist_field: %.2f" % (avg_edit_dist_field))
  157. print("avg_edit_dist_img: %.2f" % (avg_edit_dist_img))
  158. print("precision: %.2f" % (precision * 100) + "%")
  159. print("recall: %.2f" % (recall * 100) + "%")
  160. print("fmeasure: %.2f" % (fmeasure * 100) + "%")
  161. if __name__ == "__main__":
  162. # if len(sys.argv) != 3:
  163. # print("python3 ocr_e2e_eval.py gt_dir res_dir")
  164. # exit(-1)
  165. # gt_folder = sys.argv[1]
  166. # pred_folder = sys.argv[2]
  167. gt_folder = sys.argv[1]
  168. pred_folder = sys.argv[2]
  169. e2e_eval(gt_folder, pred_folder)