# string-reg-location.py
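"""
Text recognition and location module.

Finds a given piece of text in a screenshot and returns the coordinates of
that text within the screenshot. Text recognition is performed with OnnxOCR.
"""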
import os
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import cv2
  10. # 娣诲姞 OnnxOCR 璺緞鍒?sys.path
  11. current_dir = Path(__file__).parent
  12. onnxocr_path = current_dir / 'OnnxOCR'
  13. if str(onnxocr_path) not in sys.path:
  14. sys.path.insert(0, str(onnxocr_path))
  15. from onnxocr.onnx_paddleocr import ONNXPaddleOcr
  16. # 璁剧疆鐜鍙橀噺锛岃烦杩囨ā鍨嬫簮杩炴帴妫€鏌ワ紙閬垮厤棣栨杩愯鏃惰秴鏃讹級
  17. os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'
  18. def find_text_location(
  19. screenshot_path: str,
  20. target_text: str,
  21. device_width: int = None,
  22. device_height: int = None,
  23. use_angle_cls: bool = True,
  24. lang: str = 'ch'
  25. ) -> Optional[Dict[str, Any]]:
  26. """
  27. 鍦ㄦ埅鍥句腑鏌ユ壘鐩爣鏂囧瓧骞惰繑鍥炲潗鏍?
  28. Args:
  29. screenshot_path: 鎴浘鏂囦欢璺緞
  30. target_text: 瑕佹煡鎵剧殑鏂囧瓧
  31. device_width: 璁惧瀹為檯瀹藉害锛堝儚绱狅級锛屽鏋滄彁渚涘垯浼氬皢鍧愭爣杞崲鍒拌澶囧垎杈ㄧ巼
  32. device_height: 璁惧瀹為檯楂樺害锛堝儚绱狅級锛屽鏋滄彁渚涘垯浼氬皢鍧愭爣杞崲鍒拌澶囧垎杈ㄧ巼
  33. use_angle_cls: 鏄惁浣跨敤瑙掑害鍒嗙被鍣紝榛樿True (瀵瑰簲 OnnxOCR 鐨?use_angle_cls)
  34. lang: 璇█绫诲瀷锛?ch'琛ㄧず涓嫳鏂囨贩鍚堬紝'en'琛ㄧず鑻辨枃锛岄粯璁?ch' (OnnxOCR 鍐呴儴澶勭悊)
  35. Returns:
  36. 濡傛灉鎵惧埌鏂囧瓧锛岃繑鍥炲寘鍚潗鏍囦俊鎭殑瀛楀吀锛? {
  37. "found": True,
  38. "x": 涓績鐐箈鍧愭爣,
  39. "y": 涓績鐐箉鍧愭爣,
  40. "width": 鏂囧瓧妗嗗搴?
  41. "height": 鏂囧瓧妗嗛珮搴?
  42. "bbox": [[x1,y1], [x2,y2], [x3,y3], [x4,y4]] # 鏂囧瓧妗嗙殑鍥涗釜瑙掔偣
  43. }
  44. 濡傛灉鏈壘鍒帮紝杩斿洖 {"found": False}
  45. """
  46. # 妫€鏌ユ枃浠舵槸鍚﹀瓨鍦? screenshot = Path(screenshot_path)
  47. if not screenshot.exists():
  48. raise FileNotFoundError(f"鎴浘鏂囦欢涓嶅瓨鍦? {screenshot_path}")
  49. # 妫€鏌ョ洰鏍囨枃瀛楁槸鍚︿负绌? if not target_text or not target_text.strip():
  50. raise ValueError("鐩爣鏂囧瓧涓嶈兘涓虹┖")
  51. try:
  52. # 璇诲彇鎴浘浠ヨ幏鍙栧疄闄呭昂瀵? img = cv2.imread(str(screenshot))
  53. if img is None:
  54. raise ValueError(f"鏃犳硶璇诲彇鎴浘鏂囦欢: {screenshot_path}")
  55. screenshot_height, screenshot_width = img.shape[:2]
  56. # 璁$畻缂╂斁姣斾緥锛堝鏋滄彁渚涗簡璁惧鍒嗚鲸鐜囷級
  57. scale_x = 1.0
  58. scale_y = 1.0
  59. if device_width is not None and device_height is not None:
  60. scale_x = device_width / screenshot_width
  61. scale_y = device_height / screenshot_height
  62. # 鍒濆鍖?OnnxOCR锛堥娆¤皟鐢ㄥ彲鑳介渶瑕佷竴浜涙椂闂达級
  63. # OnnxOCR 闇€瑕佷紶鍏ュ浘鐗囧璞★紝鑰屼笉鏄矾寰? ocr = ONNXPaddleOcr(use_angle_cls=use_angle_cls, use_gpu=False)
  64. # 璇嗗埆鎴浘涓殑鎵€鏈夋枃瀛楋紙OnnxOCR 闇€瑕佷紶鍏?cv2 璇诲彇鐨勫浘鐗囧璞★級
  65. result = ocr.ocr(img, det=True, rec=True, cls=use_angle_cls)
  66. # 濡傛灉璇嗗埆缁撴灉涓虹┖
  67. if not result or not result[0]:
  68. return {"found": False}
  69. # 鍦ㄨ瘑鍒粨鏋滀腑鏌ユ壘鐩爣鏂囧瓧
  70. # result[0] 鏄竴涓垪琛紝姣忎釜鍏冪礌鏄竴琛屾枃瀛楃殑璇嗗埆缁撴灉
  71. # 鏍煎紡: [[[x1,y1], [x2,y2], [x3,y3], [x4,y4]], (鏂囧瓧鍐呭, 缃俊搴?]
  72. for line in result[0]:
  73. if not line:
  74. continue
  75. # line[0] 鏄洓涓鐐瑰潗鏍? # line[1] 鏄?(鏂囧瓧鍐呭, 缃俊搴?
  76. bbox = line[0] # 鍥涗釜瑙掔偣: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
  77. text_info = line[1]
  78. recognized_text = text_info[0] # 璇嗗埆鐨勬枃瀛楀唴瀹? confidence = text_info[1] # 缃俊搴?
  79. # 妫€鏌ヨ瘑鍒殑鏂囧瓧鏄惁鍖呭惈鐩爣鏂囧瓧锛堟敮鎸侀儴鍒嗗尮閰嶅拰瀹屽叏鍖归厤锛? # 浣跨敤 in 鎿嶄綔绗︽敮鎸侀儴鍒嗗尮閰嶏紝濡傛灉闇€瑕佸畬鍏ㄥ尮閰嶅彲浠ヤ娇鐢?==
  80. if target_text in recognized_text or recognized_text in target_text:
  81. # 鎵惧埌鍖归厤锛岃绠楁枃瀛楁鐨勪腑蹇冪偣鍜屽昂瀵? # bbox 鏄洓涓鐐圭殑鍒楄〃: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
  82. x_coords = [point[0] for point in bbox]
  83. y_coords = [point[1] for point in bbox]
  84. # 璁$畻杈圭晫妗嗭紙鍩轰簬鎴浘灏哄锛? min_x = int(min(x_coords))
  85. max_x = int(max(x_coords))
  86. min_y = int(min(y_coords))
  87. max_y = int(max(y_coords))
  88. # 灏嗗潗鏍囪浆鎹㈠埌璁惧鍒嗚鲸鐜囷紙濡傛灉鎻愪緵浜嗚澶囧垎杈ㄧ巼锛? min_x = int(min_x * scale_x)
  89. max_x = int(max_x * scale_x)
  90. min_y = int(min_y * scale_y)
  91. max_y = int(max_y * scale_y)
  92. # 杞崲 bbox 鍧愭爣
  93. scaled_bbox = [[int(p[0] * scale_x), int(p[1] * scale_y)] for p in bbox]
  94. # 璁$畻涓績鐐癸紙鍩轰簬璁惧鍒嗚鲸鐜囷級
  95. center_x = int((min_x + max_x) / 2)
  96. center_y = int((min_y + max_y) / 2)
  97. # 璁$畻瀹藉害鍜岄珮搴︼紙鍩轰簬璁惧鍒嗚鲸鐜囷級
  98. width = max_x - min_x
  99. height = max_y - min_y
  100. return {
  101. "found": True,
  102. "x": center_x,
  103. "y": center_y,
  104. "width": width,
  105. "height": height,
  106. "bbox": scaled_bbox, # 宸茶浆鎹㈠埌璁惧鍒嗚鲸鐜囩殑鍧愭爣
  107. "text": recognized_text, # 瀹為檯璇嗗埆鐨勬枃瀛? "confidence": float(confidence) # 缃俊搴? }
  108. # 鏈壘鍒板尮閰嶇殑鏂囧瓧
  109. return {"found": False}
  110. except Exception as e:
  111. raise RuntimeError(f"OCR 璇嗗埆杩囩▼涓嚭閿? {str(e)}")
  112. def find_text_location_multiple(
  113. screenshot_path: str,
  114. target_text: str,
  115. device_width: int = None,
  116. device_height: int = None,
  117. use_angle_cls: bool = True,
  118. lang: str = 'ch'
  119. ) -> list:
  120. """
  121. 鍦ㄦ埅鍥句腑鏌ユ壘鐩爣鏂囧瓧鐨勬墍鏈夊嚭鐜颁綅缃紙鍙兘鏈夊澶勫尮閰嶏級
  122. Args:
  123. screenshot_path: 鎴浘鏂囦欢璺緞
  124. target_text: 瑕佹煡鎵剧殑鏂囧瓧
  125. device_width: 璁惧瀹為檯瀹藉害锛堝儚绱狅級
  126. device_height: 璁惧瀹為檯楂樺害锛堝儚绱狅級
  127. use_angle_cls: 鏄惁浣跨敤瑙掑害鍒嗙被鍣? lang: 璇█绫诲瀷
  128. Returns:
  129. 杩斿洖鎵€鏈夊尮閰嶄綅缃殑鍒楄〃锛屾瘡涓厓绱犱负鍖呭惈鍧愭爣淇℃伅鐨勫瓧鍏? """
  130. screenshot = Path(screenshot_path)
  131. if not screenshot.exists():
  132. raise FileNotFoundError(f"鎴浘鏂囦欢涓嶅瓨鍦? {screenshot_path}")
  133. if not target_text or not target_text.strip():
  134. raise ValueError("鐩爣鏂囧瓧涓嶈兘涓虹┖")
  135. try:
  136. # 璇诲彇鎴浘浠ヨ幏鍙栧疄闄呭昂瀵? img = cv2.imread(str(screenshot))
  137. if img is None:
  138. raise ValueError(f"鏃犳硶璇诲彇鎴浘鏂囦欢: {screenshot_path}")
  139. screenshot_height, screenshot_width = img.shape[:2]
  140. # 璁$畻缂╂斁姣斾緥锛堝鏋滄彁渚涗簡璁惧鍒嗚鲸鐜囷級
  141. scale_x = 1.0
  142. scale_y = 1.0
  143. if device_width is not None and device_height is not None:
  144. scale_x = device_width / screenshot_width
  145. scale_y = device_height / screenshot_height
  146. # 鍒濆鍖?OnnxOCR
  147. ocr = ONNXPaddleOcr(use_angle_cls=use_angle_cls, use_gpu=False)
  148. # OnnxOCR 闇€瑕佷紶鍏ュ浘鐗囧璞? result = ocr.ocr(img, det=True, rec=True, cls=use_angle_cls)
  149. if not result or not result[0]:
  150. return []
  151. matches = []
  152. for line in result[0]:
  153. if not line:
  154. continue
  155. bbox = line[0]
  156. text_info = line[1]
  157. recognized_text = text_info[0]
  158. confidence = text_info[1]
  159. # 妫€鏌ユ槸鍚﹀尮閰? if target_text in recognized_text or recognized_text in target_text:
  160. x_coords = [point[0] for point in bbox]
  161. y_coords = [point[1] for point in bbox]
  162. # 璁$畻杈圭晫妗嗭紙鍩轰簬鎴浘灏哄锛? min_x = int(min(x_coords))
  163. max_x = int(max(x_coords))
  164. min_y = int(min(y_coords))
  165. max_y = int(max(y_coords))
  166. # 灏嗗潗鏍囪浆鎹㈠埌璁惧鍒嗚鲸鐜囷紙濡傛灉鎻愪緵浜嗚澶囧垎杈ㄧ巼锛? min_x = int(min_x * scale_x)
  167. max_x = int(max_x * scale_x)
  168. min_y = int(min_y * scale_y)
  169. max_y = int(max_y * scale_y)
  170. # 杞崲 bbox 鍧愭爣
  171. scaled_bbox = [[int(p[0] * scale_x), int(p[1] * scale_y)] for p in bbox]
  172. center_x = int((min_x + max_x) / 2)
  173. center_y = int((min_y + max_y) / 2)
  174. width = max_x - min_x
  175. height = max_y - min_y
  176. matches.append({
  177. "found": True,
  178. "x": center_x,
  179. "y": center_y,
  180. "width": width,
  181. "height": height,
  182. "bbox": scaled_bbox,
  183. "text": recognized_text,
  184. "confidence": float(confidence)
  185. })
  186. return matches
  187. except Exception as e:
  188. raise RuntimeError(f"OCR 璇嗗埆杩囩▼涓嚭閿? {str(e)}")
  189. if __name__ == "__main__":
  190. # 娴嬭瘯绀轰緥
  191. if len(sys.argv) < 3:
  192. print("鐢ㄦ硶: python string-reg-location.py <鎴浘璺緞> <瑕佹煡鎵剧殑鏂囧瓧> [璁惧瀹藉害] [璁惧楂樺害]")
  193. print("绀轰緥: python string-reg-location.py screenshot.png \"浣犲ソ\" 1080 2400")
  194. sys.exit(1)
  195. screenshot_path = sys.argv[1]
  196. target_text = sys.argv[2]
  197. device_width = None
  198. device_height = None
  199. if len(sys.argv) >= 5:
  200. try:
  201. device_width = int(sys.argv[3])
  202. device_height = int(sys.argv[4])
  203. except ValueError:
  204. print("璀﹀憡: 鏃犳硶瑙f瀽璁惧鍒嗚鲸鐜囷紝灏嗕娇鐢ㄦ埅鍥惧師濮嬪昂瀵?)
  205. try:
  206. result = find_text_location(screenshot_path, target_text, device_width, device_height)
  207. if result.get("found"):
  208. x = result["x"]
  209. y = result["y"]
  210. w = result["width"]
  211. h = result["height"]
  212. print(f"鎵惧埌鏂囧瓧锛佸潗鏍? x={x}, y={y}, 瀹藉害={w}, 楂樺害={h}")
  213. print(f"璇嗗埆鐨勬枃瀛? {result.get('text', '')}")
  214. print(f"缃俊搴? {result.get('confidence', 0):.2f}")
  215. print(f"JSON鏍煎紡: {{\"x\": {x}, \"y\": {y}, \"width\": {w}, \"height\": {h}}}")
  216. else:
  217. print("鏈壘鍒板尮閰嶇殑鏂囧瓧")
  218. except Exception as e:
  219. print(f"閿欒: {e}")
  220. sys.exit(1)