"""
Text recognition and localisation module.

Finds a given piece of text inside a screenshot and returns the position of
that text within the screenshot. Recognition is performed with OnnxOCR.
"""

import sys
import os
import cv2
from pathlib import Path
from typing import Optional, Tuple, Dict, Any, List

# Make the OnnxOCR directory that sits next to this file importable.
current_dir = Path(__file__).parent
onnxocr_path = current_dir / 'OnnxOCR'
if str(onnxocr_path) not in sys.path:
    sys.path.insert(0, str(onnxocr_path))

from onnxocr.onnx_paddleocr import ONNXPaddleOcr

# Skip the model-source connectivity check (meant to avoid a timeout on the
# first run).
# NOTE(review): this is assigned after the OCR package has been imported; if
# the variable is read at import time it has no effect here -- confirm.
os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'


def _text_matches(target_text: str, recognized_text: str) -> bool:
    """Return True when either string contains the other (partial match)."""
    # An empty string is a substring of every string, so an empty OCR result
    # would otherwise "match" any target.
    if not recognized_text:
        return False
    return target_text in recognized_text or recognized_text in target_text


def _build_match(
    bbox: Any,
    recognized_text: str,
    confidence: Any,
    scale_x: float,
    scale_y: float,
) -> Dict[str, Any]:
    """Convert one OCR hit into the result dictionary returned to callers."""
    x_coords = [point[0] for point in bbox]
    y_coords = [point[1] for point in bbox]

    # Bounding box: truncated to whole screenshot pixels first, then scaled
    # and truncated again.
    min_x = int(int(min(x_coords)) * scale_x)
    max_x = int(int(max(x_coords)) * scale_x)
    min_y = int(int(min(y_coords)) * scale_y)
    max_y = int(int(max(y_coords)) * scale_y)

    # The four corner points, scaled with the same factors.
    scaled_bbox = [[int(p[0] * scale_x), int(p[1] * scale_y)] for p in bbox]

    return {
        "found": True,
        "x": int((min_x + max_x) / 2),   # centre of the bounding box
        "y": int((min_y + max_y) / 2),
        "width": max_x - min_x,
        "height": max_y - min_y,
        "bbox": scaled_bbox,
        "text": recognized_text,          # text as actually recognised
        "confidence": float(confidence),
    }


def _find_matches(
    screenshot_path: str,
    target_text: str,
    device_width: Optional[int],
    device_height: Optional[int],
    use_angle_cls: bool,
    first_only: bool,
) -> List[Dict[str, Any]]:
    """Run OCR on the screenshot and collect the lines matching target_text.

    Shared implementation of the two public lookup functions. When
    ``first_only`` is true the scan stops at the first matching line.

    Raises:
        FileNotFoundError: the screenshot path does not exist.
        ValueError: ``target_text`` is empty or whitespace only.
        RuntimeError: any failure while reading the image or running OCR
            (the original exception is attached as ``__cause__``).
    """
    screenshot = Path(screenshot_path)
    if not screenshot.exists():
        raise FileNotFoundError(f"截图文件不存在: {screenshot_path}")

    if not target_text or not target_text.strip():
        raise ValueError("目标文字不能为空")

    try:
        # Load the screenshot; its real size is needed for scaling and the
        # OCR engine is given the image object rather than the path.
        img = cv2.imread(str(screenshot))
        if img is None:
            raise ValueError(f"无法读取截图文件: {screenshot_path}")

        screenshot_height, screenshot_width = img.shape[:2]

        # Scale factors are only applied when both device dimensions are
        # supplied; otherwise coordinates stay in screenshot pixels.
        scale_x = 1.0
        scale_y = 1.0
        if device_width is not None and device_height is not None:
            scale_x = device_width / screenshot_width
            scale_y = device_height / screenshot_height

        # A new engine is created for every call.
        ocr = ONNXPaddleOcr(use_angle_cls=use_angle_cls, use_gpu=False)
        result = ocr.ocr(img, det=True, rec=True, cls=use_angle_cls)

        if not result or not result[0]:
            return []

        # result[0] is read as a list with one entry per recognised line:
        #   [[[x1, y1], [x2, y2], [x3, y3], [x4, y4]], (text, confidence)]
        matches: List[Dict[str, Any]] = []
        for line in result[0]:
            if not line:
                continue

            bbox = line[0]
            text_info = line[1]
            recognized_text = text_info[0]
            confidence = text_info[1]

            if not _text_matches(target_text, recognized_text):
                continue

            matches.append(
                _build_match(bbox, recognized_text, confidence, scale_x, scale_y)
            )
            if first_only:
                break

        return matches

    except Exception as e:
        raise RuntimeError(f"OCR 识别过程中出错: {str(e)}") from e


def find_text_location(
    screenshot_path: str,
    target_text: str,
    device_width: Optional[int] = None,
    device_height: Optional[int] = None,
    use_angle_cls: bool = True,
    lang: str = 'ch'
) -> Optional[Dict[str, Any]]:
    """
    Find the target text in a screenshot and return its coordinates.

    Args:
        screenshot_path: Path of the screenshot file.
        target_text: Text to look for.
        device_width: Real device width in pixels. When given together with
            device_height, coordinates are converted to device resolution.
        device_height: Real device height in pixels (see device_width).
        use_angle_cls: Whether to use the angle classifier; passed to
            OnnxOCR as use_angle_cls / cls. Defaults to True.
        lang: Language type, default 'ch'. Accepted for interface
            compatibility; this module does not pass it to OnnxOCR.

    Returns:
        For the first recognised line that contains the target text, or is
        contained in it:
        {
            "found": True,
            "x": centre x,
            "y": centre y,
            "width": box width,
            "height": box height,
            "bbox": [[x1,y1], [x2,y2], [x3,y3], [x4,y4]],  # corner points
            "text": recognised text,
            "confidence": recognition confidence
        }
        If nothing matches, {"found": False}.

    Raises:
        FileNotFoundError: the screenshot file does not exist.
        ValueError: target_text is empty or whitespace only.
        RuntimeError: reading the image or running OCR failed.
    """
    matches = _find_matches(
        screenshot_path,
        target_text,
        device_width,
        device_height,
        use_angle_cls,
        first_only=True,
    )
    if matches:
        return matches[0]
    return {"found": False}


def find_text_location_multiple(
    screenshot_path: str,
    target_text: str,
    device_width: Optional[int] = None,
    device_height: Optional[int] = None,
    use_angle_cls: bool = True,
    lang: str = 'ch'
) -> list:
    """
    Find every occurrence of the target text in a screenshot.

    Args:
        screenshot_path: Path of the screenshot file.
        target_text: Text to look for.
        device_width: Real device width in pixels.
        device_height: Real device height in pixels.
        use_angle_cls: Whether to use the angle classifier.
        lang: Language type. Accepted for interface compatibility; this
            module does not pass it to OnnxOCR.

    Returns:
        A list with one dictionary per matching line, in the order the OCR
        engine returned them. Each dictionary has the same keys as the
        result of find_text_location. The list is empty when nothing
        matches.

    Raises:
        FileNotFoundError: the screenshot file does not exist.
        ValueError: target_text is empty or whitespace only.
        RuntimeError: reading the image or running OCR failed.
    """
    return _find_matches(
        screenshot_path,
        target_text,
        device_width,
        device_height,
        use_angle_cls,
        first_only=False,
    )


def _main() -> None:
    """Command-line entry point: locate a text and print its position."""
    if len(sys.argv) < 3:
        print("用法: python string-reg-location.py <截图路径> <要查找的文字> [设备宽度] [设备高度]")
        print("示例: python string-reg-location.py screenshot.png \"你好\" 1080 2400")
        sys.exit(1)

    screenshot_path = sys.argv[1]
    target_text = sys.argv[2]
    device_width = None
    device_height = None

    if len(sys.argv) >= 5:
        try:
            # Both values are parsed before either is assigned, so a bad
            # height cannot leave a half-configured resolution behind.
            device_width, device_height = int(sys.argv[3]), int(sys.argv[4])
        except ValueError:
            print("警告: 无法解析设备分辨率，将使用截图原始尺寸")

    try:
        result = find_text_location(screenshot_path, target_text, device_width, device_height)
        if result.get("found"):
            x = result["x"]
            y = result["y"]
            w = result["width"]
            h = result["height"]
            print(f"找到文字！坐标: x={x}, y={y}, 宽度={w}, 高度={h}")
            print(f"识别的文字: {result.get('text', '')}")
            print(f"置信度: {result.get('confidence', 0):.2f}")
            print(f"JSON格式: {{\"x\": {x}, \"y\": {y}, \"width\": {w}, \"height\": {h}}}")
        else:
            print("未找到匹配的文字")
    except Exception as e:
        print(f"错误: {e}")
        sys.exit(1)


if __name__ == "__main__":
    _main()