| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269 |
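"""
Text recognition and localization module.

Finds the specified text in a screenshot and returns the coordinates of that
text within the screenshot. Text recognition is performed with OnnxOCR.
"""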
- import sys
- import os
- import cv2
- from pathlib import Path
- from typing import Optional, Tuple, Dict, Any
# Add the bundled OnnxOCR directory (a sibling of this file) to sys.path so
# that the `onnxocr` package below can be imported.
current_dir = Path(__file__).parent
onnxocr_path = current_dir / 'OnnxOCR'
if str(onnxocr_path) not in sys.path:
    sys.path.insert(0, str(onnxocr_path))

# Must come after the sys.path tweak above, hence not at the top of the file.
from onnxocr.onnx_paddleocr import ONNXPaddleOcr  # noqa: E402

# Set an environment variable intended to skip the model-source connectivity
# check (to avoid a timeout on first run).
# NOTE(review): this is set after the import above; confirm the check is not
# performed at import time.
os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'
def find_text_location(
    screenshot_path: str,
    target_text: str,
    device_width: Optional[int] = None,
    device_height: Optional[int] = None,
    use_angle_cls: bool = True,
    lang: str = 'ch'
) -> Optional[Dict[str, Any]]:
    """Find the first OCR-recognized text line that matches ``target_text``.

    The screenshot is read with ``cv2.imread`` and handed to ``ONNXPaddleOcr``
    as an image array. Recognized lines are checked in the order the OCR
    engine returns them; a line matches when ``target_text`` is a substring of
    the recognized text, or the (non-empty) recognized text is a substring of
    ``target_text``. The first match is returned.

    Args:
        screenshot_path: Path to the screenshot file.
        target_text: Text to look for. Must not be empty or whitespace-only.
        device_width: Real device width in pixels. Coordinates are scaled
            from screenshot pixels to device pixels only when both
            ``device_width`` and ``device_height`` are given.
        device_height: Real device height in pixels (see ``device_width``).
        use_angle_cls: Whether to enable the angle classifier; forwarded to
            both the ``ONNXPaddleOcr`` constructor and its ``ocr`` call.
        lang: Language code ('ch' or 'en'). Not read by this function; kept
            for interface compatibility.

    Returns:
        On a match, a dict of the form::

            {
                "found": True,
                "x": <center x>,
                "y": <center y>,
                "width": <bounding-box width>,
                "height": <bounding-box height>,
                "bbox": [[x1, y1], [x2, y2], [x3, y3], [x4, y4]],
                "text": <recognized text of the matching line>,
                "confidence": <recognition confidence as float>,
            }

        with all coordinates already scaled to the device resolution when one
        was supplied. If nothing matches, ``{"found": False}``.

    Raises:
        FileNotFoundError: ``screenshot_path`` does not exist.
        ValueError: ``target_text`` is empty or whitespace-only.
        RuntimeError: Any failure while reading the image or running OCR
            (including an unreadable image file). The original exception is
            chained as ``__cause__``.
    """
    # Validate inputs before doing any expensive work.
    screenshot = Path(screenshot_path)
    if not screenshot.exists():
        raise FileNotFoundError(f"截图文件不存在: {screenshot_path}")

    if not target_text or not target_text.strip():
        raise ValueError("目标文字不能为空")

    try:
        # Read the screenshot; its real size is needed for coordinate scaling.
        img = cv2.imread(str(screenshot))
        if img is None:
            raise ValueError(f"无法读取截图文件: {screenshot_path}")

        screenshot_height, screenshot_width = img.shape[:2]

        # Scale factors from screenshot pixels to device pixels (identity
        # unless both device dimensions were supplied).
        scale_x = 1.0
        scale_y = 1.0
        if device_width is not None and device_height is not None:
            scale_x = device_width / screenshot_width
            scale_y = device_height / screenshot_height

        # The engine takes the decoded image object, not a file path.
        ocr = ONNXPaddleOcr(use_angle_cls=use_angle_cls, use_gpu=False)
        result = ocr.ocr(img, det=True, rec=True, cls=use_angle_cls)

        if not result or not result[0]:
            return {"found": False}

        # Each entry of result[0] is indexed as:
        #   line[0] -> four corner points [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
        #   line[1] -> (recognized text, confidence)
        for line in result[0]:
            if not line:
                continue

            bbox = line[0]
            text_info = line[1]
            recognized_text = text_info[0]
            confidence = text_info[1]

            # An empty string is a substring of every string, so an empty OCR
            # line would otherwise "match" any target via the reverse check.
            if not recognized_text:
                continue

            # Bidirectional partial match (use == here for exact matching).
            if target_text not in recognized_text and recognized_text not in target_text:
                continue

            x_coords = [point[0] for point in bbox]
            y_coords = [point[1] for point in bbox]

            # Axis-aligned bounds in screenshot pixels, then converted to
            # device pixels.
            min_x = int(int(min(x_coords)) * scale_x)
            max_x = int(int(max(x_coords)) * scale_x)
            min_y = int(int(min(y_coords)) * scale_y)
            max_y = int(int(max(y_coords)) * scale_y)

            scaled_bbox = [[int(p[0] * scale_x), int(p[1] * scale_y)] for p in bbox]

            return {
                "found": True,
                "x": int((min_x + max_x) / 2),
                "y": int((min_y + max_y) / 2),
                "width": max_x - min_x,
                "height": max_y - min_y,
                "bbox": scaled_bbox,  # already in device coordinates
                "text": recognized_text,
                "confidence": float(confidence),
            }

        # No recognized line matched the target.
        return {"found": False}

    except Exception as e:
        # Keep the RuntimeError contract, but preserve the original cause.
        raise RuntimeError(f"OCR 识别过程中出错: {str(e)}") from e
def find_text_location_multiple(
    screenshot_path: str,
    target_text: str,
    device_width: Optional[int] = None,
    device_height: Optional[int] = None,
    use_angle_cls: bool = True,
    lang: str = 'ch'
) -> list:
    """Find every OCR-recognized text line that matches ``target_text``.

    Works like a single-match lookup but collects all matching lines instead
    of stopping at the first one. A line matches when ``target_text`` is a
    substring of the recognized text, or the (non-empty) recognized text is a
    substring of ``target_text``.

    Args:
        screenshot_path: Path to the screenshot file.
        target_text: Text to look for. Must not be empty or whitespace-only.
        device_width: Real device width in pixels. Coordinates are scaled
            from screenshot pixels to device pixels only when both
            ``device_width`` and ``device_height`` are given.
        device_height: Real device height in pixels (see ``device_width``).
        use_angle_cls: Whether to enable the angle classifier; forwarded to
            both the ``ONNXPaddleOcr`` constructor and its ``ocr`` call.
        lang: Language code. Not read by this function; kept for interface
            compatibility.

    Returns:
        A list with one dict per matching line, in the order the OCR engine
        returned them. Each dict has the keys ``found`` (always ``True``),
        ``x``, ``y``, ``width``, ``height``, ``bbox``, ``text`` and
        ``confidence``. The list is empty when nothing matches.

    Raises:
        FileNotFoundError: ``screenshot_path`` does not exist.
        ValueError: ``target_text`` is empty or whitespace-only.
        RuntimeError: Any failure while reading the image or running OCR
            (including an unreadable image file). The original exception is
            chained as ``__cause__``.
    """
    screenshot = Path(screenshot_path)
    if not screenshot.exists():
        raise FileNotFoundError(f"截图文件不存在: {screenshot_path}")

    if not target_text or not target_text.strip():
        raise ValueError("目标文字不能为空")

    try:
        # Read the screenshot; its real size is needed for coordinate scaling.
        img = cv2.imread(str(screenshot))
        if img is None:
            raise ValueError(f"无法读取截图文件: {screenshot_path}")

        screenshot_height, screenshot_width = img.shape[:2]

        # Scale factors from screenshot pixels to device pixels (identity
        # unless both device dimensions were supplied).
        scale_x = 1.0
        scale_y = 1.0
        if device_width is not None and device_height is not None:
            scale_x = device_width / screenshot_width
            scale_y = device_height / screenshot_height

        # The engine takes the decoded image object, not a file path.
        ocr = ONNXPaddleOcr(use_angle_cls=use_angle_cls, use_gpu=False)
        result = ocr.ocr(img, det=True, rec=True, cls=use_angle_cls)

        if not result or not result[0]:
            return []

        matches = []
        for line in result[0]:
            if not line:
                continue

            # line[0] -> four corner points, line[1] -> (text, confidence)
            bbox = line[0]
            text_info = line[1]
            recognized_text = text_info[0]
            confidence = text_info[1]

            # An empty string is a substring of every string, so an empty OCR
            # line would otherwise "match" any target via the reverse check.
            if not recognized_text:
                continue

            if target_text not in recognized_text and recognized_text not in target_text:
                continue

            x_coords = [point[0] for point in bbox]
            y_coords = [point[1] for point in bbox]

            # Axis-aligned bounds in screenshot pixels, then converted to
            # device pixels.
            min_x = int(int(min(x_coords)) * scale_x)
            max_x = int(int(max(x_coords)) * scale_x)
            min_y = int(int(min(y_coords)) * scale_y)
            max_y = int(int(max(y_coords)) * scale_y)

            scaled_bbox = [[int(p[0] * scale_x), int(p[1] * scale_y)] for p in bbox]

            matches.append({
                "found": True,
                "x": int((min_x + max_x) / 2),
                "y": int((min_y + max_y) / 2),
                "width": max_x - min_x,
                "height": max_y - min_y,
                "bbox": scaled_bbox,
                "text": recognized_text,
                "confidence": float(confidence),
            })

        return matches

    except Exception as e:
        # Keep the RuntimeError contract, but preserve the original cause.
        raise RuntimeError(f"OCR 识别过程中出错: {str(e)}") from e
if __name__ == "__main__":
    # Command-line smoke test:
    #   python string-reg-location.py <screenshot> <text> [device_w] [device_h]
    if len(sys.argv) < 3:
        print("用法: python string-reg-location.py <截图路径> <要查找的文字> [设备宽度] [设备高度]")
        print("示例: python string-reg-location.py screenshot.png \"你好\" 1080 2400")
        sys.exit(1)

    screenshot_path = sys.argv[1]
    target_text = sys.argv[2]

    # The device resolution is optional and only used when both values are
    # present and parse as integers.
    device_width = None
    device_height = None
    if len(sys.argv) >= 5:
        try:
            device_width = int(sys.argv[3])
            device_height = int(sys.argv[4])
        except ValueError:
            # Drop both values so a half-parsed resolution is never passed on.
            device_width = None
            device_height = None
            print("警告: 无法解析设备分辨率,将使用截图原始尺寸")

    try:
        result = find_text_location(screenshot_path, target_text, device_width, device_height)
        if result.get("found"):
            x = result["x"]
            y = result["y"]
            w = result["width"]
            h = result["height"]
            print(f"找到文字!坐标: x={x}, y={y}, 宽度={w}, 高度={h}")
            print(f"识别的文字: {result.get('text', '')}")
            print(f"置信度: {result.get('confidence', 0):.2f}")
            print(f"JSON格式: {{\"x\": {x}, \"y\": {y}, \"width\": {w}, \"height\": {h}}}")
        else:
            print("未找到匹配的文字")
    except Exception as e:
        # Top-level boundary: report the failure and exit non-zero.
        print(f"错误: {e}")
        sys.exit(1)
|