ocr-chat.js 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744
  1. /**
  2. * OCR 聊天记录提取功能(Node.js 实现)
  3. *
  4. * 功能:根据屏幕截图提取聊天记录和聊天角色,并输出为 JSON 格式
  5. * - 使用 OCR 识别聊天内容
  6. * - 根据头像位置或消息位置识别发送者角色(friend/me)
  7. * - 返回 JSON 格式的消息数组,每条消息包含 sender 和 text 字段
  8. *
  9. * 实现方式:直接调用 Python 的 OnnxOCR 和 OpenCV,通过内联 Python 代码实现
  10. */
import { exec, execFile } from 'child_process';
import { readFile, unlink, writeFile } from 'fs/promises';
import { dirname, isAbsolute, join } from 'path';
import { fileURLToPath } from 'url';
import { promisify } from 'util';
  17. const execAsync = promisify(exec);
  18. const __filename = fileURLToPath(import.meta.url);
  19. const __dirname = dirname(__filename);
  20. /**
  21. * 安全读取包含 Unicode 字符的图片
  22. */
  23. function readImageSafe(imagePath) {
  24. return `
  25. import cv2
  26. import numpy as np
  27. import os
  28. def read_image_safe(image_path):
  29. abs_path = os.path.abspath(str(image_path))
  30. try:
  31. with open(abs_path, 'rb') as f:
  32. image_data = f.read()
  33. img_array = np.frombuffer(image_data, np.uint8)
  34. img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
  35. if img is None:
  36. raise ValueError(f"cv2.imdecode 无法解码图片: {abs_path}")
  37. return img
  38. except FileNotFoundError:
  39. raise FileNotFoundError(f"图片文件不存在: {abs_path}")
  40. except Exception as e:
  41. raise Exception(f"读取图片失败: {abs_path}, 错误: {str(e)}")
  42. `;
  43. }
  44. /**
  45. * 根据屏幕截图提取聊天记录和聊天角色
  46. *
  47. * @param {string} screenshotPath - 截图路径(屏幕截图文件)
  48. * @param {string} friendAvatarPath - 好友头像路径(可选,用于识别发送者角色)
  49. * @param {string} myAvatarPath - 我的头像路径(可选,用于识别发送者角色)
  50. * @param {number} deviceWidth - 设备宽度(可选,用于位置判断)
  51. * @param {number} deviceHeight - 设备高度(可选,用于位置判断)
  52. * @param {string} workflowFolder - 工作流文件夹路径(可选)
  53. * @param {string} regionJson - 识别区域 JSON 字符串(可选,包含四个顶点坐标的 corners 对象)
  54. * @returns {Promise<{success: boolean, messages?: Array<{sender: 'friend'|'me'|'unknown', text: string}>, messagesText?: string, error?: string}>}
  55. *
  56. * messages: JSON 格式的消息数组,每条消息包含:
  57. * - sender: 发送者角色('friend' 表示好友,'me' 表示自己,'unknown' 表示无法识别)
  58. * - text: 消息文本内容
  59. */
  60. export async function extractChatHistory(screenshotPath, friendAvatarPath, myAvatarPath, deviceWidth, deviceHeight, workflowFolder, regionJson = null, friendRgb = null, myRgb = null) {
  61. try {
  62. const pythonExePath = join(__dirname, '..', '..', 'py', 'venv', 'Scripts', 'python.exe');
  63. const onnxocrPath = join(__dirname, '..', '..', 'py', 'OnnxOCR');
  64. // 构建内联 Python 脚本
  65. const pythonCode = `
  66. import sys
  67. import os
  68. import cv2
  69. import numpy as np
  70. import json
  71. from pathlib import Path
  72. # 添加 OnnxOCR 路径
  73. sys.path.insert(0, r"${onnxocrPath.replace(/\\/g, '/')}")
  74. from onnxocr.onnx_paddleocr import ONNXPaddleOcr
  75. # 设置环境变量
  76. os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'
  77. ${readImageSafe()}
  78. def find_avatar_positions(screenshot_path, friend_avatar_path, my_avatar_path):
  79. """在截图中查找头像位置"""
  80. screenshot = read_image_safe(screenshot_path)
  81. result = {'friend': [], 'my': []}
  82. if friend_avatar_path and friend_avatar_path != 'None':
  83. try:
  84. friend_avatar = read_image_safe(friend_avatar_path)
  85. result_friend = cv2.matchTemplate(screenshot, friend_avatar, cv2.TM_CCOEFF_NORMED)
  86. locations_friend = np.where(result_friend >= 0.8)
  87. for pt in zip(*locations_friend[::-1]):
  88. result['friend'].append([int(pt[0]), int(pt[1])])
  89. except Exception as e:
  90. print(f"查找好友头像失败: {e}", file=sys.stderr)
  91. if my_avatar_path and my_avatar_path != 'None':
  92. try:
  93. my_avatar = read_image_safe(my_avatar_path)
  94. result_my = cv2.matchTemplate(screenshot, my_avatar, cv2.TM_CCOEFF_NORMED)
  95. locations_my = np.where(result_my >= 0.8)
  96. for pt in zip(*locations_my[::-1]):
  97. result['my'].append([int(pt[0]), int(pt[1])])
  98. except Exception as e:
  99. print(f"查找我的头像失败: {e}", file=sys.stderr)
  100. return result
  101. def detect_bubble_color(screenshot, box):
  102. """检测文本框区域的主要颜色(RGB)"""
  103. try:
  104. # 获取文本框的边界框
  105. x_coords = [point[0] for point in box]
  106. y_coords = [point[1] for point in box]
  107. x_min, x_max = int(min(x_coords)), int(max(x_coords))
  108. y_min, y_max = int(min(y_coords)), int(max(y_coords))
  109. # 确保坐标在图片范围内
  110. x_min = max(0, x_min)
  111. y_min = max(0, y_min)
  112. x_max = min(screenshot.shape[1] - 1, x_max)
  113. y_max = min(screenshot.shape[0] - 1, y_max)
  114. if x_max <= x_min or y_max <= y_min:
  115. return None
  116. # 提取文本框区域(扩大一点范围以包含气泡背景)
  117. # 向上和向下各扩展10像素,向左和向右各扩展5像素
  118. expand_x = 5
  119. expand_y = 10
  120. x_start = max(0, x_min - expand_x)
  121. y_start = max(0, y_min - expand_y)
  122. x_end = min(screenshot.shape[1], x_max + expand_x)
  123. y_end = min(screenshot.shape[0], y_max + expand_y)
  124. bubble_region = screenshot[y_start:y_end, x_start:x_end]
  125. if bubble_region.size == 0:
  126. return None
  127. # 计算区域的平均RGB值
  128. # OpenCV使用BGR格式,需要转换为RGB
  129. avg_bgr = np.mean(bubble_region.reshape(-1, 3), axis=0)
  130. avg_rgb = [int(avg_bgr[2]), int(avg_bgr[1]), int(avg_bgr[0])] # BGR -> RGB
  131. return avg_rgb
  132. except Exception as e:
  133. return None
  134. def match_rgb_color(actual_rgb, target_rgb, tolerance=30):
  135. """判断实际RGB是否匹配目标RGB(允许容差)"""
  136. if actual_rgb is None or target_rgb is None:
  137. return False
  138. return all(abs(actual_rgb[i] - target_rgb[i]) <= tolerance for i in range(3))
  139. def extract_chat_history(screenshot_path, friend_avatar_path, my_avatar_path, device_width, device_height, workflow_folder, region_json=None, friend_rgb=None, my_rgb=None):
  140. """提取完整的聊天记录"""
  141. try:
  142. original_screenshot = read_image_safe(screenshot_path)
  143. if original_screenshot is None:
  144. return {'success': False, 'error': '无法读取截图文件'}
  145. # 如果提供了区域,先裁剪图片,然后再进行OCR识别
  146. # 这样可以确保只识别指定区域,避免识别到键盘和导航栏
  147. crop_offset_x = 0
  148. crop_offset_y = 0
  149. screenshot = original_screenshot
  150. original_height = original_screenshot.shape[0]
  151. original_width = original_screenshot.shape[1]
  152. if region_json and region_json != 'None':
  153. try:
  154. region = json.loads(region_json)
  155. # 区域格式:corners 对象,包含 topLeft, topRight, bottomLeft, bottomRight
  156. if isinstance(region, dict) and 'topLeft' in region and 'bottomRight' in region:
  157. top_left = region['topLeft']
  158. bottom_right = region['bottomRight']
  159. x1 = int(top_left.get('x', 0))
  160. y1 = int(top_left.get('y', 0))
  161. x2 = int(bottom_right.get('x', original_width))
  162. y2 = int(bottom_right.get('y', original_height))
  163. # 确保坐标在图片范围内,并且 x2 > x1, y2 > y1
  164. x1 = max(0, min(x1, original_width - 1))
  165. y1 = max(0, min(y1, original_height - 1))
  166. x2 = max(x1 + 1, min(x2, original_width))
  167. y2 = max(y1 + 1, min(y2, original_height))
  168. # 验证裁剪区域是否有效
  169. if x2 > x1 and y2 > y1:
  170. # 保存裁剪偏移量(用于调整头像位置)
  171. crop_offset_x = x1
  172. crop_offset_y = y1
  173. # 裁剪图片:使用 numpy 数组切片 [y1:y2, x1:x2]
  174. screenshot = original_screenshot[y1:y2, x1:x2]
  175. # 验证裁剪后的图片是否有效
  176. if screenshot is not None and screenshot.size > 0:
  177. # 保存裁剪后的图片到工作流目录下的 tmp 目录,用于调试
  178. try:
  179. import datetime
  180. # 获取工作流目录下的 tmp 目录路径
  181. # 方法1: 从 workflow_folder 推断(如果提供)
  182. if workflow_folder and workflow_folder != 'None':
  183. workflow_path = Path(workflow_folder)
  184. # workflow_folder 通常是 static/processing/xxx 格式的绝对路径
  185. # tmp 目录应该在工作流目录下:static/processing/xxx/tmp
  186. tmp_dir = workflow_path / 'tmp'
  187. else:
  188. # 方法2: 从截图路径推断(向后兼容)
  189. screenshot_path_obj = Path(screenshot_path)
  190. # 尝试向上查找工作流目录(包含 tmp 目录的父目录)
  191. current = screenshot_path_obj.parent
  192. tmp_dir = None
  193. for _ in range(5): # 最多向上查找5层
  194. if (current / 'tmp').exists():
  195. tmp_dir = current / 'tmp'
  196. break
  197. # 检查是否是工作流目录(包含 processing.json)
  198. if (current / 'processing.json').exists():
  199. tmp_dir = current / 'tmp'
  200. break
  201. current = current.parent
  202. if tmp_dir is None:
  203. # 如果找不到,使用截图目录的父目录下的 tmp
  204. tmp_dir = screenshot_path_obj.parent / 'tmp'
  205. os.makedirs(str(tmp_dir), exist_ok=True)
  206. timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S_%f')
  207. cropped_image_path = tmp_dir / f'cropped_region_{timestamp}.png'
  208. cv2.imwrite(str(cropped_image_path), screenshot)
  209. # 清理工作流目录下的 tmp 目录:如果总大小超过 20MB,删除时间最早的文件
  210. try:
  211. max_size = 20 * 1024 * 1024 # 20MB
  212. files = []
  213. if tmp_dir.exists():
  214. for file_path in tmp_dir.iterdir():
  215. if file_path.is_file():
  216. file_stat = file_path.stat()
  217. files.append({
  218. 'path': file_path,
  219. 'size': file_stat.st_size,
  220. 'mtime': file_stat.st_mtime
  221. })
  222. # 计算总大小
  223. total_size = sum(f['size'] for f in files)
  224. # 如果总大小超过 20MB,删除时间最早的文件
  225. if total_size > max_size:
  226. # 按修改时间排序(最早的在前)
  227. files.sort(key=lambda x: x['mtime'])
  228. # 删除最早的文件直到总大小小于 20MB
  229. for file_info in files:
  230. if total_size <= max_size:
  231. break
  232. try:
  233. file_info['path'].unlink()
  234. total_size -= file_info['size']
  235. except Exception as del_error:
  236. pass
  237. except Exception as cleanup_error:
  238. pass # 清理失败不影响主流程
  239. print(f"Python: 裁剪后的图片已保存到: {cropped_image_path}", file=sys.stderr)
  240. print(f"Python: 裁剪区域 - x1={x1}, y1={y1}, x2={x2}, y2={y2}, 原始尺寸={original_width}x{original_height}, 裁剪后尺寸={screenshot.shape[1]}x{screenshot.shape[0]}", file=sys.stderr)
  241. except Exception as save_error:
  242. print(f"Python: 保存裁剪图片失败: {save_error}", file=sys.stderr)
  243. import traceback
  244. traceback.print_exc(file=sys.stderr)
  245. else:
  246. screenshot = original_screenshot
  247. crop_offset_x = 0
  248. crop_offset_y = 0
  249. except Exception as e:
  250. print(f"Python: 区域裁剪异常: {e}", file=sys.stderr)
  251. import traceback
  252. traceback.print_exc(file=sys.stderr)
  253. # 查找头像位置
  254. # 如果图片被裁剪了,在裁剪后的图片上查找头像(需要临时保存)
  255. # 否则在原始截图上查找
  256. import tempfile
  257. temp_cropped_path = None
  258. try:
  259. if crop_offset_x > 0 or crop_offset_y > 0:
  260. # 如果图片被裁剪了,需要临时保存裁剪后的图片用于头像匹配
  261. temp_cropped_path = tempfile.mktemp(suffix='.png')
  262. cv2.imwrite(temp_cropped_path, screenshot)
  263. avatar_positions = find_avatar_positions(temp_cropped_path, friend_avatar_path, my_avatar_path)
  264. else:
  265. # 使用原始截图查找头像
  266. avatar_positions = find_avatar_positions(screenshot_path, friend_avatar_path, my_avatar_path)
  267. finally:
  268. # 清理临时文件
  269. if temp_cropped_path and os.path.exists(temp_cropped_path):
  270. try:
  271. os.remove(temp_cropped_path)
  272. except:
  273. pass
  274. # 获取 OCR 实例
  275. ocr = ONNXPaddleOcr(use_angle_cls=False, use_gpu=True)
  276. # 执行 OCR(cls=False 避免角度分类器警告)
  277. # 如果提供了区域,在裁剪后的图片上识别;否则全屏识别
  278. ocr_result = ocr.ocr(screenshot, cls=False)
  279. if not ocr_result or not ocr_result[0]:
  280. return {'success': False, 'error': 'OCR 识别失败'}
  281. # 解析 OCR 结果,按 y 坐标分组消息
  282. messages = []
  283. friend_positions = avatar_positions.get('friend', [])
  284. my_positions = avatar_positions.get('my', [])
  285. # 获取截图高度(如果被裁剪了,使用裁剪后的高度;否则使用原始高度)
  286. screenshot_height = screenshot.shape[0]
  287. screenshot_width = screenshot.shape[1]
  288. # 计算键盘区域的阈值:通常键盘在屏幕底部,占屏幕高度的30-40%
  289. # 如果提供了区域裁剪,说明用户已经指定了识别区域,应该信任这个区域,不进行键盘过滤
  290. # 或者使用更宽松的阈值(90%),只过滤最底部的内容
  291. if region_json and region_json != 'None':
  292. # 如果提供了识别区域,使用更宽松的阈值(90%),几乎不过滤
  293. # 因为用户已经通过区域限制了识别范围
  294. keyboard_threshold_y = int(screenshot_height * 0.90) # 从90%的位置开始过滤
  295. else:
  296. # 如果没有提供区域,使用原来的65%阈值
  297. keyboard_threshold_y = int(screenshot_height * 0.65) # 从65%的位置开始过滤
  298. # 简单的消息分组逻辑:根据 y 坐标和头像位置判断发送者
  299. for line in ocr_result[0]:
  300. if not line:
  301. continue
  302. box = line[0]
  303. text = line[1][0] if len(line) > 1 and line[1] else ''
  304. confidence = line[1][1] if len(line) > 1 and len(line[1]) > 1 else 0.0
  305. if not text or confidence < 0.5:
  306. continue
  307. # 计算消息框的中心坐标
  308. x_center = sum([point[0] for point in box]) / len(box)
  309. y_center = sum([point[1] for point in box]) / len(box)
  310. # 过滤键盘区域:如果y坐标超过阈值,直接跳过
  311. if y_center > keyboard_threshold_y:
  312. continue
  313. # 过滤明显是键盘按键的文本(即使y坐标在阈值内,也要过滤)
  314. keyboard_keywords = ['ABC', 'DEF', 'GHI', 'JKL', 'MNO', 'PQRS', 'TUV', 'WXYZ',
  315. '分词', '重输', '换行', '符', '中/英', '中二', '123', '0', '1', '2', '3',
  316. '4', '5', '6', '7', '8', '9', 'Q', 'W', 'E', 'R', 'T', 'Y', 'U', 'I',
  317. 'O', 'P', 'A', 'S', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'Z', 'X', 'C',
  318. 'V', 'B', 'N', 'M', '△', '三', '-']
  319. if text.strip() in keyboard_keywords:
  320. continue
  321. # 判断发送者(优先使用RGB颜色判断,如果失败则使用头像距离判断,最后使用 x 坐标判断)
  322. sender = 'unknown'
  323. # 方法1: 使用RGB颜色判断(最高优先级)
  324. if friend_rgb and my_rgb:
  325. try:
  326. bubble_rgb = detect_bubble_color(screenshot, box)
  327. if bubble_rgb:
  328. # 判断是否匹配好友颜色(白色/浅色)
  329. if match_rgb_color(bubble_rgb, friend_rgb, tolerance=40):
  330. sender = 'friend'
  331. # 判断是否匹配我的颜色(绿色)
  332. elif match_rgb_color(bubble_rgb, my_rgb, tolerance=40):
  333. sender = 'me'
  334. except Exception as e:
  335. pass # RGB检测失败,继续使用其他方法
  336. # 方法2: 如果RGB判断失败,使用头像位置距离判断
  337. if sender == 'unknown':
  338. min_friend_dist = float('inf')
  339. min_my_dist = float('inf')
  340. for fx, fy in friend_positions:
  341. dist = abs(y_center - (fy + 20)) # 假设头像高度约 40px
  342. if dist < min_friend_dist:
  343. min_friend_dist = dist
  344. for mx, my in my_positions:
  345. dist = abs(y_center - (my + 20))
  346. if dist < min_my_dist:
  347. min_my_dist = dist
  348. # 优先使用距离判断(阈值100像素)
  349. if min_friend_dist < 100 and min_friend_dist < min_my_dist:
  350. sender = 'friend'
  351. elif min_my_dist < 100 and min_my_dist < min_friend_dist:
  352. sender = 'me'
  353. else:
  354. # 距离判断失败,使用备选方法
  355. screen_center_x = device_width / 2
  356. # 如果两个头像都没找到,直接使用 x 坐标判断
  357. if not friend_positions and not my_positions:
  358. if x_center < screen_center_x:
  359. sender = 'friend' # 左侧通常是好友
  360. else:
  361. sender = 'me' # 右侧通常是"我"
  362. # 如果只找到好友头像,放宽阈值到150像素
  363. elif friend_positions and not my_positions:
  364. if min_friend_dist < 150:
  365. sender = 'friend'
  366. elif x_center < screen_center_x:
  367. sender = 'friend'
  368. else:
  369. sender = 'me'
  370. # 如果只找到我的头像,放宽阈值到150像素
  371. elif my_positions and not friend_positions:
  372. if min_my_dist < 150:
  373. sender = 'me'
  374. elif x_center >= screen_center_x:
  375. sender = 'me'
  376. else:
  377. sender = 'friend'
  378. # 如果两个头像都找到了但距离判断失败,使用 x 坐标判断
  379. else:
  380. if x_center < screen_center_x:
  381. sender = 'friend'
  382. else:
  383. sender = 'me'
  384. messages.append({
  385. 'text': text,
  386. 'sender': sender,
  387. 'y': int(y_center),
  388. 'confidence': float(confidence)
  389. })
  390. # 按 y 坐标排序(从上到下)
  391. messages.sort(key=lambda m: m['y'])
  392. # 格式化消息文本
  393. messages_text = '\\n'.join([f"{'对方' if m['sender'] == 'friend' else '我' if m['sender'] == 'me' else '未知'}: {m['text']}" for m in messages])
  394. result = {
  395. 'success': True,
  396. 'messages': messages,
  397. 'messagesText': messages_text,
  398. 'count': len(messages)
  399. }
  400. return result
  401. except Exception as e:
  402. return {'success': False, 'error': f'提取聊天记录失败: {str(e)}'}
  403. # 主逻辑
  404. if __name__ == '__main__':
  405. import sys
  406. screenshot_path = sys.argv[1]
  407. friend_avatar_path = sys.argv[2] if len(sys.argv) > 2 and sys.argv[2] != 'None' else None
  408. my_avatar_path = sys.argv[3] if len(sys.argv) > 3 and sys.argv[3] != 'None' else None
  409. device_width = int(sys.argv[4]) if len(sys.argv) > 4 and sys.argv[4] else 1080
  410. device_height = int(sys.argv[5]) if len(sys.argv) > 5 and sys.argv[5] else 2400
  411. workflow_folder = sys.argv[6] if len(sys.argv) > 6 and sys.argv[6] != 'None' else None
  412. region_json = sys.argv[7] if len(sys.argv) > 7 and sys.argv[7] != 'None' else None
  413. friend_rgb_str = sys.argv[8] if len(sys.argv) > 8 and sys.argv[8] != 'None' else None
  414. my_rgb_str = sys.argv[9] if len(sys.argv) > 9 and sys.argv[9] != 'None' else None
  415. # 解析RGB字符串(格式:"(r,g,b)")
  416. friend_rgb = None
  417. my_rgb = None
  418. if friend_rgb_str and friend_rgb_str != 'None':
  419. try:
  420. # 解析格式 "(r,g,b)" 或 "(r, g, b)"
  421. friend_rgb_str = friend_rgb_str.strip().strip('()')
  422. parts = [int(x.strip()) for x in friend_rgb_str.split(',')]
  423. if len(parts) == 3:
  424. friend_rgb = parts
  425. except Exception as e:
  426. print(f"Python: 解析好友RGB失败: {e}", file=sys.stderr)
  427. if my_rgb_str and my_rgb_str != 'None':
  428. try:
  429. # 解析格式 "(r,g,b)" 或 "(r, g, b)"
  430. my_rgb_str = my_rgb_str.strip().strip('()')
  431. parts = [int(x.strip()) for x in my_rgb_str.split(',')]
  432. if len(parts) == 3:
  433. my_rgb = parts
  434. except Exception as e:
  435. print(f"Python: 解析我的RGB失败: {e}", file=sys.stderr)
  436. # 打印接收到的参数(用于调试)
  437. print(f"Python: 接收到的参数 - region_json={'已提供' if region_json and region_json != 'None' else '未提供'}, friend_rgb={friend_rgb}, my_rgb={my_rgb}", file=sys.stderr)
  438. result = extract_chat_history(screenshot_path, friend_avatar_path, my_avatar_path, device_width, device_height, workflow_folder, region_json, friend_rgb, my_rgb)
  439. print(json.dumps(result, ensure_ascii=False))
  440. `;
  441. // 将 Python 代码写入临时文件
  442. const tempScriptPath = join(__dirname, '..', '..', 'temp_extract_chat_history.py');
  443. await writeFile(tempScriptPath, pythonCode, 'utf8');
  444. // 构建命令
  445. const normalizedScreenshotPath = screenshotPath.replace(/\\/g, '/');
  446. let friendAvatarArg = 'None';
  447. if (friendAvatarPath) {
  448. friendAvatarArg = isAbsolute(friendAvatarPath)
  449. ? friendAvatarPath.replace(/\\/g, '/')
  450. : join(__dirname, '..', '..', 'static', 'processing', friendAvatarPath).replace(/\\/g, '/');
  451. }
  452. let myAvatarArg = 'None';
  453. if (myAvatarPath) {
  454. myAvatarArg = isAbsolute(myAvatarPath)
  455. ? myAvatarPath.replace(/\\/g, '/')
  456. : join(__dirname, '..', '..', 'static', 'processing', myAvatarPath).replace(/\\/g, '/');
  457. }
  458. let workflowFolderArg = 'None';
  459. if (workflowFolder) {
  460. workflowFolderArg = isAbsolute(workflowFolder)
  461. ? workflowFolder.replace(/\\/g, '/')
  462. : join(__dirname, '..', '..', 'static', 'processing', workflowFolder).replace(/\\/g, '/');
  463. }
  464. // 传递区域参数(如果提供)
  465. let regionArg = 'None';
  466. if (regionJson && regionJson !== 'None') {
  467. regionArg = regionJson.replace(/"/g, '\\"');
  468. }
  469. // 传递RGB参数(如果提供)
  470. let friendRgbArg = 'None';
  471. if (friendRgb && typeof friendRgb === 'string') {
  472. friendRgbArg = friendRgb;
  473. }
  474. let myRgbArg = 'None';
  475. if (myRgb && typeof myRgb === 'string') {
  476. myRgbArg = myRgb;
  477. }
  478. const command = `"${pythonExePath}" "${tempScriptPath}" "${normalizedScreenshotPath}" "${friendAvatarArg}" "${myAvatarArg}" ${deviceWidth || 1080} ${deviceHeight || 2400} "${workflowFolderArg}" "${regionArg}" "${friendRgbArg}" "${myRgbArg}"`;
  479. const env = {
  480. ...process.env,
  481. DISABLE_MODEL_SOURCE_CHECK: 'True'
  482. };
  483. const { stdout, stderr } = await execAsync(command, {
  484. timeout: 60000,
  485. maxBuffer: 10 * 1024 * 1024,
  486. cwd: join(__dirname, '..', '..'),
  487. encoding: 'utf8',
  488. env: { ...env, PYTHONIOENCODING: 'utf-8', PYTHONUTF8: '1' }
  489. });
  490. // 打印 Python 脚本的 stderr 输出
  491. if (stderr && stderr.trim()) {
  492. try {
  493. const decodedStderr = Buffer.from(stderr, 'utf8').toString('utf8');
  494. console.log(decodedStderr.trim());
  495. } catch (e) {
  496. console.log(stderr.trim());
  497. }
  498. }
  499. // 清理临时文件
  500. try {
  501. await import('fs/promises').then(fs => fs.unlink(tempScriptPath));
  502. } catch (e) {
  503. // 忽略删除失败
  504. }
  505. // 解析输出
  506. const cleanStdout = stdout.replace(/\[33m.*?\[0m/g, '').replace(/DeprecationWarning.*?\n/g, '');
  507. try {
  508. const result = JSON.parse(cleanStdout.trim());
  509. return result;
  510. } catch (parseError) {
  511. console.error('聊天记录解析失败:', parseError);
  512. console.error('原始输出:', cleanStdout);
  513. return { success: false, error: `解析聊天记录失败: ${parseError.message}` };
  514. }
  515. } catch (error) {
  516. console.error('提取聊天记录失败:', error);
  517. if (error.message && error.message.includes('timeout')) {
  518. return { success: false, error: '提取聊天记录超时,请检查网络连接或稍后重试' };
  519. }
  520. return { success: false, error: error.message };
  521. }
  522. }
  523. /**
  524. * 获取最后一条消息
  525. */
  526. export async function getLastMessage(screenshotPath, friendAvatarPath, myAvatarPath, deviceWidth, deviceHeight) {
  527. try {
  528. // 先提取完整聊天记录
  529. const result = await extractChatHistory(screenshotPath, friendAvatarPath, myAvatarPath, deviceWidth, deviceHeight, null);
  530. if (!result.success || !result.messages || result.messages.length === 0) {
  531. return { success: false, error: '未找到消息' };
  532. }
  533. // 获取最后一条消息(y 坐标最大的)
  534. const lastMessage = result.messages.reduce((max, msg) => msg.y > max.y ? msg : max, result.messages[0]);
  535. return {
  536. success: true,
  537. text: lastMessage.text,
  538. sender: lastMessage.sender,
  539. position: { y: lastMessage.y }
  540. };
  541. } catch (error) {
  542. return { success: false, error: error.message };
  543. }
  544. }
  545. /**
  546. * 全屏 OCR 识别
  547. */
  548. export async function ocrFullScreen(screenshotPath, deviceWidth, deviceHeight) {
  549. try {
  550. const pythonExePath = join(__dirname, '..', '..', 'py', 'venv', 'Scripts', 'python.exe');
  551. const onnxocrPath = join(__dirname, '..', '..', 'py', 'OnnxOCR');
  552. const pythonCode = `
  553. import sys
  554. import os
  555. import cv2
  556. import numpy as np
  557. import json
  558. # 添加 OnnxOCR 路径
  559. sys.path.insert(0, r"${onnxocrPath.replace(/\\/g, '/')}")
  560. from onnxocr.onnx_paddleocr import ONNXPaddleOcr
  561. # 设置环境变量
  562. os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'
  563. def read_image_safe(image_path):
  564. abs_path = os.path.abspath(str(image_path))
  565. try:
  566. with open(abs_path, 'rb') as f:
  567. image_data = f.read()
  568. img_array = np.frombuffer(image_data, np.uint8)
  569. img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
  570. if img is None:
  571. raise ValueError(f"cv2.imdecode 无法解码图片: {abs_path}")
  572. return img
  573. except FileNotFoundError:
  574. raise FileNotFoundError(f"图片文件不存在: {abs_path}")
  575. except Exception as e:
  576. raise Exception(f"读取图片失败: {abs_path}, 错误: {str(e)}")
  577. # 主逻辑
  578. if __name__ == '__main__':
  579. screenshot_path = sys.argv[1]
  580. try:
  581. screenshot = read_image_safe(screenshot_path)
  582. if screenshot is None:
  583. print(json.dumps({'success': False, 'error': '无法读取截图文件'}, ensure_ascii=False))
  584. sys.exit(1)
  585. ocr = ONNXPaddleOcr(use_angle_cls=False, use_gpu=True)
  586. ocr_result = ocr.ocr(screenshot, cls=False)
  587. if not ocr_result or not ocr_result[0]:
  588. print(json.dumps({'success': False, 'error': 'OCR 识别失败'}, ensure_ascii=False))
  589. sys.exit(1)
  590. # 提取所有文本
  591. texts = []
  592. for line in ocr_result[0]:
  593. if line and len(line) > 1:
  594. text = line[1][0] if isinstance(line[1], (list, tuple)) else str(line[1])
  595. if text:
  596. texts.append(text)
  597. full_text = '\\n'.join(texts)
  598. print(json.dumps({
  599. 'success': True,
  600. 'text': full_text,
  601. 'position': None
  602. }, ensure_ascii=False))
  603. except Exception as e:
  604. print(json.dumps({'success': False, 'error': f'OCR 识别失败: {str(e)}'}, ensure_ascii=False))
  605. sys.exit(1)
  606. `;
  607. // 将 Python 代码写入临时文件
  608. const tempScriptPath = join(__dirname, '..', '..', 'temp_ocr_full_screen.py');
  609. await writeFile(tempScriptPath, pythonCode, 'utf8');
  610. const normalizedScreenshotPath = screenshotPath.replace(/\\/g, '/');
  611. const command = `"${pythonExePath}" "${tempScriptPath}" "${normalizedScreenshotPath}"`;
  612. const env = {
  613. ...process.env,
  614. DISABLE_MODEL_SOURCE_CHECK: 'True'
  615. };
  616. const { stdout, stderr } = await execAsync(command, {
  617. timeout: 60000,
  618. maxBuffer: 10 * 1024 * 1024,
  619. cwd: join(__dirname, '..', '..'),
  620. encoding: 'utf8',
  621. env: { ...env, PYTHONIOENCODING: 'utf-8', PYTHONUTF8: '1' }
  622. });
  623. // 清理临时文件
  624. try {
  625. await import('fs/promises').then(fs => fs.unlink(tempScriptPath));
  626. } catch (e) {
  627. // 忽略删除失败
  628. }
  629. const cleanStdout = stdout.replace(/\[33m.*?\[0m/g, '').replace(/DeprecationWarning.*?\n/g, '');
  630. try {
  631. const result = JSON.parse(cleanStdout.trim());
  632. return result;
  633. } catch (parseError) {
  634. return { success: false, error: `解析失败: ${parseError.message}` };
  635. }
  636. } catch (error) {
  637. return { success: false, error: error.message };
  638. }
  639. }