// ocr-chat.js (38 KB)
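/**
 * OCR chat-history extraction (Node.js implementation)
 *
 * Purpose: extract the chat messages and their senders from a screenshot and return them as JSON
 * - Uses OCR to recognise the chat content
 * - Identifies the sender role (friend/me) from avatar positions or message positions
 * - Returns a JSON array of messages, each carrying `sender` and `text` fields
 *
 * Implementation: drives Python's OnnxOCR and OpenCV directly through inline Python code
 */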
import { exec, execFile } from 'child_process';
import { promisify } from 'util';
import { join, isAbsolute, dirname, relative } from 'path';
import { fileURLToPath } from 'url';
import { readFile, writeFile, mkdir, unlink } from 'fs/promises';
import { existsSync, readdirSync, statSync } from 'fs';
// Promise-returning wrapper around child_process.exec so shell commands can be awaited.
const execAsync = promisify(exec);
// ES modules have no __filename / __dirname globals; derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
  20. /**
  21. * 确保 pyvenv.cfg 文件使用当前系统的 Python 路径
  22. * @param {string} projectRoot - 项目根目录
  23. * @returns {Promise<void>}
  24. */
  25. async function ensurePyvenvConfig(projectRoot) {
  26. const pyvenvCfgPath = join(projectRoot, 'py', 'venv', 'pyvenv.cfg');
  27. if (!existsSync(pyvenvCfgPath)) {
  28. return; // 如果文件不存在,跳过
  29. }
  30. try {
  31. // 读取现有配置
  32. const currentContent = await readFile(pyvenvCfgPath, 'utf8');
  33. // 尝试从现有配置中提取系统 Python 路径
  34. const homeMatch = currentContent.match(/^home\s*=\s*(.+)$/m);
  35. const executableMatch = currentContent.match(/^executable\s*=\s*(.+)$/m);
  36. // 如果配置文件中已经有路径,检查路径是否存在
  37. if (homeMatch && executableMatch) {
  38. const existingHome = homeMatch[1].trim();
  39. const existingExecutable = executableMatch[1].trim();
  40. // 检查系统 Python 是否存在
  41. if (existsSync(existingExecutable)) {
  42. // 路径存在,不需要更新
  43. return;
  44. }
  45. }
  46. // 如果配置文件中的路径不存在,使用系统 Python 检测
  47. // 使用系统 Python(不是虚拟环境中的),因为我们需要检测系统 Python 路径
  48. const { stdout } = await execAsync('python -c "import sys; import os; print(os.path.dirname(sys.executable))"', {
  49. encoding: 'utf8',
  50. timeout: 5000,
  51. cwd: projectRoot
  52. });
  53. const pythonHome = stdout.trim();
  54. if (!pythonHome) {
  55. return; // 如果无法检测,跳过
  56. }
  57. const pythonExe = join(pythonHome, 'python.exe');
  58. // 检查系统 Python 是否存在
  59. if (!existsSync(pythonExe)) {
  60. return; // 系统 Python 不存在,跳过
  61. }
  62. // 检测 Python 版本(使用系统 Python)
  63. const { stdout: versionOutput } = await execAsync('python -c "import sys; print(\"{}.{}.{}\".format(sys.version_info.major, sys.version_info.minor, sys.version_info.micro))"', {
  64. encoding: 'utf8',
  65. timeout: 5000,
  66. cwd: projectRoot
  67. });
  68. const pythonVersion = versionOutput.trim();
  69. // 更新配置
  70. const newContent = `home = ${pythonHome}
  71. include-system-site-packages = false
  72. version = ${pythonVersion}
  73. executable = ${pythonExe}
  74. command = ${pythonExe} -m venv py/venv
  75. `;
  76. await writeFile(pyvenvCfgPath, newContent, 'utf8');
  77. } catch (error) {
  78. // 静默失败,不影响主流程
  79. console.warn('无法更新 pyvenv.cfg:', error.message);
  80. }
  81. }
  82. /**
  83. * 安全读取包含 Unicode 字符的图片
  84. */
  85. function readImageSafe(imagePath) {
  86. return `
  87. import cv2
  88. import numpy as np
  89. import os
  90. def read_image_safe(image_path):
  91. abs_path = os.path.abspath(str(image_path))
  92. try:
  93. with open(abs_path, 'rb') as f:
  94. image_data = f.read()
  95. img_array = np.frombuffer(image_data, np.uint8)
  96. img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
  97. if img is None:
  98. raise ValueError(f"cv2.imdecode 无法解码图片: {abs_path}")
  99. return img
  100. except FileNotFoundError:
  101. raise FileNotFoundError(f"图片文件不存在: {abs_path}")
  102. except Exception as e:
  103. raise Exception(f"读取图片失败: {abs_path}, 错误: {str(e)}")
  104. `;
  105. }
  106. /**
  107. * 根据屏幕截图提取聊天记录和聊天角色
  108. *
  109. * @param {string} screenshotPath - 截图路径(屏幕截图文件)
  110. * @param {string} friendAvatarPath - 好友头像路径(可选,用于识别发送者角色)
  111. * @param {string} myAvatarPath - 我的头像路径(可选,用于识别发送者角色)
  112. * @param {number} deviceWidth - 设备宽度(可选,用于位置判断)
  113. * @param {number} deviceHeight - 设备高度(可选,用于位置判断)
  114. * @param {string} workflowFolder - 工作流文件夹路径(可选)
  115. * @param {string} regionJson - 识别区域 JSON 字符串(可选,包含四个顶点坐标的 corners 对象)
  116. * @returns {Promise<{success: boolean, messages?: Array<{sender: 'friend'|'me'|'unknown', text: string}>, messagesText?: string, error?: string}>}
  117. *
  118. * messages: JSON 格式的消息数组,每条消息包含:
  119. * - sender: 发送者角色('friend' 表示好友,'me' 表示自己,'unknown' 表示无法识别)
  120. * - text: 消息文本内容
  121. */
  122. export async function extractChatHistory(screenshotPath, friendAvatarPath, myAvatarPath, deviceWidth, deviceHeight, workflowFolder, regionJson = null, friendRgb = null, myRgb = null) {
  123. try {
  124. // 使用相对路径(相对于项目根目录)
  125. const projectRoot = join(__dirname, '..', '..');
  126. // 确保 pyvenv.cfg 使用当前系统的 Python 路径
  127. await ensurePyvenvConfig(projectRoot);
  128. const venvRelativePath = 'py/venv';
  129. const venvScriptsRelativePath = 'py/venv/Scripts';
  130. const pythonExeRelativePath = 'py/venv/Scripts/python.exe';
  131. // 转换为绝对路径(用于环境变量)
  132. const venvAbsolutePath = join(projectRoot, venvRelativePath);
  133. const venvScriptsAbsolutePath = join(projectRoot, venvScriptsRelativePath);
  134. const pythonExeAbsolutePath = join(projectRoot, pythonExeRelativePath);
  135. // 使用绝对路径执行 Python 命令(确保路径正确)
  136. // 使用项目根目录 + py/venv/Scripts/python.exe
  137. const pythonCommand = pythonExeAbsolutePath;
  138. // 构建内联 Python 脚本
  139. const pythonCode = `
  140. import sys
  141. import os
  142. import cv2
  143. import numpy as np
  144. import json
  145. from pathlib import Path
  146. # OnnxOCR 已通过 pip 安装到虚拟环境中,直接导入即可
  147. from onnxocr.onnx_paddleocr import ONNXPaddleOcr
  148. # 设置环境变量
  149. os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'
  150. ${readImageSafe()}
  151. def find_avatar_positions(screenshot_path, friend_avatar_path, my_avatar_path):
  152. """在截图中查找头像位置"""
  153. screenshot = read_image_safe(screenshot_path)
  154. result = {'friend': [], 'my': []}
  155. if friend_avatar_path and friend_avatar_path != 'None':
  156. try:
  157. friend_avatar = read_image_safe(friend_avatar_path)
  158. result_friend = cv2.matchTemplate(screenshot, friend_avatar, cv2.TM_CCOEFF_NORMED)
  159. locations_friend = np.where(result_friend >= 0.8)
  160. for pt in zip(*locations_friend[::-1]):
  161. result['friend'].append([int(pt[0]), int(pt[1])])
  162. except Exception as e:
  163. print(f"查找好友头像失败: {e}", file=sys.stderr)
  164. if my_avatar_path and my_avatar_path != 'None':
  165. try:
  166. my_avatar = read_image_safe(my_avatar_path)
  167. result_my = cv2.matchTemplate(screenshot, my_avatar, cv2.TM_CCOEFF_NORMED)
  168. locations_my = np.where(result_my >= 0.8)
  169. for pt in zip(*locations_my[::-1]):
  170. result['my'].append([int(pt[0]), int(pt[1])])
  171. except Exception as e:
  172. print(f"查找我的头像失败: {e}", file=sys.stderr)
  173. return result
  174. def detect_bubble_color(screenshot, box):
  175. """检测文本框区域的主要颜色(RGB)"""
  176. try:
  177. # 获取文本框的边界框
  178. x_coords = [point[0] for point in box]
  179. y_coords = [point[1] for point in box]
  180. x_min, x_max = int(min(x_coords)), int(max(x_coords))
  181. y_min, y_max = int(min(y_coords)), int(max(y_coords))
  182. # 确保坐标在图片范围内
  183. x_min = max(0, x_min)
  184. y_min = max(0, y_min)
  185. x_max = min(screenshot.shape[1] - 1, x_max)
  186. y_max = min(screenshot.shape[0] - 1, y_max)
  187. if x_max <= x_min or y_max <= y_min:
  188. return None
  189. # 提取文本框区域(扩大一点范围以包含气泡背景)
  190. # 向上和向下各扩展10像素,向左和向右各扩展5像素
  191. expand_x = 5
  192. expand_y = 10
  193. x_start = max(0, x_min - expand_x)
  194. y_start = max(0, y_min - expand_y)
  195. x_end = min(screenshot.shape[1], x_max + expand_x)
  196. y_end = min(screenshot.shape[0], y_max + expand_y)
  197. bubble_region = screenshot[y_start:y_end, x_start:x_end]
  198. if bubble_region.size == 0:
  199. return None
  200. # 计算区域的平均RGB值
  201. # OpenCV使用BGR格式,需要转换为RGB
  202. avg_bgr = np.mean(bubble_region.reshape(-1, 3), axis=0)
  203. avg_rgb = [int(avg_bgr[2]), int(avg_bgr[1]), int(avg_bgr[0])] # BGR -> RGB
  204. return avg_rgb
  205. except Exception as e:
  206. return None
  207. def match_rgb_color(actual_rgb, target_rgb, tolerance=30):
  208. """判断实际RGB是否匹配目标RGB(允许容差)"""
  209. if actual_rgb is None or target_rgb is None:
  210. return False
  211. return all(abs(actual_rgb[i] - target_rgb[i]) <= tolerance for i in range(3))
  212. def extract_chat_history(screenshot_path, friend_avatar_path, my_avatar_path, device_width, device_height, workflow_folder, region_json=None, friend_rgb=None, my_rgb=None):
  213. """提取完整的聊天记录"""
  214. try:
  215. original_screenshot = read_image_safe(screenshot_path)
  216. if original_screenshot is None:
  217. return {'success': False, 'error': '无法读取截图文件'}
  218. # 如果提供了区域,先裁剪图片,然后再进行OCR识别
  219. # 这样可以确保只识别指定区域,避免识别到键盘和导航栏
  220. crop_offset_x = 0
  221. crop_offset_y = 0
  222. screenshot = original_screenshot
  223. original_height = original_screenshot.shape[0]
  224. original_width = original_screenshot.shape[1]
  225. if region_json and region_json != 'None':
  226. try:
  227. region = json.loads(region_json)
  228. # 区域格式:corners 对象,包含 topLeft, topRight, bottomLeft, bottomRight
  229. if isinstance(region, dict) and 'topLeft' in region and 'bottomRight' in region:
  230. top_left = region['topLeft']
  231. bottom_right = region['bottomRight']
  232. x1 = int(top_left.get('x', 0))
  233. y1 = int(top_left.get('y', 0))
  234. x2 = int(bottom_right.get('x', original_width))
  235. y2 = int(bottom_right.get('y', original_height))
  236. # 确保坐标在图片范围内,并且 x2 > x1, y2 > y1
  237. x1 = max(0, min(x1, original_width - 1))
  238. y1 = max(0, min(y1, original_height - 1))
  239. x2 = max(x1 + 1, min(x2, original_width))
  240. y2 = max(y1 + 1, min(y2, original_height))
  241. # 验证裁剪区域是否有效
  242. if x2 > x1 and y2 > y1:
  243. # 保存裁剪偏移量(用于调整头像位置)
  244. crop_offset_x = x1
  245. crop_offset_y = y1
  246. # 裁剪图片:使用 numpy 数组切片 [y1:y2, x1:x2]
  247. screenshot = original_screenshot[y1:y2, x1:x2]
  248. # 验证裁剪后的图片是否有效
  249. if screenshot is not None and screenshot.size > 0:
  250. # 保存裁剪后的图片到工作流目录下的 tmp 目录,用于调试
  251. try:
  252. import datetime
  253. # 获取工作流目录下的 tmp 目录路径
  254. # 方法1: 从 workflow_folder 推断(如果提供)
  255. if workflow_folder and workflow_folder != 'None':
  256. workflow_path = Path(workflow_folder)
  257. # workflow_folder 通常是 static/processing/xxx 格式的绝对路径
  258. # tmp 目录应该在工作流目录下:static/processing/xxx/tmp
  259. tmp_dir = workflow_path / 'tmp'
  260. else:
  261. # 方法2: 从截图路径推断(向后兼容)
  262. screenshot_path_obj = Path(screenshot_path)
  263. # 尝试向上查找工作流目录(包含 tmp 目录的父目录)
  264. current = screenshot_path_obj.parent
  265. tmp_dir = None
  266. for _ in range(5): # 最多向上查找5层
  267. if (current / 'tmp').exists():
  268. tmp_dir = current / 'tmp'
  269. break
  270. # 检查是否是工作流目录(包含 processing.json)
  271. if (current / 'processing.json').exists():
  272. tmp_dir = current / 'tmp'
  273. break
  274. current = current.parent
  275. if tmp_dir is None:
  276. # 如果找不到,使用截图目录的父目录下的 tmp
  277. tmp_dir = screenshot_path_obj.parent / 'tmp'
  278. os.makedirs(str(tmp_dir), exist_ok=True)
  279. timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S_%f')
  280. cropped_image_path = tmp_dir / f'cropped_region_{timestamp}.png'
  281. cv2.imwrite(str(cropped_image_path), screenshot)
  282. # 清理工作流目录下的 tmp 目录:如果总大小超过 20MB,删除时间最早的文件
  283. try:
  284. max_size = 20 * 1024 * 1024 # 20MB
  285. files = []
  286. if tmp_dir.exists():
  287. for file_path in tmp_dir.iterdir():
  288. if file_path.is_file():
  289. file_stat = file_path.stat()
  290. files.append({
  291. 'path': file_path,
  292. 'size': file_stat.st_size,
  293. 'mtime': file_stat.st_mtime
  294. })
  295. # 计算总大小
  296. total_size = sum(f['size'] for f in files)
  297. # 如果总大小超过 20MB,删除时间最早的文件
  298. if total_size > max_size:
  299. # 按修改时间排序(最早的在前)
  300. files.sort(key=lambda x: x['mtime'])
  301. # 删除最早的文件直到总大小小于 20MB
  302. for file_info in files:
  303. if total_size <= max_size:
  304. break
  305. try:
  306. file_info['path'].unlink()
  307. total_size -= file_info['size']
  308. except Exception as del_error:
  309. pass
  310. except Exception as cleanup_error:
  311. pass # 清理失败不影响主流程
  312. print(f"Python: 裁剪后的图片已保存到: {cropped_image_path}", file=sys.stderr)
  313. print(f"Python: 裁剪区域 - x1={x1}, y1={y1}, x2={x2}, y2={y2}, 原始尺寸={original_width}x{original_height}, 裁剪后尺寸={screenshot.shape[1]}x{screenshot.shape[0]}", file=sys.stderr)
  314. except Exception as save_error:
  315. print(f"Python: 保存裁剪图片失败: {save_error}", file=sys.stderr)
  316. import traceback
  317. traceback.print_exc(file=sys.stderr)
  318. else:
  319. screenshot = original_screenshot
  320. crop_offset_x = 0
  321. crop_offset_y = 0
  322. except Exception as e:
  323. print(f"Python: 区域裁剪异常: {e}", file=sys.stderr)
  324. import traceback
  325. traceback.print_exc(file=sys.stderr)
  326. # 查找头像位置
  327. # 如果图片被裁剪了,在裁剪后的图片上查找头像(需要临时保存)
  328. # 否则在原始截图上查找
  329. import tempfile
  330. temp_cropped_path = None
  331. try:
  332. if crop_offset_x > 0 or crop_offset_y > 0:
  333. # 如果图片被裁剪了,需要临时保存裁剪后的图片用于头像匹配
  334. temp_cropped_path = tempfile.mktemp(suffix='.png')
  335. cv2.imwrite(temp_cropped_path, screenshot)
  336. avatar_positions = find_avatar_positions(temp_cropped_path, friend_avatar_path, my_avatar_path)
  337. else:
  338. # 使用原始截图查找头像
  339. avatar_positions = find_avatar_positions(screenshot_path, friend_avatar_path, my_avatar_path)
  340. finally:
  341. # 清理临时文件
  342. if temp_cropped_path and os.path.exists(temp_cropped_path):
  343. try:
  344. os.remove(temp_cropped_path)
  345. except:
  346. pass
  347. # 获取 OCR 实例
  348. ocr = ONNXPaddleOcr(use_angle_cls=False, use_gpu=True)
  349. # 执行 OCR(cls=False 避免角度分类器警告)
  350. # 如果提供了区域,在裁剪后的图片上识别;否则全屏识别
  351. ocr_result = ocr.ocr(screenshot, cls=False)
  352. if not ocr_result or not ocr_result[0]:
  353. return {'success': False, 'error': 'OCR 识别失败'}
  354. # 解析 OCR 结果,按 y 坐标分组消息
  355. messages = []
  356. friend_positions = avatar_positions.get('friend', [])
  357. my_positions = avatar_positions.get('my', [])
  358. # 获取截图高度(如果被裁剪了,使用裁剪后的高度;否则使用原始高度)
  359. screenshot_height = screenshot.shape[0]
  360. screenshot_width = screenshot.shape[1]
  361. # 计算键盘区域的阈值:通常键盘在屏幕底部,占屏幕高度的30-40%
  362. # 如果提供了区域裁剪,说明用户已经指定了识别区域,应该信任这个区域,不进行键盘过滤
  363. # 或者使用更宽松的阈值(90%),只过滤最底部的内容
  364. if region_json and region_json != 'None':
  365. # 如果提供了识别区域,使用更宽松的阈值(90%),几乎不过滤
  366. # 因为用户已经通过区域限制了识别范围
  367. keyboard_threshold_y = int(screenshot_height * 0.90) # 从90%的位置开始过滤
  368. else:
  369. # 如果没有提供区域,使用原来的65%阈值
  370. keyboard_threshold_y = int(screenshot_height * 0.65) # 从65%的位置开始过滤
  371. # 简单的消息分组逻辑:根据 y 坐标和头像位置判断发送者
  372. for line in ocr_result[0]:
  373. if not line:
  374. continue
  375. box = line[0]
  376. text = line[1][0] if len(line) > 1 and line[1] else ''
  377. confidence = line[1][1] if len(line) > 1 and len(line[1]) > 1 else 0.0
  378. if not text or confidence < 0.5:
  379. continue
  380. # 计算消息框的中心坐标
  381. x_center = sum([point[0] for point in box]) / len(box)
  382. y_center = sum([point[1] for point in box]) / len(box)
  383. # 过滤键盘区域:如果y坐标超过阈值,直接跳过
  384. if y_center > keyboard_threshold_y:
  385. continue
  386. # 过滤明显是键盘按键的文本(即使y坐标在阈值内,也要过滤)
  387. keyboard_keywords = ['ABC', 'DEF', 'GHI', 'JKL', 'MNO', 'PQRS', 'TUV', 'WXYZ',
  388. '分词', '重输', '换行', '符', '中/英', '中二', '123', '0', '1', '2', '3',
  389. '4', '5', '6', '7', '8', '9', 'Q', 'W', 'E', 'R', 'T', 'Y', 'U', 'I',
  390. 'O', 'P', 'A', 'S', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'Z', 'X', 'C',
  391. 'V', 'B', 'N', 'M', '△', '三', '-']
  392. if text.strip() in keyboard_keywords:
  393. continue
  394. # 判断发送者(优先使用RGB颜色判断,如果失败则使用头像距离判断,最后使用 x 坐标判断)
  395. sender = 'unknown'
  396. # 方法1: 使用RGB颜色判断(最高优先级)
  397. if friend_rgb and my_rgb:
  398. try:
  399. bubble_rgb = detect_bubble_color(screenshot, box)
  400. if bubble_rgb:
  401. # 判断是否匹配好友颜色(白色/浅色)
  402. if match_rgb_color(bubble_rgb, friend_rgb, tolerance=40):
  403. sender = 'friend'
  404. # 判断是否匹配我的颜色(绿色)
  405. elif match_rgb_color(bubble_rgb, my_rgb, tolerance=40):
  406. sender = 'me'
  407. except Exception as e:
  408. pass # RGB检测失败,继续使用其他方法
  409. # 方法2: 如果RGB判断失败,使用头像位置距离判断
  410. if sender == 'unknown':
  411. min_friend_dist = float('inf')
  412. min_my_dist = float('inf')
  413. for fx, fy in friend_positions:
  414. dist = abs(y_center - (fy + 20)) # 假设头像高度约 40px
  415. if dist < min_friend_dist:
  416. min_friend_dist = dist
  417. for mx, my in my_positions:
  418. dist = abs(y_center - (my + 20))
  419. if dist < min_my_dist:
  420. min_my_dist = dist
  421. # 优先使用距离判断(阈值100像素)
  422. if min_friend_dist < 100 and min_friend_dist < min_my_dist:
  423. sender = 'friend'
  424. elif min_my_dist < 100 and min_my_dist < min_friend_dist:
  425. sender = 'me'
  426. else:
  427. # 距离判断失败,使用备选方法
  428. screen_center_x = device_width / 2
  429. # 如果两个头像都没找到,直接使用 x 坐标判断
  430. if not friend_positions and not my_positions:
  431. if x_center < screen_center_x:
  432. sender = 'friend' # 左侧通常是好友
  433. else:
  434. sender = 'me' # 右侧通常是"我"
  435. # 如果只找到好友头像,放宽阈值到150像素
  436. elif friend_positions and not my_positions:
  437. if min_friend_dist < 150:
  438. sender = 'friend'
  439. elif x_center < screen_center_x:
  440. sender = 'friend'
  441. else:
  442. sender = 'me'
  443. # 如果只找到我的头像,放宽阈值到150像素
  444. elif my_positions and not friend_positions:
  445. if min_my_dist < 150:
  446. sender = 'me'
  447. elif x_center >= screen_center_x:
  448. sender = 'me'
  449. else:
  450. sender = 'friend'
  451. # 如果两个头像都找到了但距离判断失败,使用 x 坐标判断
  452. else:
  453. if x_center < screen_center_x:
  454. sender = 'friend'
  455. else:
  456. sender = 'me'
  457. messages.append({
  458. 'text': text,
  459. 'sender': sender,
  460. 'y': int(y_center),
  461. 'confidence': float(confidence)
  462. })
  463. # 按 y 坐标排序(从上到下)
  464. messages.sort(key=lambda m: m['y'])
  465. # 格式化消息文本
  466. messages_text = '\\n'.join([f"{'对方' if m['sender'] == 'friend' else '我' if m['sender'] == 'me' else '未知'}: {m['text']}" for m in messages])
  467. result = {
  468. 'success': True,
  469. 'messages': messages,
  470. 'messagesText': messages_text,
  471. 'count': len(messages)
  472. }
  473. return result
  474. except Exception as e:
  475. return {'success': False, 'error': f'提取聊天记录失败: {str(e)}'}
  476. # 主逻辑
  477. if __name__ == '__main__':
  478. import sys
  479. screenshot_path = sys.argv[1]
  480. friend_avatar_path = sys.argv[2] if len(sys.argv) > 2 and sys.argv[2] != 'None' else None
  481. my_avatar_path = sys.argv[3] if len(sys.argv) > 3 and sys.argv[3] != 'None' else None
  482. device_width = int(sys.argv[4]) if len(sys.argv) > 4 and sys.argv[4] else 1080
  483. device_height = int(sys.argv[5]) if len(sys.argv) > 5 and sys.argv[5] else 2400
  484. workflow_folder = sys.argv[6] if len(sys.argv) > 6 and sys.argv[6] != 'None' else None
  485. region_json = sys.argv[7] if len(sys.argv) > 7 and sys.argv[7] != 'None' else None
  486. friend_rgb_str = sys.argv[8] if len(sys.argv) > 8 and sys.argv[8] != 'None' else None
  487. my_rgb_str = sys.argv[9] if len(sys.argv) > 9 and sys.argv[9] != 'None' else None
  488. # 解析RGB字符串(格式:"(r,g,b)")
  489. friend_rgb = None
  490. my_rgb = None
  491. if friend_rgb_str and friend_rgb_str != 'None':
  492. try:
  493. # 解析格式 "(r,g,b)" 或 "(r, g, b)"
  494. friend_rgb_str = friend_rgb_str.strip().strip('()')
  495. parts = [int(x.strip()) for x in friend_rgb_str.split(',')]
  496. if len(parts) == 3:
  497. friend_rgb = parts
  498. except Exception as e:
  499. print(f"Python: 解析好友RGB失败: {e}", file=sys.stderr)
  500. if my_rgb_str and my_rgb_str != 'None':
  501. try:
  502. # 解析格式 "(r,g,b)" 或 "(r, g, b)"
  503. my_rgb_str = my_rgb_str.strip().strip('()')
  504. parts = [int(x.strip()) for x in my_rgb_str.split(',')]
  505. if len(parts) == 3:
  506. my_rgb = parts
  507. except Exception as e:
  508. print(f"Python: 解析我的RGB失败: {e}", file=sys.stderr)
  509. # 打印接收到的参数(用于调试)
  510. print(f"Python: 接收到的参数 - region_json={'已提供' if region_json and region_json != 'None' else '未提供'}, friend_rgb={friend_rgb}, my_rgb={my_rgb}", file=sys.stderr)
  511. result = extract_chat_history(screenshot_path, friend_avatar_path, my_avatar_path, device_width, device_height, workflow_folder, region_json, friend_rgb, my_rgb)
  512. print(json.dumps(result, ensure_ascii=False))
  513. `;
  514. // 将 Python 代码写入临时文件(使用相对路径)
  515. const tempScriptPath = 'temp_extract_chat_history.py';
  516. const tempScriptAbsolutePath = join(projectRoot, tempScriptPath);
  517. await writeFile(tempScriptAbsolutePath, pythonCode, 'utf8');
  518. // 构建命令(使用相对路径)
  519. // 如果传入的是绝对路径,转换为相对路径
  520. let relativeScreenshotPath = screenshotPath;
  521. if (isAbsolute(screenshotPath)) {
  522. try {
  523. relativeScreenshotPath = require('path').relative(projectRoot, screenshotPath).replace(/\\/g, '/');
  524. } catch (e) {
  525. relativeScreenshotPath = screenshotPath.replace(/\\/g, '/');
  526. }
  527. } else {
  528. relativeScreenshotPath = screenshotPath.replace(/\\/g, '/');
  529. }
  530. let friendAvatarArg = 'None';
  531. if (friendAvatarPath) {
  532. if (isAbsolute(friendAvatarPath)) {
  533. try {
  534. friendAvatarArg = require('path').relative(projectRoot, friendAvatarPath).replace(/\\/g, '/');
  535. } catch (e) {
  536. friendAvatarArg = friendAvatarPath.replace(/\\/g, '/');
  537. }
  538. } else {
  539. friendAvatarArg = friendAvatarPath.replace(/\\/g, '/');
  540. }
  541. }
  542. let myAvatarArg = 'None';
  543. if (myAvatarPath) {
  544. if (isAbsolute(myAvatarPath)) {
  545. try {
  546. myAvatarArg = require('path').relative(projectRoot, myAvatarPath).replace(/\\/g, '/');
  547. } catch (e) {
  548. myAvatarArg = myAvatarPath.replace(/\\/g, '/');
  549. }
  550. } else {
  551. myAvatarArg = myAvatarPath.replace(/\\/g, '/');
  552. }
  553. }
  554. let workflowFolderArg = 'None';
  555. if (workflowFolder) {
  556. if (isAbsolute(workflowFolder)) {
  557. try {
  558. workflowFolderArg = require('path').relative(projectRoot, workflowFolder).replace(/\\/g, '/');
  559. } catch (e) {
  560. workflowFolderArg = workflowFolder.replace(/\\/g, '/');
  561. }
  562. } else {
  563. workflowFolderArg = workflowFolder.replace(/\\/g, '/');
  564. }
  565. }
  566. // 传递区域参数(如果提供)
  567. let regionArg = 'None';
  568. if (regionJson && regionJson !== 'None') {
  569. regionArg = regionJson.replace(/"/g, '\\"');
  570. }
  571. // 传递RGB参数(如果提供)
  572. let friendRgbArg = 'None';
  573. if (friendRgb && typeof friendRgb === 'string') {
  574. friendRgbArg = friendRgb;
  575. }
  576. let myRgbArg = 'None';
  577. if (myRgb && typeof myRgb === 'string') {
  578. myRgbArg = myRgb;
  579. }
  580. // 使用绝对路径的 Python 命令(用引号包裹,确保路径中的空格被正确处理),传递给 Python 的参数使用相对路径
  581. const command = `"${pythonCommand}" "${tempScriptPath}" "${relativeScreenshotPath}" "${friendAvatarArg}" "${myAvatarArg}" ${deviceWidth || 1080} ${deviceHeight || 2400} "${workflowFolderArg}" "${regionArg}" "${friendRgbArg}" "${myRgbArg}"`;
  582. const env = {
  583. ...process.env,
  584. DISABLE_MODEL_SOURCE_CHECK: 'True'
  585. };
  586. const { stdout, stderr } = await execAsync(command, {
  587. timeout: 60000,
  588. maxBuffer: 10 * 1024 * 1024,
  589. cwd: projectRoot, // 设置工作目录为项目根目录,这样相对路径才能正确解析
  590. encoding: 'utf8',
  591. env: {
  592. ...env,
  593. PYTHONIOENCODING: 'utf-8',
  594. PYTHONUTF8: '1',
  595. // 设置虚拟环境相关环境变量
  596. VIRTUAL_ENV: venvAbsolutePath,
  597. // 将虚拟环境的 Scripts 目录添加到 PATH 前面,确保使用虚拟环境中的工具
  598. PATH: `${venvScriptsAbsolutePath};${process.env.PATH}`
  599. }
  600. });
  601. // 忽略 Python 脚本的 stderr 输出
  602. // 清理临时文件
  603. try {
  604. await import('fs/promises').then(fs => fs.unlink(tempScriptPath));
  605. } catch (e) {
  606. // 忽略删除失败
  607. }
  608. // 解析输出
  609. const cleanStdout = stdout.replace(/\[33m.*?\[0m/g, '').replace(/DeprecationWarning.*?\n/g, '');
  610. try {
  611. const result = JSON.parse(cleanStdout.trim());
  612. return result;
  613. } catch (parseError) {
  614. return { success: false, error: `解析聊天记录失败: ${parseError.message}` };
  615. }
  616. } catch (error) {
  617. if (error.message && error.message.includes('timeout')) {
  618. return { success: false, error: '提取聊天记录超时,请检查网络连接或稍后重试' };
  619. }
  620. return { success: false, error: error.message };
  621. }
  622. }
  623. /**
  624. * 获取最后一条消息
  625. */
  626. export async function getLastMessage(screenshotPath, friendAvatarPath, myAvatarPath, deviceWidth, deviceHeight) {
  627. try {
  628. // 先提取完整聊天记录
  629. const result = await extractChatHistory(screenshotPath, friendAvatarPath, myAvatarPath, deviceWidth, deviceHeight, null);
  630. if (!result.success || !result.messages || result.messages.length === 0) {
  631. return { success: false, error: '未找到消息' };
  632. }
  633. // 获取最后一条消息(y 坐标最大的)
  634. const lastMessage = result.messages.reduce((max, msg) => msg.y > max.y ? msg : max, result.messages[0]);
  635. return {
  636. success: true,
  637. text: lastMessage.text,
  638. sender: lastMessage.sender,
  639. position: { y: lastMessage.y }
  640. };
  641. } catch (error) {
  642. return { success: false, error: error.message };
  643. }
  644. }
  645. /**
  646. * 全屏 OCR 识别
  647. */
  648. export async function ocrFullScreen(screenshotPath, deviceWidth, deviceHeight) {
  649. try {
  650. // 使用相对路径(相对于项目根目录)
  651. const projectRoot = join(__dirname, '..', '..');
  652. // 确保 pyvenv.cfg 使用当前系统的 Python 路径
  653. await ensurePyvenvConfig(projectRoot);
  654. const venvRelativePath = 'py/venv';
  655. const venvScriptsRelativePath = 'py/venv/Scripts';
  656. const pythonExeRelativePath = 'py/venv/Scripts/python.exe';
  657. // 转换为绝对路径(用于环境变量)
  658. const venvAbsolutePath = join(projectRoot, venvRelativePath);
  659. const venvScriptsAbsolutePath = join(projectRoot, venvScriptsRelativePath);
  660. const pythonExeAbsolutePath = join(projectRoot, pythonExeRelativePath);
  661. // 使用绝对路径执行 Python 命令(确保路径正确)
  662. // 使用项目根目录 + py/venv/Scripts/python.exe
  663. const pythonCommand = pythonExeAbsolutePath;
  664. const pythonCode = `
  665. import sys
  666. import os
  667. import cv2
  668. import numpy as np
  669. import json
  670. # OnnxOCR 已通过 pip 安装到虚拟环境中,直接导入即可
  671. from onnxocr.onnx_paddleocr import ONNXPaddleOcr
  672. # 设置环境变量
  673. os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'
  674. def read_image_safe(image_path):
  675. abs_path = os.path.abspath(str(image_path))
  676. try:
  677. with open(abs_path, 'rb') as f:
  678. image_data = f.read()
  679. img_array = np.frombuffer(image_data, np.uint8)
  680. img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
  681. if img is None:
  682. raise ValueError(f"cv2.imdecode 无法解码图片: {abs_path}")
  683. return img
  684. except FileNotFoundError:
  685. raise FileNotFoundError(f"图片文件不存在: {abs_path}")
  686. except Exception as e:
  687. raise Exception(f"读取图片失败: {abs_path}, 错误: {str(e)}")
  688. # 主逻辑
  689. if __name__ == '__main__':
  690. screenshot_path = sys.argv[1]
  691. try:
  692. screenshot = read_image_safe(screenshot_path)
  693. if screenshot is None:
  694. print(json.dumps({'success': False, 'error': '无法读取截图文件'}, ensure_ascii=False))
  695. sys.exit(1)
  696. ocr = ONNXPaddleOcr(use_angle_cls=False, use_gpu=True)
  697. ocr_result = ocr.ocr(screenshot, cls=False)
  698. if not ocr_result or not ocr_result[0]:
  699. print(json.dumps({'success': False, 'error': 'OCR 识别失败'}, ensure_ascii=False))
  700. sys.exit(1)
  701. # 提取所有文本
  702. texts = []
  703. for line in ocr_result[0]:
  704. if line and len(line) > 1:
  705. text = line[1][0] if isinstance(line[1], (list, tuple)) else str(line[1])
  706. if text:
  707. texts.append(text)
  708. full_text = '\\n'.join(texts)
  709. print(json.dumps({
  710. 'success': True,
  711. 'text': full_text,
  712. 'position': None
  713. }, ensure_ascii=False))
  714. except Exception as e:
  715. print(json.dumps({'success': False, 'error': f'OCR 识别失败: {str(e)}'}, ensure_ascii=False))
  716. sys.exit(1)
  717. `;
  718. // 将 Python 代码写入临时文件
  719. const tempScriptPath = join(__dirname, '..', '..', 'temp_ocr_full_screen.py');
  720. await writeFile(tempScriptPath, pythonCode, 'utf8');
  721. const normalizedScreenshotPath = screenshotPath.replace(/\\/g, '/');
  722. // 使用绝对路径的 Python 命令(用引号包裹,确保路径中的空格被正确处理)
  723. const command = `"${pythonCommand}" "${tempScriptPath}" "${normalizedScreenshotPath}"`;
  724. const env = {
  725. ...process.env,
  726. DISABLE_MODEL_SOURCE_CHECK: 'True'
  727. };
  728. const { stdout, stderr } = await execAsync(command, {
  729. timeout: 60000,
  730. maxBuffer: 10 * 1024 * 1024,
  731. cwd: projectRoot, // 设置工作目录为项目根目录,这样相对路径才能正确解析
  732. encoding: 'utf8',
  733. env: {
  734. ...env,
  735. PYTHONIOENCODING: 'utf-8',
  736. PYTHONUTF8: '1',
  737. // 设置虚拟环境相关环境变量
  738. VIRTUAL_ENV: venvAbsolutePath,
  739. // 将虚拟环境的 Scripts 目录添加到 PATH 前面,确保使用虚拟环境中的工具
  740. PATH: `${venvScriptsAbsolutePath};${process.env.PATH}`
  741. }
  742. });
  743. // 清理临时文件
  744. try {
  745. await import('fs/promises').then(fs => fs.unlink(tempScriptPath));
  746. } catch (e) {
  747. // 忽略删除失败
  748. }
  749. const cleanStdout = stdout.replace(/\[33m.*?\[0m/g, '').replace(/DeprecationWarning.*?\n/g, '');
  750. try {
  751. const result = JSON.parse(cleanStdout.trim());
  752. return result;
  753. } catch (parseError) {
  754. return { success: false, error: `解析失败: ${parseError.message}` };
  755. }
  756. } catch (error) {
  757. return { success: false, error: error.message };
  758. }
  759. }