// ocr-chat.js (38 KB)
  /**
   * OCR 聊天记录提取功能(Node.js 实现)
   *
   * 功能:根据屏幕截图提取聊天记录和聊天角色,并输出为 JSON 格式
   * - 使用 OCR 识别聊天内容
   * - 根据头像位置或消息位置识别发送者角色(friend/me)
   * - 返回 JSON 格式的消息数组,每条消息包含 sender 和 text 字段
   *
   * 实现方式:直接调用 Python 的 OnnxOCR 和 OpenCV,通过内联 Python 代码实现
   */
  import { exec, execFile } from 'child_process';
  import { promisify } from 'util';
  import { join, isAbsolute, dirname, relative } from 'path';
  import { fileURLToPath } from 'url';
  import { readFile, writeFile, mkdir, unlink } from 'fs/promises';
  import { existsSync, readdirSync, statSync } from 'fs';
  // Promise-returning wrapper around child_process.exec (resolves to { stdout, stderr }).
  const execAsync = promisify(exec);
  // ES modules do not define __filename / __dirname, so derive them from import.meta.url.
  const __filename = fileURLToPath(import.meta.url);
  const __dirname = dirname(__filename);
  /**
   * Rewrites `py/venv/pyvenv.cfg` so that it points at the Python interpreter
   * found on the current machine's PATH.
   *
   * Nothing is written when the config file is missing, when it already names
   * an `executable` that exists on disk, or when the PATH interpreter cannot be
   * probed. Any failure is logged with `console.warn` and swallowed on purpose,
   * so callers can still try to run the venv interpreter.
   *
   * @param {string} projectRoot - Project root directory (the folder containing `py/venv`).
   * @returns {Promise<void>}
   */
  async function ensurePyvenvConfig(projectRoot) {
    const pyvenvCfgPath = join(projectRoot, 'py', 'venv', 'pyvenv.cfg');
    if (!existsSync(pyvenvCfgPath)) {
      return; // No venv config to repair.
    }
    try {
      const currentContent = await readFile(pyvenvCfgPath, 'utf8');
      const homeMatch = currentContent.match(/^home\s*=\s*(.+)$/m);
      const executableMatch = currentContent.match(/^executable\s*=\s*(.+)$/m);
      // The config is considered healthy when both keys are present and the
      // recorded interpreter still exists.
      if (homeMatch && executableMatch && existsSync(executableMatch[1].trim())) {
        return;
      }

      // Probe the PATH interpreter (not the venv one) in a single call.
      // Only single quotes are used inside the double-quoted -c argument: nested
      // double quotes are stripped by the shell and turn the snippet into
      // invalid Python.
      const probeCommand =
        'python -c "import os, sys; print(os.path.dirname(sys.executable)); ' +
        "print('.'.join(str(part) for part in sys.version_info[:3]))\"";
      const { stdout } = await execAsync(probeCommand, {
        encoding: 'utf8',
        timeout: 5000,
        cwd: projectRoot,
      });
      const [pythonHome = '', pythonVersion = ''] = stdout
        .split(/\r?\n/)
        .map((line) => line.trim())
        .filter(Boolean);
      if (!pythonHome || !pythonVersion) {
        return; // Probe produced nothing usable.
      }

      const pythonExe = join(pythonHome, 'python.exe');
      if (!existsSync(pythonExe)) {
        return; // Reported interpreter directory has no python.exe.
      }

      const configLines = [
        `home = ${pythonHome}`,
        'include-system-site-packages = false',
        `version = ${pythonVersion}`,
        `executable = ${pythonExe}`,
        `command = ${pythonExe} -m venv py/venv`,
      ];
      await writeFile(pyvenvCfgPath, `${configLines.join('\n')}\n`, 'utf8');
    } catch (error) {
      // Best effort: never block the main flow on a config refresh.
      console.warn('无法更新 pyvenv.cfg:', error.message);
    }
  }
  /**
   * Returns Python source that defines `read_image_safe(image_path)`.
   *
   * The generated helper reads the file bytes with Python's own `open()` and
   * decodes them with `cv2.imdecode`, instead of handing the path to OpenCV.
   * It raises `FileNotFoundError` for a missing file and a generic `Exception`
   * (wrapping the original message) for any other failure, including an
   * undecodable image.
   *
   * The snippet is meant to be interpolated at the top level of a larger
   * Python script, so it must stay at column 0.
   *
   * @param {string} [imagePath] - Unused; accepted only for backward compatibility.
   * @returns {string} Python source code.
   */
  function readImageSafe(imagePath) {
    return `
import cv2
import numpy as np
import os
def read_image_safe(image_path):
    abs_path = os.path.abspath(str(image_path))
    try:
        with open(abs_path, 'rb') as f:
            image_data = f.read()
        img_array = np.frombuffer(image_data, np.uint8)
        img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
        if img is None:
            raise ValueError(f"cv2.imdecode 无法解码图片: {abs_path}")
        return img
    except FileNotFoundError:
        raise FileNotFoundError(f"图片文件不存在: {abs_path}")
    except Exception as e:
        raise Exception(f"读取图片失败: {abs_path}, 错误: {str(e)}")
`;
  }
  /**
   * Converts a caller-supplied path into the argument passed to the Python script.
   *
   * Absolute paths are made relative to the project root (the Python process
   * runs with that directory as its cwd) and backslashes become forward
   * slashes. A missing path is encoded as the literal string 'None', which the
   * Python side treats as "not provided".
   *
   * @param {string} projectRoot - Directory used as cwd for the Python process.
   * @param {string|null|undefined} targetPath - Path to convert.
   * @returns {string}
   */
  function toPythonPathArg(projectRoot, targetPath) {
    if (!targetPath) {
      return 'None';
    }
    const candidate = isAbsolute(targetPath) ? relative(projectRoot, targetPath) : targetPath;
    // relative() yields '' when the target is the project root itself.
    return (candidate || '.').replace(/\\/g, '/');
  }

  /**
   * Extracts the chat messages and their sender roles from a screenshot.
   *
   * An inline Python script (OnnxOCR + OpenCV) is written to a uniquely named
   * temporary file in the project root, executed with the bundled portable
   * Python (`python/python.exe`) or, if that is absent, the project venv
   * (`py/venv/Scripts/python.exe`), and its JSON output is returned. The
   * temporary script is removed whether or not the run succeeds.
   *
   * Sender detection order inside the script: bubble colour (only when both
   * RGB values are given), then vertical distance to a matched avatar, then
   * the horizontal position of the text relative to the screen centre.
   *
   * @param {string} screenshotPath - Screenshot file (absolute, or relative to the project root).
   * @param {string} [friendAvatarPath] - Friend avatar template image (optional).
   * @param {string} [myAvatarPath] - Own avatar template image (optional).
   * @param {number} [deviceWidth] - Device width in pixels; 1080 when falsy.
   * @param {number} [deviceHeight] - Device height in pixels; 2400 when falsy.
   * @param {string} [workflowFolder] - Workflow folder; cropped debug images go to its `tmp` subfolder.
   * @param {string|object|null} [regionJson] - Recognition region: JSON text (or an object, which is
   *   serialised) holding `topLeft` / `bottomRight` points with `x` and `y`.
   * @param {string|null} [friendRgb] - Friend bubble colour as "(r,g,b)"; non-string values are ignored.
   * @param {string|null} [myRgb] - Own bubble colour as "(r,g,b)"; non-string values are ignored.
   * @returns {Promise<{success: boolean, messages?: Array<{sender: 'friend'|'me'|'unknown', text: string, y: number, confidence: number}>, messagesText?: string, count?: number, error?: string}>}
   *   Never rejects: every failure is reported as `{ success: false, error }`.
   */
  export async function extractChatHistory(screenshotPath, friendAvatarPath, myAvatarPath, deviceWidth, deviceHeight, workflowFolder, regionJson = null, friendRgb = null, myRgb = null) {
    // Tracked outside the try block so the finally clause can always clean it up.
    let tempScriptAbsolutePath = null;
    try {
      if (typeof screenshotPath !== 'string' || screenshotPath === '') {
        return { success: false, error: '截图路径无效' };
      }

      const execFileAsync = promisify(execFile);
      const projectRoot = join(__dirname, '..', '..');

      // Prefer the portable Python shipped with the project; otherwise use the venv.
      const portablePythonExe = join(projectRoot, 'python', 'python.exe');
      const isPortablePython = existsSync(portablePythonExe);
      if (!isPortablePython) {
        // Make sure pyvenv.cfg points at an interpreter that exists on this machine.
        await ensurePyvenvConfig(projectRoot);
      }
      const venvAbsolutePath = isPortablePython ? join(projectRoot, 'python') : join(projectRoot, 'py', 'venv');
      const venvScriptsAbsolutePath = join(venvAbsolutePath, 'Scripts');
      const pythonCommand = isPortablePython ? portablePythonExe : join(venvScriptsAbsolutePath, 'python.exe');

      // Inline Python script. Its content is a runtime string: top-level Python
      // statements must stay at column 0.
      const pythonCode = `
import sys
import os
import cv2
import numpy as np
import json
from pathlib import Path
# OnnxOCR is expected to be pip-installed in the interpreter environment.
from onnxocr.onnx_paddleocr import ONNXPaddleOcr
os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'
${readImageSafe()}
# OCR texts dropped as on-screen keyboard keys when they match exactly.
KEYBOARD_KEYWORDS = frozenset([
    'ABC', 'DEF', 'GHI', 'JKL', 'MNO', 'PQRS', 'TUV', 'WXYZ',
    '分词', '重输', '换行', '符', '中/英', '中二', '123', '0', '1', '2', '3',
    '4', '5', '6', '7', '8', '9', 'Q', 'W', 'E', 'R', 'T', 'Y', 'U', 'I',
    'O', 'P', 'A', 'S', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'Z', 'X', 'C',
    'V', 'B', 'N', 'M', '△', '三', '-'])

def find_avatar_positions(screenshot, friend_avatar_path, my_avatar_path):
    """Template-match both avatars inside an already decoded screenshot image."""
    result = {'friend': [], 'my': []}
    if friend_avatar_path and friend_avatar_path != 'None':
        try:
            friend_avatar = read_image_safe(friend_avatar_path)
            result_friend = cv2.matchTemplate(screenshot, friend_avatar, cv2.TM_CCOEFF_NORMED)
            locations_friend = np.where(result_friend >= 0.8)
            for pt in zip(*locations_friend[::-1]):
                result['friend'].append([int(pt[0]), int(pt[1])])
        except Exception as e:
            print(f"查找好友头像失败: {e}", file=sys.stderr)
    if my_avatar_path and my_avatar_path != 'None':
        try:
            my_avatar = read_image_safe(my_avatar_path)
            result_my = cv2.matchTemplate(screenshot, my_avatar, cv2.TM_CCOEFF_NORMED)
            locations_my = np.where(result_my >= 0.8)
            for pt in zip(*locations_my[::-1]):
                result['my'].append([int(pt[0]), int(pt[1])])
        except Exception as e:
            print(f"查找我的头像失败: {e}", file=sys.stderr)
    return result

def detect_bubble_color(screenshot, box):
    """Return the mean [r, g, b] of the slightly enlarged text box, or None on failure."""
    try:
        x_coords = [point[0] for point in box]
        y_coords = [point[1] for point in box]
        x_min, x_max = int(min(x_coords)), int(max(x_coords))
        y_min, y_max = int(min(y_coords)), int(max(y_coords))
        # Clamp to the image bounds.
        x_min = max(0, x_min)
        y_min = max(0, y_min)
        x_max = min(screenshot.shape[1] - 1, x_max)
        y_max = min(screenshot.shape[0] - 1, y_max)
        if x_max <= x_min or y_max <= y_min:
            return None
        # Grow the box (5 px horizontally, 10 px vertically) to include bubble background.
        expand_x = 5
        expand_y = 10
        x_start = max(0, x_min - expand_x)
        y_start = max(0, y_min - expand_y)
        x_end = min(screenshot.shape[1], x_max + expand_x)
        y_end = min(screenshot.shape[0], y_max + expand_y)
        bubble_region = screenshot[y_start:y_end, x_start:x_end]
        if bubble_region.size == 0:
            return None
        # OpenCV stores pixels as BGR; convert the mean to RGB.
        avg_bgr = np.mean(bubble_region.reshape(-1, 3), axis=0)
        avg_rgb = [int(avg_bgr[2]), int(avg_bgr[1]), int(avg_bgr[0])]
        return avg_rgb
    except Exception as e:
        return None

def match_rgb_color(actual_rgb, target_rgb, tolerance=30):
    """True when every channel of actual_rgb is within tolerance of target_rgb."""
    if actual_rgb is None or target_rgb is None:
        return False
    return all(abs(actual_rgb[i] - target_rgb[i]) <= tolerance for i in range(3))

def extract_chat_history(screenshot_path, friend_avatar_path, my_avatar_path, device_width, device_height, workflow_folder, region_json=None, friend_rgb=None, my_rgb=None):
    """Run OCR on the screenshot (optionally cropped) and label each text line with a sender."""
    try:
        original_screenshot = read_image_safe(screenshot_path)
        if original_screenshot is None:
            return {'success': False, 'error': '无法读取截图文件'}
        # When a region is given, crop first so OCR only sees that region
        # (keeps the keyboard and navigation bar out of the result).
        crop_offset_x = 0
        crop_offset_y = 0
        screenshot = original_screenshot
        original_height = original_screenshot.shape[0]
        original_width = original_screenshot.shape[1]
        if region_json and region_json != 'None':
            try:
                region = json.loads(region_json)
                # Expected shape: corners object with topLeft / bottomRight points.
                if isinstance(region, dict) and 'topLeft' in region and 'bottomRight' in region:
                    top_left = region['topLeft']
                    bottom_right = region['bottomRight']
                    x1 = int(top_left.get('x', 0))
                    y1 = int(top_left.get('y', 0))
                    x2 = int(bottom_right.get('x', original_width))
                    y2 = int(bottom_right.get('y', original_height))
                    # Clamp to the image and force x2 > x1, y2 > y1.
                    x1 = max(0, min(x1, original_width - 1))
                    y1 = max(0, min(y1, original_height - 1))
                    x2 = max(x1 + 1, min(x2, original_width))
                    y2 = max(y1 + 1, min(y2, original_height))
                    if x2 > x1 and y2 > y1:
                        crop_offset_x = x1
                        crop_offset_y = y1
                        screenshot = original_screenshot[y1:y2, x1:x2]
                        if screenshot is not None and screenshot.size > 0:
                            # Save the crop into the workflow tmp folder for debugging.
                            try:
                                import datetime
                                if workflow_folder and workflow_folder != 'None':
                                    workflow_path = Path(workflow_folder)
                                    tmp_dir = workflow_path / 'tmp'
                                else:
                                    # Backward compatibility: walk up from the screenshot (at most
                                    # 5 levels) looking for a tmp folder or a processing.json marker.
                                    screenshot_path_obj = Path(screenshot_path)
                                    current = screenshot_path_obj.parent
                                    tmp_dir = None
                                    for _ in range(5):
                                        if (current / 'tmp').exists():
                                            tmp_dir = current / 'tmp'
                                            break
                                        if (current / 'processing.json').exists():
                                            tmp_dir = current / 'tmp'
                                            break
                                        current = current.parent
                                    if tmp_dir is None:
                                        tmp_dir = screenshot_path_obj.parent / 'tmp'
                                os.makedirs(str(tmp_dir), exist_ok=True)
                                timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S_%f')
                                cropped_image_path = tmp_dir / f'cropped_region_{timestamp}.png'
                                cv2.imwrite(str(cropped_image_path), screenshot)
                                # Keep the tmp folder under 20MB by deleting the oldest files first.
                                try:
                                    max_size = 20 * 1024 * 1024
                                    files = []
                                    if tmp_dir.exists():
                                        for file_path in tmp_dir.iterdir():
                                            if file_path.is_file():
                                                file_stat = file_path.stat()
                                                files.append({
                                                    'path': file_path,
                                                    'size': file_stat.st_size,
                                                    'mtime': file_stat.st_mtime
                                                })
                                    total_size = sum(f['size'] for f in files)
                                    if total_size > max_size:
                                        files.sort(key=lambda x: x['mtime'])
                                        for file_info in files:
                                            if total_size <= max_size:
                                                break
                                            try:
                                                file_info['path'].unlink()
                                                total_size -= file_info['size']
                                            except Exception as del_error:
                                                pass
                                except Exception as cleanup_error:
                                    pass  # Cleanup is best effort.
                                print(f"Python: 裁剪后的图片已保存到: {cropped_image_path}", file=sys.stderr)
                                print(f"Python: 裁剪区域 - x1={x1}, y1={y1}, x2={x2}, y2={y2}, 原始尺寸={original_width}x{original_height}, 裁剪后尺寸={screenshot.shape[1]}x{screenshot.shape[0]}", file=sys.stderr)
                            except Exception as save_error:
                                print(f"Python: 保存裁剪图片失败: {save_error}", file=sys.stderr)
                                import traceback
                                traceback.print_exc(file=sys.stderr)
                        else:
                            screenshot = original_screenshot
                            crop_offset_x = 0
                            crop_offset_y = 0
            except Exception as e:
                print(f"Python: 区域裁剪异常: {e}", file=sys.stderr)
                import traceback
                traceback.print_exc(file=sys.stderr)
        # Match avatars on the cropped pixels when the crop moved the origin, otherwise
        # on the full screenshot. The image array is passed directly, so no temporary
        # PNG has to be written and read back.
        if crop_offset_x > 0 or crop_offset_y > 0:
            avatar_search_image = screenshot
        else:
            avatar_search_image = original_screenshot
        avatar_positions = find_avatar_positions(avatar_search_image, friend_avatar_path, my_avatar_path)
        ocr = ONNXPaddleOcr(use_angle_cls=False, use_gpu=True)
        # cls=False avoids the angle classifier warning.
        ocr_result = ocr.ocr(screenshot, cls=False)
        if not ocr_result or not ocr_result[0]:
            return {'success': False, 'error': 'OCR 识别失败'}
        messages = []
        friend_positions = avatar_positions.get('friend', [])
        my_positions = avatar_positions.get('my', [])
        screenshot_height = screenshot.shape[0]
        screenshot_width = screenshot.shape[1]
        # Text below this y is treated as keyboard area. With an explicit region the
        # caller already limited the area, so only the lowest 10% is filtered;
        # otherwise everything below 65% of the height is dropped.
        if region_json and region_json != 'None':
            keyboard_threshold_y = int(screenshot_height * 0.90)
        else:
            keyboard_threshold_y = int(screenshot_height * 0.65)
        for line in ocr_result[0]:
            if not line:
                continue
            box = line[0]
            text = line[1][0] if len(line) > 1 and line[1] else ''
            confidence = line[1][1] if len(line) > 1 and len(line[1]) > 1 else 0.0
            if not text or confidence < 0.5:
                continue
            x_center = sum([point[0] for point in box]) / len(box)
            y_center = sum([point[1] for point in box]) / len(box)
            if y_center > keyboard_threshold_y:
                continue
            if text.strip() in KEYBOARD_KEYWORDS:
                continue
            # x position in full-screenshot coordinates, comparable with device_width.
            x_on_screen = x_center + crop_offset_x
            sender = 'unknown'
            # Method 1: bubble colour (highest priority).
            if friend_rgb and my_rgb:
                try:
                    bubble_rgb = detect_bubble_color(screenshot, box)
                    if bubble_rgb:
                        if match_rgb_color(bubble_rgb, friend_rgb, tolerance=40):
                            sender = 'friend'
                        elif match_rgb_color(bubble_rgb, my_rgb, tolerance=40):
                            sender = 'me'
                except Exception as e:
                    pass  # Fall through to the other methods.
            # Method 2: vertical distance to the nearest matched avatar.
            if sender == 'unknown':
                min_friend_dist = float('inf')
                min_my_dist = float('inf')
                for fx, fy in friend_positions:
                    dist = abs(y_center - (fy + 20))  # assumes an avatar about 40px tall
                    if dist < min_friend_dist:
                        min_friend_dist = dist
                for mx, my in my_positions:
                    dist = abs(y_center - (my + 20))
                    if dist < min_my_dist:
                        min_my_dist = dist
                if min_friend_dist < 100 and min_friend_dist < min_my_dist:
                    sender = 'friend'
                elif min_my_dist < 100 and min_my_dist < min_friend_dist:
                    sender = 'me'
                else:
                    # Method 3: left half of the screen is the friend, right half is me.
                    screen_center_x = device_width / 2
                    if not friend_positions and not my_positions:
                        if x_on_screen < screen_center_x:
                            sender = 'friend'
                        else:
                            sender = 'me'
                    elif friend_positions and not my_positions:
                        # Only the friend avatar was found: relax the distance to 150px.
                        if min_friend_dist < 150:
                            sender = 'friend'
                        elif x_on_screen < screen_center_x:
                            sender = 'friend'
                        else:
                            sender = 'me'
                    elif my_positions and not friend_positions:
                        # Only my avatar was found: relax the distance to 150px.
                        if min_my_dist < 150:
                            sender = 'me'
                        elif x_on_screen >= screen_center_x:
                            sender = 'me'
                        else:
                            sender = 'friend'
                    else:
                        if x_on_screen < screen_center_x:
                            sender = 'friend'
                        else:
                            sender = 'me'
            messages.append({
                'text': text,
                'sender': sender,
                'y': int(y_center),
                'confidence': float(confidence)
            })
        # Top to bottom.
        messages.sort(key=lambda m: m['y'])
        messages_text = '\\n'.join([f"{'对方' if m['sender'] == 'friend' else '我' if m['sender'] == 'me' else '未知'}: {m['text']}" for m in messages])
        result = {
            'success': True,
            'messages': messages,
            'messagesText': messages_text,
            'count': len(messages)
        }
        return result
    except Exception as e:
        return {'success': False, 'error': f'提取聊天记录失败: {str(e)}'}

def parse_rgb_arg(raw, label):
    """Parse '(r,g,b)' or '(r, g, b)' into [r, g, b]; None when absent or malformed."""
    if not raw or raw == 'None':
        return None
    try:
        parts = [int(x.strip()) for x in raw.strip().strip('()').split(',')]
        if len(parts) == 3:
            return parts
    except Exception as e:
        print(f"Python: 解析{label}RGB失败: {e}", file=sys.stderr)
    return None

if __name__ == '__main__':
    screenshot_path = sys.argv[1]
    friend_avatar_path = sys.argv[2] if len(sys.argv) > 2 and sys.argv[2] != 'None' else None
    my_avatar_path = sys.argv[3] if len(sys.argv) > 3 and sys.argv[3] != 'None' else None
    device_width = int(sys.argv[4]) if len(sys.argv) > 4 and sys.argv[4] else 1080
    device_height = int(sys.argv[5]) if len(sys.argv) > 5 and sys.argv[5] else 2400
    workflow_folder = sys.argv[6] if len(sys.argv) > 6 and sys.argv[6] != 'None' else None
    region_json = sys.argv[7] if len(sys.argv) > 7 and sys.argv[7] != 'None' else None
    friend_rgb_str = sys.argv[8] if len(sys.argv) > 8 and sys.argv[8] != 'None' else None
    my_rgb_str = sys.argv[9] if len(sys.argv) > 9 and sys.argv[9] != 'None' else None
    friend_rgb = parse_rgb_arg(friend_rgb_str, '好友')
    my_rgb = parse_rgb_arg(my_rgb_str, '我的')
    # Debug trace of the received arguments.
    print(f"Python: 接收到的参数 - region_json={'已提供' if region_json and region_json != 'None' else '未提供'}, friend_rgb={friend_rgb}, my_rgb={my_rgb}", file=sys.stderr)
    result = extract_chat_history(screenshot_path, friend_avatar_path, my_avatar_path, device_width, device_height, workflow_folder, region_json, friend_rgb, my_rgb)
    print(json.dumps(result, ensure_ascii=False))
`;

      // A unique name keeps concurrent calls from overwriting each other's script.
      const uniqueSuffix = `${process.pid}_${Date.now()}_${Math.random().toString(36).slice(2, 10)}`;
      tempScriptAbsolutePath = join(projectRoot, `temp_extract_chat_history_${uniqueSuffix}.py`);
      await writeFile(tempScriptAbsolutePath, pythonCode, 'utf8');

      let regionArg = 'None';
      if (regionJson && regionJson !== 'None') {
        regionArg = typeof regionJson === 'string' ? regionJson : JSON.stringify(regionJson);
      }

      // Arguments go through execFile as an array: no shell is involved, so
      // paths with spaces and the JSON region need no manual quoting.
      const scriptArgs = [
        tempScriptAbsolutePath,
        toPythonPathArg(projectRoot, screenshotPath),
        toPythonPathArg(projectRoot, friendAvatarPath),
        toPythonPathArg(projectRoot, myAvatarPath),
        String(deviceWidth || 1080),
        String(deviceHeight || 2400),
        toPythonPathArg(projectRoot, workflowFolder),
        regionArg,
        friendRgb && typeof friendRgb === 'string' ? friendRgb : 'None',
        myRgb && typeof myRgb === 'string' ? myRgb : 'None',
      ];

      const { stdout } = await execFileAsync(pythonCommand, scriptArgs, {
        timeout: 60000,
        maxBuffer: 10 * 1024 * 1024,
        cwd: projectRoot, // Relative path arguments are resolved against the project root.
        encoding: 'utf8',
        env: {
          ...process.env,
          DISABLE_MODEL_SOURCE_CHECK: 'True',
          PYTHONIOENCODING: 'utf-8',
          PYTHONUTF8: '1',
          VIRTUAL_ENV: venvAbsolutePath,
          // Put the environment's Scripts directory first on PATH.
          PATH: `${venvScriptsAbsolutePath};${process.env.PATH}`,
        },
      });

      // Strip colourised warnings and DeprecationWarning lines before parsing.
      const cleanStdout = stdout
        .replace(/\[33m.*?\[0m/g, '')
        .replace(/DeprecationWarning.*?\n/g, '')
        .trim();
      try {
        return JSON.parse(cleanStdout);
      } catch (parseError) {
        // The script prints its JSON result as the final line; tolerate library
        // chatter printed to stdout before it.
        const outputLines = cleanStdout.split(/\r?\n/);
        try {
          return JSON.parse(outputLines[outputLines.length - 1].trim());
        } catch (lastLineError) {
          return { success: false, error: `解析聊天记录失败: ${parseError.message}` };
        }
      }
    } catch (error) {
      // A timeout kills the child: the error carries killed === true, and its
      // message does not necessarily mention "timeout".
      const killedByTimeout = error?.killed === true && error?.code !== 'ERR_CHILD_PROCESS_STDIO_MAXBUFFER';
      if (killedByTimeout || error?.message?.includes('timeout')) {
        return { success: false, error: '提取聊天记录超时,请检查网络连接或稍后重试' };
      }
      return { success: false, error: error?.message ?? String(error) };
    } finally {
      if (tempScriptAbsolutePath) {
        try {
          await unlink(tempScriptAbsolutePath);
        } catch (cleanupError) {
          // Best effort: the script may never have been written.
        }
      }
    }
  }
  /**
   * Returns the bottom-most chat message found in a screenshot.
   *
   * Runs `extractChatHistory` (without a workflow folder, region or bubble
   * colours) and picks the message with the largest `y`; on a tie the earliest
   * such message in the list wins.
   *
   * @param {string} screenshotPath - Screenshot file path.
   * @param {string} [friendAvatarPath] - Friend avatar template image (optional).
   * @param {string} [myAvatarPath] - Own avatar template image (optional).
   * @param {number} [deviceWidth] - Device width in pixels.
   * @param {number} [deviceHeight] - Device height in pixels.
   * @returns {Promise<{success: boolean, text?: string, sender?: string, position?: {y: number}, error?: string}>}
   *   Never rejects; failures are reported as `{ success: false, error }`.
   */
  export async function getLastMessage(screenshotPath, friendAvatarPath, myAvatarPath, deviceWidth, deviceHeight) {
    try {
      const result = await extractChatHistory(screenshotPath, friendAvatarPath, myAvatarPath, deviceWidth, deviceHeight, null);
      if (!result.success) {
        // Surface the real extraction error instead of a generic "no message".
        return { success: false, error: result.error || '未找到消息' };
      }
      if (!result.messages || result.messages.length === 0) {
        return { success: false, error: '未找到消息' };
      }
      const lastMessage = result.messages.reduce(
        (lowest, candidate) => (candidate.y > lowest.y ? candidate : lowest),
        result.messages[0],
      );
      return {
        success: true,
        text: lastMessage.text,
        sender: lastMessage.sender,
        position: { y: lastMessage.y },
      };
    } catch (error) {
      return { success: false, error: error.message };
    }
  }
  /**
   * Runs OCR over a whole screenshot and returns every recognised line joined by newlines.
   *
   * An inline Python script (OnnxOCR + OpenCV) is written to a uniquely named
   * temporary file in the project root and executed with the bundled portable
   * Python (`python/python.exe`) or, if that is absent, the project venv. The
   * script prints one JSON object; on failure it prints a JSON error and exits
   * with status 1, and that JSON error is what gets returned. The temporary
   * script is removed whether or not the run succeeds.
   *
   * @param {string} screenshotPath - Screenshot file (absolute, or relative to the project root).
   * @param {number} [deviceWidth] - Unused; kept for interface compatibility.
   * @param {number} [deviceHeight] - Unused; kept for interface compatibility.
   * @returns {Promise<{success: boolean, text?: string, position?: null, error?: string}>}
   *   Never rejects; failures are reported as `{ success: false, error }`.
   */
  export async function ocrFullScreen(screenshotPath, deviceWidth, deviceHeight) {
    // Parses the script's JSON output, tolerating warnings printed before it.
    const parsePythonJson = (rawStdout) => {
      const cleaned = String(rawStdout ?? '')
        .replace(/\[33m.*?\[0m/g, '')
        .replace(/DeprecationWarning.*?\n/g, '')
        .trim();
      try {
        return JSON.parse(cleaned);
      } catch (wholeOutputError) {
        const outputLines = cleaned.split(/\r?\n/);
        try {
          return JSON.parse(outputLines[outputLines.length - 1].trim());
        } catch (lastLineError) {
          throw wholeOutputError;
        }
      }
    };

    // Tracked outside the try block so the finally clause can always clean it up.
    let tempScriptPath = null;
    try {
      if (typeof screenshotPath !== 'string' || screenshotPath === '') {
        return { success: false, error: '截图路径无效' };
      }

      const execFileAsync = promisify(execFile);
      const projectRoot = join(__dirname, '..', '..');

      // Prefer the portable Python shipped with the project; otherwise use the venv.
      const portablePythonExe = join(projectRoot, 'python', 'python.exe');
      const isPortablePython = existsSync(portablePythonExe);
      if (!isPortablePython) {
        // Make sure pyvenv.cfg points at an interpreter that exists on this machine.
        await ensurePyvenvConfig(projectRoot);
      }
      const venvAbsolutePath = isPortablePython ? join(projectRoot, 'python') : join(projectRoot, 'py', 'venv');
      const venvScriptsAbsolutePath = join(venvAbsolutePath, 'Scripts');
      const pythonCommand = isPortablePython ? portablePythonExe : join(venvScriptsAbsolutePath, 'python.exe');

      // Inline Python script. Its content is a runtime string: top-level Python
      // statements must stay at column 0.
      const pythonCode = `
import sys
import os
import cv2
import numpy as np
import json
# OnnxOCR is expected to be pip-installed in the interpreter environment.
from onnxocr.onnx_paddleocr import ONNXPaddleOcr
os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'

def read_image_safe(image_path):
    abs_path = os.path.abspath(str(image_path))
    try:
        with open(abs_path, 'rb') as f:
            image_data = f.read()
        img_array = np.frombuffer(image_data, np.uint8)
        img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
        if img is None:
            raise ValueError(f"cv2.imdecode 无法解码图片: {abs_path}")
        return img
    except FileNotFoundError:
        raise FileNotFoundError(f"图片文件不存在: {abs_path}")
    except Exception as e:
        raise Exception(f"读取图片失败: {abs_path}, 错误: {str(e)}")

if __name__ == '__main__':
    screenshot_path = sys.argv[1]
    try:
        screenshot = read_image_safe(screenshot_path)
        if screenshot is None:
            print(json.dumps({'success': False, 'error': '无法读取截图文件'}, ensure_ascii=False))
            sys.exit(1)
        ocr = ONNXPaddleOcr(use_angle_cls=False, use_gpu=True)
        ocr_result = ocr.ocr(screenshot, cls=False)
        if not ocr_result or not ocr_result[0]:
            print(json.dumps({'success': False, 'error': 'OCR 识别失败'}, ensure_ascii=False))
            sys.exit(1)
        # Collect every recognised text line.
        texts = []
        for line in ocr_result[0]:
            if line and len(line) > 1:
                text = line[1][0] if isinstance(line[1], (list, tuple)) else str(line[1])
                if text:
                    texts.append(text)
        full_text = '\\n'.join(texts)
        print(json.dumps({
            'success': True,
            'text': full_text,
            'position': None
        }, ensure_ascii=False))
    except Exception as e:
        print(json.dumps({'success': False, 'error': f'OCR 识别失败: {str(e)}'}, ensure_ascii=False))
        sys.exit(1)
`;

      // A unique name keeps concurrent calls from overwriting each other's script.
      const uniqueSuffix = `${process.pid}_${Date.now()}_${Math.random().toString(36).slice(2, 10)}`;
      tempScriptPath = join(projectRoot, `temp_ocr_full_screen_${uniqueSuffix}.py`);
      await writeFile(tempScriptPath, pythonCode, 'utf8');

      const normalizedScreenshotPath = screenshotPath.replace(/\\/g, '/');

      // execFile passes the arguments as an array: no shell, no manual quoting.
      const { stdout } = await execFileAsync(pythonCommand, [tempScriptPath, normalizedScreenshotPath], {
        timeout: 60000,
        maxBuffer: 10 * 1024 * 1024,
        cwd: projectRoot, // A relative screenshot path is resolved against the project root.
        encoding: 'utf8',
        env: {
          ...process.env,
          DISABLE_MODEL_SOURCE_CHECK: 'True',
          PYTHONIOENCODING: 'utf-8',
          PYTHONUTF8: '1',
          VIRTUAL_ENV: venvAbsolutePath,
          // Put the environment's Scripts directory first on PATH.
          PATH: `${venvScriptsAbsolutePath};${process.env.PATH}`,
        },
      });

      try {
        return parsePythonJson(stdout);
      } catch (parseError) {
        return { success: false, error: `解析失败: ${parseError.message}` };
      }
    } catch (error) {
      // The script reports failures as JSON on stdout and then exits with 1,
      // which makes execFile reject; recover that structured error if present.
      if (typeof error?.stdout === 'string' && error.stdout.trim() !== '') {
        try {
          return parsePythonJson(error.stdout);
        } catch (ignoredParseError) {
          // Fall through to the generic error below.
        }
      }
      return { success: false, error: error?.message ?? String(error) };
    } finally {
      if (tempScriptPath) {
        try {
          await unlink(tempScriptPath);
        } catch (cleanupError) {
          // Best effort: the script may never have been written.
        }
      }
    }
  }