# -*- coding: utf-8 -*-
# merge_images_for_gpt.py
"""
Image stitching tool, optimised for GPT.

Scales multiple images and stitches them together, producing images of the
largest size that GPT can read.
"""
  6. import os
  7. import sys
  8. import json
  9. from pathlib import Path
  10. from PIL import Image
  11. # GPT图片尺寸限制(根据实际API限制调整)
  12. # 为了减小base64编码后的体积,使用较小的尺寸
  13. # 2048x2048 对于黑白漫画来说足够清晰,AI可以识别文字和内容
  14. # 这样可以显著减小文件大小(约减少75%的体积)
  15. GPT_MAX_WIDTH = 2048
  16. GPT_MAX_HEIGHT = 2048
  17. GPT_MAX_PIXELS = GPT_MAX_WIDTH * GPT_MAX_HEIGHT # 约4.2MP
  18. # 为了保证文字清晰,单张图片的最小尺寸(针对黑白漫画优化)
  19. MIN_IMAGE_WIDTH = 600 # 最小宽度,保证文字可读(黑白漫画可以稍小)
  20. MIN_IMAGE_HEIGHT = 450 # 最小高度
  21. def calculate_grid_layout(images, max_width=GPT_MAX_WIDTH, max_height=GPT_MAX_HEIGHT):
  22. """
  23. 计算网格布局参数,最大化利用空间(确保单张网格图不超过限制)
  24. 参数:
  25. images: 图片列表
  26. max_width: 最大宽度
  27. max_height: 最大高度
  28. 返回:
  29. (cols, rows, scale_factor, single_width, single_height)
  30. """
  31. if not images:
  32. return 1, 1, 1.0, 0, 0
  33. # 获取单张图片的尺寸(假设所有图片尺寸相同)
  34. single_width = images[0].width
  35. single_height = images[0].height
  36. # 尝试不同的行列组合,找到能放最多图片且不超过限制的组合
  37. best_cols = 1
  38. best_rows = 1
  39. best_scale = 1.0
  40. max_images_per_grid = 0
  41. # 遍历可能的列数(从1开始,最多到能放下的最大列数)
  42. max_possible_cols = min(int(max_width / MIN_IMAGE_WIDTH), len(images))
  43. for cols in range(1, max_possible_cols + 1):
  44. # 计算需要的宽度
  45. needed_width = single_width * cols
  46. # 如果宽度超过限制,需要缩放
  47. if needed_width > max_width:
  48. scale_w = max_width / needed_width
  49. else:
  50. scale_w = 1.0
  51. # 计算缩放后单张图片的宽度
  52. scaled_single_width = single_width * scale_w
  53. # 确保缩放后宽度不小于最小值
  54. if scaled_single_width < MIN_IMAGE_WIDTH:
  55. continue
  56. # 计算缩放后单张图片的高度(保持宽高比)
  57. scaled_single_height = single_height * scale_w
  58. # 计算能放多少行
  59. max_rows = int(max_height / scaled_single_height)
  60. if max_rows < 1:
  61. continue
  62. # 计算这个布局能放多少张图片
  63. images_per_grid = cols * max_rows
  64. # 如果这个布局能放更多图片,记录它
  65. if images_per_grid > max_images_per_grid:
  66. max_images_per_grid = images_per_grid
  67. best_cols = cols
  68. best_rows = max_rows
  69. best_scale = scale_w
  70. # 如果没找到合适的,使用保守方案
  71. if max_images_per_grid == 0:
  72. # 计算最小缩放因子,确保单张图片能放下
  73. scale_w = min(max_width / single_width, max_height / single_height)
  74. # 确保缩放后不小于最小值
  75. if single_width * scale_w < MIN_IMAGE_WIDTH:
  76. scale_w = MIN_IMAGE_WIDTH / single_width
  77. if single_height * scale_w < MIN_IMAGE_HEIGHT:
  78. scale_w = MIN_IMAGE_HEIGHT / single_height
  79. best_scale = scale_w
  80. scaled_single_width = single_width * best_scale
  81. scaled_single_height = single_height * best_scale
  82. best_cols = max(1, int(max_width / scaled_single_width))
  83. best_rows = max(1, int(max_height / scaled_single_height))
  84. max_images_per_grid = best_cols * best_rows
  85. return best_cols, best_rows, best_scale, single_width, single_height
  86. def resize_image(img, scale_factor):
  87. """缩放图片"""
  88. # 如果缩放因子大于等于1.0,不放大(只缩小)
  89. if scale_factor >= 1.0:
  90. return img
  91. new_width = int(img.width * scale_factor)
  92. new_height = int(img.height * scale_factor)
  93. # 确保尺寸至少为1
  94. new_width = max(1, new_width)
  95. new_height = max(1, new_height)
  96. # 使用高质量缩放算法(LANCZOS)
  97. return img.resize((new_width, new_height), Image.Resampling.LANCZOS)
  98. def merge_images_for_gpt(image_dir, output_path, max_width=GPT_MAX_WIDTH, max_height=GPT_MAX_HEIGHT):
  99. """
  100. 将目录下的所有图片缩放后拼接,生成GPT可读取的图片
  101. 参数:
  102. image_dir: 图片目录(包含单页图片)
  103. output_path: 输出文件路径
  104. max_width: 单张图片最大宽度
  105. max_height: 单张图片最大高度
  106. """
  107. image_dir = Path(image_dir)
  108. if not image_dir.exists():
  109. print(f"❌ 错误:目录不存在 - {image_dir}")
  110. return False
  111. # 获取所有图片文件(按文件名排序)
  112. image_files = sorted(image_dir.glob("*.jpeg")) + sorted(image_dir.glob("*.jpg")) + \
  113. sorted(image_dir.glob("*.png"))
  114. # 过滤掉已拼接的图片
  115. image_files = [f for f in image_files if 'merged' not in f.name.lower() and 'part' not in f.name.lower()]
  116. if not image_files:
  117. print(f"❌ 在 {image_dir} 中未找到图片文件")
  118. return False
  119. print(f"📚 找到 {len(image_files)} 张图片")
  120. print(f"📁 图片目录: {image_dir}")
  121. print(f"📁 输出路径: {output_path}")
  122. print(f"🖼️ GPT最大尺寸: {max_width} x {max_height} 像素")
  123. print("-" * 60)
  124. # 加载所有图片
  125. print("📖 正在加载图片...")
  126. images = []
  127. for i, img_path in enumerate(image_files, 1):
  128. try:
  129. img = Image.open(img_path)
  130. # 转换为RGB模式(如果是灰度,保持灰度)
  131. if img.mode not in ('L', 'RGB'):
  132. img = img.convert('RGB')
  133. images.append(img)
  134. if i % 50 == 0:
  135. print(f" 已加载 {i}/{len(image_files)} 张...")
  136. except Exception as e:
  137. print(f" ⚠️ 跳过 {img_path.name}: {e}")
  138. if not images:
  139. print("❌ 没有成功加载的图片")
  140. return False
  141. print(f"✅ 成功加载 {len(images)} 张图片")
  142. # 检查所有图片尺寸是否一致
  143. first_width = images[0].width
  144. first_height = images[0].height
  145. all_same_size = all(img.width == first_width and img.height == first_height for img in images)
  146. if not all_same_size:
  147. print(f"⚠️ 警告:图片尺寸不一致,将使用第一张图片的尺寸作为基准")
  148. print(f" 第一张: {first_width}x{first_height}, 其他图片将被缩放")
  149. print(f"📊 单张图片尺寸: {first_width} x {first_height} 像素")
  150. # 计算最佳网格布局
  151. print("🔍 正在计算最佳网格布局(从右到左、从上到下,日式漫画阅读顺序)...")
  152. cols, rows, scale_factor, single_width, single_height = calculate_grid_layout(images, max_width, max_height)
  153. scaled_single_width = int(single_width * scale_factor)
  154. scaled_single_height = int(single_height * scale_factor)
  155. grid_width = scaled_single_width * cols
  156. grid_height = scaled_single_height * rows
  157. print(f"📐 布局方案: {cols}列 x {rows}行")
  158. print(f"📉 缩放比例: {scale_factor:.2%}")
  159. print(f"📊 单张图片缩放后: {scaled_single_width} x {scaled_single_height} 像素")
  160. print(f"📊 网格总尺寸: {grid_width} x {grid_height} 像素 ({grid_width * grid_height/1e6:.2f} MP)")
  161. print(f"📈 每张网格图可容纳: {cols * rows} 张原图")
  162. # 缩放所有图片
  163. print("🔄 正在缩放图片...")
  164. scaled_images = []
  165. for i, img in enumerate(images, 1):
  166. # 如果尺寸不一致,需要缩放
  167. if img.width != single_width or img.height != single_height:
  168. # 先缩放到标准尺寸
  169. img = img.resize((single_width, single_height), Image.Resampling.LANCZOS)
  170. # 应用缩放因子
  171. scaled_img = resize_image(img, scale_factor)
  172. scaled_images.append(scaled_img)
  173. if i % 50 == 0:
  174. print(f" 已缩放 {i}/{len(images)} 张...")
  175. # 创建输出子文件夹
  176. output_dir = Path(output_path).parent
  177. # 如果output_path已经在gpt_merged文件夹中,需要回到上一级
  178. if output_dir.name == "gpt_merged":
  179. output_dir = output_dir.parent
  180. output_folder_name = "gpt_merged"
  181. output_folder = output_dir / output_folder_name
  182. output_folder.mkdir(parents=True, exist_ok=True)
  183. output_stem = "gpt_merged"
  184. output_suffix = Path(output_path).suffix if Path(output_path).suffix else ".jpg"
  185. # 计算需要多少张网格图
  186. images_per_grid = cols * rows
  187. total_grids = (len(scaled_images) + images_per_grid - 1) // images_per_grid
  188. print(f"\n📦 需要生成 {total_grids} 张网格图片(每张包含最多 {images_per_grid} 张原图)")
  189. merged_files = []
  190. for grid_num in range(total_grids):
  191. start_idx = grid_num * images_per_grid
  192. end_idx = min(start_idx + images_per_grid, len(scaled_images))
  193. grid_images = scaled_images[start_idx:end_idx]
  194. # 计算当前网格的实际行数
  195. current_rows = (len(grid_images) + cols - 1) // cols
  196. current_grid_height = scaled_single_height * current_rows
  197. # 创建网格图(从右到左、从上到下排列,符合日式漫画阅读顺序)
  198. print(f"\n🔗 正在生成第 {grid_num + 1}/{total_grids} 张网格图({cols}列 x {current_rows}行,包含{len(grid_images)}张原图,从右到左排列)...")
  199. grid_image = Image.new('L' if grid_images[0].mode == 'L' else 'RGB', (grid_width, current_grid_height))
  200. for idx, img in enumerate(grid_images):
  201. row = idx // cols
  202. # 从右到左排列(日式漫画阅读顺序):第一张图片在最右边,然后向左排列
  203. col = (cols - 1) - (idx % cols)
  204. x = col * scaled_single_width
  205. y = row * scaled_single_height
  206. grid_image.paste(img, (x, y))
  207. if (idx + 1) % 50 == 0 or (idx + 1) == len(grid_images):
  208. print(f" 已拼接 {idx + 1}/{len(grid_images)} 张...")
  209. # 如果是最后一张图片且图片数量不足以填满整行,裁剪掉左侧空白
  210. is_last_grid = (grid_num == total_grids - 1)
  211. current_grid_width = grid_width
  212. if is_last_grid and len(grid_images) % cols != 0:
  213. # 由于是从右到左排列,空白在左侧
  214. # 计算最后一行的实际列数
  215. actual_cols_in_last_row = len(grid_images) % cols
  216. # 从右到左排列时,最后一行的图片在右侧,左侧是空白
  217. # 需要裁剪掉的左侧空白宽度 = (cols - actual_cols_in_last_row) * scaled_single_width
  218. left_blank_width = (cols - actual_cols_in_last_row) * scaled_single_width
  219. if left_blank_width > 0 and left_blank_width < grid_width:
  220. # 裁剪掉左侧空白
  221. actual_width = grid_width - left_blank_width
  222. cropped_image = grid_image.crop((left_blank_width, 0, grid_width, current_grid_height))
  223. grid_image = cropped_image
  224. current_grid_width = actual_width
  225. print(f" ✂️ 裁剪左侧空白: {left_blank_width}px,实际尺寸: {current_grid_width}x{current_grid_height}")
  226. # 保存
  227. if total_grids == 1:
  228. part_path = output_folder / f"{output_stem}{output_suffix}"
  229. else:
  230. part_path = output_folder / f"{output_stem}_part{grid_num + 1:02d}{output_suffix}"
  231. grid_image.save(part_path, 'JPEG', quality=85, optimize=True, progressive=True)
  232. file_size = part_path.stat().st_size / 1024 / 1024
  233. merged_files.append(part_path.name)
  234. print(f" ✅ 已保存: {part_path.name} ({current_grid_width}x{current_grid_height}, {file_size:.2f}MB)")
  235. print(f"\n✅ 拼接完成!共生成 {len(merged_files)} 张网格图片")
  236. return merged_files
  237. def main():
  238. """主函数"""
  239. # 获取项目根目录
  240. project_root = Path(__file__).parent.parent
  241. if len(sys.argv) > 1:
  242. # 如果提供了参数,使用指定的目录
  243. image_dir = Path(sys.argv[1])
  244. if len(sys.argv) > 2:
  245. output_path = Path(sys.argv[2])
  246. else:
  247. # 在图片目录下创建gpt_merged子文件夹
  248. output_folder = image_dir / "gpt_merged"
  249. output_folder.mkdir(parents=True, exist_ok=True)
  250. output_path = output_folder / "gpt_merged.jpg"
  251. else:
  252. # 默认使用漫画图片目录
  253. image_dir = project_root / "static" / "漫画" / "image" / "金-田-一-少-年-之-事-件-簿-日-文-版-第001卷"
  254. # 输出到子文件夹
  255. output_folder = image_dir / "gpt_merged"
  256. output_folder.mkdir(parents=True, exist_ok=True)
  257. output_path = output_folder / "gpt_merged.jpg"
  258. print("📖 图片拼接工具 - GPT优化版")
  259. print("=" * 60)
  260. result = merge_images_for_gpt(image_dir, output_path)
  261. if result:
  262. print("\n" + "=" * 60)
  263. print("✅ 处理完成!")
  264. if isinstance(result, list):
  265. print(f"📊 共生成 {len(result)} 张图片")
  266. for name in result:
  267. print(f" - {name}")
  268. else:
  269. print(f"📊 生成图片: {result}")
  270. else:
  271. print("\n❌ 处理失败")
  272. if __name__ == "__main__":
  273. main()