seal_recognition.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377
  1. # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from .._utils.cli import (
  15. add_simple_inference_args,
  16. get_subcommand_args,
  17. perform_simple_inference,
  18. str2bool,
  19. )
  20. from .base import PaddleXPipelineWrapper, PipelineCLISubcommandExecutor
  21. from .utils import create_config_from_structure
  22. class SealRecognition(PaddleXPipelineWrapper):
  23. def __init__(
  24. self,
  25. doc_orientation_classify_model_name=None,
  26. doc_orientation_classify_model_dir=None,
  27. doc_unwarping_model_name=None,
  28. doc_unwarping_model_dir=None,
  29. layout_detection_model_name=None,
  30. layout_detection_model_dir=None,
  31. seal_text_detection_model_name=None,
  32. seal_text_detection_model_dir=None,
  33. text_recognition_model_name=None,
  34. text_recognition_model_dir=None,
  35. text_recognition_batch_size=None,
  36. use_doc_orientation_classify=None,
  37. use_doc_unwarping=None,
  38. use_layout_detection=None,
  39. layout_threshold=None,
  40. layout_nms=None,
  41. layout_unclip_ratio=None,
  42. layout_merge_bboxes_mode=None,
  43. seal_det_limit_side_len=None,
  44. seal_det_limit_type=None,
  45. seal_det_thresh=None,
  46. seal_det_box_thresh=None,
  47. seal_det_unclip_ratio=None,
  48. seal_rec_score_thresh=None,
  49. **kwargs,
  50. ):
  51. self._params = {
  52. "doc_orientation_classify_model_name": doc_orientation_classify_model_name,
  53. "doc_orientation_classify_model_dir": doc_orientation_classify_model_dir,
  54. "doc_unwarping_model_name": doc_unwarping_model_name,
  55. "doc_unwarping_model_dir": doc_unwarping_model_dir,
  56. "layout_detection_model_name": layout_detection_model_name,
  57. "layout_detection_model_dir": layout_detection_model_dir,
  58. "seal_text_detection_model_name": seal_text_detection_model_name,
  59. "seal_text_detection_model_dir": seal_text_detection_model_dir,
  60. "text_recognition_model_name": text_recognition_model_name,
  61. "text_recognition_model_dir": text_recognition_model_dir,
  62. "text_recognition_batch_size": text_recognition_batch_size,
  63. "use_doc_orientation_classify": use_doc_orientation_classify,
  64. "use_doc_unwarping": use_doc_unwarping,
  65. "use_layout_detection": use_layout_detection,
  66. "layout_threshold": layout_threshold,
  67. "layout_nms": layout_nms,
  68. "layout_unclip_ratio": layout_unclip_ratio,
  69. "layout_merge_bboxes_mode": layout_merge_bboxes_mode,
  70. "seal_det_limit_side_len": seal_det_limit_side_len,
  71. "seal_det_limit_type": seal_det_limit_type,
  72. "seal_det_thresh": seal_det_thresh,
  73. "seal_det_box_thresh": seal_det_box_thresh,
  74. "seal_det_unclip_ratio": seal_det_unclip_ratio,
  75. "seal_rec_score_thresh": seal_rec_score_thresh,
  76. }
  77. super().__init__(**kwargs)
  78. @property
  79. def _paddlex_pipeline_name(self):
  80. return "seal_recognition"
  81. def predict_iter(
  82. self,
  83. input,
  84. *,
  85. use_doc_orientation_classify=None,
  86. use_doc_unwarping=None,
  87. use_layout_detection=None,
  88. layout_det_res=None,
  89. layout_threshold=None,
  90. layout_nms=None,
  91. layout_unclip_ratio=None,
  92. layout_merge_bboxes_mode=None,
  93. seal_det_limit_side_len=None,
  94. seal_det_limit_type=None,
  95. seal_det_thresh=None,
  96. seal_det_box_thresh=None,
  97. seal_det_unclip_ratio=None,
  98. seal_rec_score_thresh=None,
  99. **kwargs,
  100. ):
  101. return self.paddlex_pipeline.predict(
  102. input,
  103. use_doc_orientation_classify=use_doc_orientation_classify,
  104. use_doc_unwarping=use_doc_unwarping,
  105. use_layout_detection=use_layout_detection,
  106. layout_det_res=layout_det_res,
  107. layout_threshold=layout_threshold,
  108. layout_nms=layout_nms,
  109. layout_unclip_ratio=layout_unclip_ratio,
  110. layout_merge_bboxes_mode=layout_merge_bboxes_mode,
  111. seal_det_limit_side_len=seal_det_limit_side_len,
  112. seal_det_limit_type=seal_det_limit_type,
  113. seal_det_thresh=seal_det_thresh,
  114. seal_det_box_thresh=seal_det_box_thresh,
  115. seal_det_unclip_ratio=seal_det_unclip_ratio,
  116. seal_rec_score_thresh=seal_rec_score_thresh,
  117. **kwargs,
  118. )
  119. def predict(
  120. self,
  121. input,
  122. *,
  123. use_doc_orientation_classify=None,
  124. use_doc_unwarping=None,
  125. use_layout_detection=None,
  126. layout_det_res=None,
  127. layout_threshold=None,
  128. layout_nms=None,
  129. layout_unclip_ratio=None,
  130. layout_merge_bboxes_mode=None,
  131. seal_det_limit_side_len=None,
  132. seal_det_limit_type=None,
  133. seal_det_thresh=None,
  134. seal_det_box_thresh=None,
  135. seal_det_unclip_ratio=None,
  136. seal_rec_score_thresh=None,
  137. **kwargs,
  138. ):
  139. return list(
  140. self.predict_iter(
  141. input,
  142. use_doc_orientation_classify=use_doc_orientation_classify,
  143. use_doc_unwarping=use_doc_unwarping,
  144. use_layout_detection=use_layout_detection,
  145. layout_det_res=layout_det_res,
  146. layout_threshold=layout_threshold,
  147. layout_nms=layout_nms,
  148. layout_unclip_ratio=layout_unclip_ratio,
  149. layout_merge_bboxes_mode=layout_merge_bboxes_mode,
  150. seal_det_limit_side_len=seal_det_limit_side_len,
  151. seal_det_limit_type=seal_det_limit_type,
  152. seal_det_thresh=seal_det_thresh,
  153. seal_det_box_thresh=seal_det_box_thresh,
  154. seal_det_unclip_ratio=seal_det_unclip_ratio,
  155. seal_rec_score_thresh=seal_rec_score_thresh,
  156. **kwargs,
  157. )
  158. )
  159. @classmethod
  160. def get_cli_subcommand_executor(cls):
  161. return SealRecognitionCLISubcommandExecutor()
  162. def _get_paddlex_config_overrides(self):
  163. STRUCTURE = {
  164. "SubPipelines.DocPreprocessor.SubModules.DocOrientationClassify.model_name": self._params[
  165. "doc_orientation_classify_model_name"
  166. ],
  167. "SubPipelines.DocPreprocessor.SubModules.DocOrientationClassify.model_dir": self._params[
  168. "doc_orientation_classify_model_dir"
  169. ],
  170. "SubPipelines.DocPreprocessor.SubModules.DocUnwarping.model_name": self._params[
  171. "doc_unwarping_model_name"
  172. ],
  173. "SubPipelines.DocPreprocessor.SubModules.DocUnwarping.model_dir": self._params[
  174. "doc_unwarping_model_dir"
  175. ],
  176. "SubModules.LayoutDetection.model_name": self._params[
  177. "layout_detection_model_name"
  178. ],
  179. "SubModules.LayoutDetection.model_dir": self._params[
  180. "layout_detection_model_dir"
  181. ],
  182. "SubModules.LayoutDetection.threshold": self._params["layout_threshold"],
  183. "SubModules.LayoutDetection.layout_nms": self._params["layout_nms"],
  184. "SubModules.LayoutDetection.layout_unclip_ratio": self._params[
  185. "layout_unclip_ratio"
  186. ],
  187. "SubModules.LayoutDetection.layout_merge_bboxes_mode": self._params[
  188. "layout_merge_bboxes_mode"
  189. ],
  190. "SubPipelines.DocPreprocessor.use_doc_orientation_classify": self._params[
  191. "use_doc_orientation_classify"
  192. ],
  193. "SubPipelines.DocPreprocessor.use_doc_unwarping": self._params[
  194. "use_doc_unwarping"
  195. ],
  196. "use_doc_preprocessor": self._params["use_doc_orientation_classify"]
  197. or self._params["use_doc_unwarping"],
  198. "SubPipelines.SealOCR.SubModules.TextDetection.model_name": self._params[
  199. "seal_text_detection_model_name"
  200. ],
  201. "SubPipelines.SealOCR.SubModules.TextDetection.model_dir": self._params[
  202. "seal_text_detection_model_dir"
  203. ],
  204. "SubPipelines.SealOCR.SubModules.TextDetection.limit_side_len": self._params[
  205. "seal_det_limit_side_len"
  206. ],
  207. "SubPipelines.SealOCR.SubModules.TextDetection.limit_type": self._params[
  208. "seal_det_limit_type"
  209. ],
  210. "SubPipelines.SealOCR.SubModules.TextDetection.thresh": self._params[
  211. "seal_det_thresh"
  212. ],
  213. "SubPipelines.SealOCR.SubModules.TextDetection.box_thresh": self._params[
  214. "seal_det_box_thresh"
  215. ],
  216. "SubPipelines.SealOCR.SubModules.TextDetection.unclip_ratio": self._params[
  217. "seal_det_unclip_ratio"
  218. ],
  219. "SubPipelines.SealOCR.SubModules.TextRecognition.model_name": self._params[
  220. "text_recognition_model_name"
  221. ],
  222. "SubPipelines.SealOCR.SubModules.TextRecognition.model_dir": self._params[
  223. "text_recognition_model_dir"
  224. ],
  225. "SubPipelines.SealOCR.SubModules.TextRecognition.batch_size": self._params[
  226. "text_recognition_batch_size"
  227. ],
  228. "SubPipelines.SealOCR.SubModules.TextRecognition.score_thresh": self._params[
  229. "seal_rec_score_thresh"
  230. ],
  231. "use_layout_detection": self._params["use_layout_detection"],
  232. }
  233. return create_config_from_structure(STRUCTURE)
  234. class SealRecognitionCLISubcommandExecutor(PipelineCLISubcommandExecutor):
  235. @property
  236. def subparser_name(self):
  237. return "seal_recognition"
  238. def _update_subparser(self, subparser):
  239. add_simple_inference_args(subparser)
  240. subparser.add_argument(
  241. "--doc_orientation_classify_model_name",
  242. type=str,
  243. help="Name of the document image orientation classification model.",
  244. )
  245. subparser.add_argument(
  246. "--doc_orientation_classify_model_dir",
  247. type=str,
  248. help="Path to the document image orientation classification model directory.",
  249. )
  250. subparser.add_argument(
  251. "--doc_unwarping_model_name",
  252. type=str,
  253. help="Name of the document image unwarping model.",
  254. )
  255. subparser.add_argument(
  256. "--doc_unwarping_model_dir",
  257. type=str,
  258. help="Path to the document image unwarping model directory.",
  259. )
  260. subparser.add_argument(
  261. "--layout_detection_model_name",
  262. type=str,
  263. help="Name of the layout detection model.",
  264. )
  265. subparser.add_argument(
  266. "--layout_detection_model_dir",
  267. type=str,
  268. help="Path to the layout detection model directory.",
  269. )
  270. subparser.add_argument(
  271. "--seal_text_detection_model_name",
  272. type=str,
  273. help="Name of the seal text detection model.",
  274. )
  275. subparser.add_argument(
  276. "--seal_text_detection_model_dir",
  277. type=str,
  278. help="Path to the seal text detection model directory.",
  279. )
  280. subparser.add_argument(
  281. "--text_recognition_model_name",
  282. type=str,
  283. help="Name of the text recognition model.",
  284. )
  285. subparser.add_argument(
  286. "--text_recognition_model_dir",
  287. type=str,
  288. help="Path to the text recognition model directory.",
  289. )
  290. subparser.add_argument(
  291. "--text_recognition_batch_size",
  292. type=int,
  293. help="Batch size for the text recognition model.",
  294. )
  295. subparser.add_argument(
  296. "--use_doc_orientation_classify",
  297. type=str2bool,
  298. help="Whether to use document image orientation classification.",
  299. )
  300. subparser.add_argument(
  301. "--use_doc_unwarping",
  302. type=str2bool,
  303. help="Whether to use document image unwarping.",
  304. )
  305. subparser.add_argument(
  306. "--use_layout_detection",
  307. type=str2bool,
  308. help="Whether to use layout detection.",
  309. )
  310. subparser.add_argument(
  311. "--layout_threshold",
  312. type=float,
  313. help="Threshold for layout detection model.",
  314. )
  315. subparser.add_argument(
  316. "--layout_nms",
  317. type=str2bool,
  318. help="Non-Maximum Suppression threshold for layout detection.",
  319. )
  320. subparser.add_argument(
  321. "--layout_unclip_ratio",
  322. type=float,
  323. help="Layout detection expansion coefficient.",
  324. )
  325. subparser.add_argument(
  326. "--layout_merge_bboxes_mode",
  327. type=str,
  328. help="Mode for merging bounding boxes in layout detection.",
  329. )
  330. subparser.add_argument(
  331. "--seal_det_limit_side_len",
  332. type=int,
  333. help="This sets a limit on the side length of the input image for the seal text detection model.",
  334. )
  335. subparser.add_argument(
  336. "--seal_det_limit_type",
  337. type=str,
  338. help="This determines how the side length limit is applied to the input image before feeding it into the seal text detection model.",
  339. )
  340. subparser.add_argument(
  341. "--seal_det_thresh",
  342. type=float,
  343. help="Detection pixel threshold for the seal text detection model. Pixels with scores greater than this threshold in the output probability map are considered text pixels.",
  344. )
  345. subparser.add_argument(
  346. "--seal_det_box_thresh",
  347. type=float,
  348. help="Detection box threshold for the seal text detection model. A detection result is considered a text region if the average score of all pixels within the border of the result is greater than this threshold.",
  349. )
  350. subparser.add_argument(
  351. "--seal_det_unclip_ratio",
  352. type=float,
  353. help="Seal text detection expansion coefficient, which expands the text region using this method. The larger the value, the larger the expansion area.",
  354. )
  355. subparser.add_argument(
  356. "--seal_rec_score_thresh",
  357. type=float,
  358. help="Text recognition threshold. Text results with scores greater than this threshold are retained.",
  359. )
  360. def execute_with_args(self, args):
  361. params = get_subcommand_args(args)
  362. perform_simple_inference(SealRecognition, params)