_text_detection.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. class TextDetectionMixin:
  15. def __init__(
  16. self,
  17. *,
  18. limit_side_len=None,
  19. limit_type=None,
  20. thresh=None,
  21. box_thresh=None,
  22. unclip_ratio=None,
  23. input_shape=None,
  24. **kwargs,
  25. ):
  26. self._extra_init_args = {
  27. "limit_side_len": limit_side_len,
  28. "limit_type": limit_type,
  29. "thresh": thresh,
  30. "box_thresh": box_thresh,
  31. "unclip_ratio": unclip_ratio,
  32. "input_shape": input_shape,
  33. }
  34. super().__init__(**kwargs)
  35. def _get_extra_paddlex_predictor_init_args(self):
  36. return self._extra_init_args
  37. class TextDetectionSubcommandExecutorMixin:
  38. def _add_text_detection_args(self, subparser):
  39. subparser.add_argument(
  40. "--limit_side_len",
  41. type=int,
  42. help="This sets a limit on the side length of the input image for the model.",
  43. )
  44. subparser.add_argument(
  45. "--limit_type",
  46. type=str,
  47. help="This determines how the side length limit is applied to the input image before feeding it into the model.",
  48. )
  49. subparser.add_argument(
  50. "--thresh",
  51. type=float,
  52. help="Detection pixel threshold for the model. Pixels with scores greater than this threshold in the output probability map are considered text pixels.",
  53. )
  54. subparser.add_argument(
  55. "--box_thresh",
  56. type=float,
  57. help="Detection box threshold for the model. A detection result is considered a text region if the average score of all pixels within the border of the result is greater than this threshold.",
  58. )
  59. subparser.add_argument(
  60. "--unclip_ratio",
  61. type=float,
  62. help="Expansion coefficient, which expands the text region using this method. The larger the value, the larger the expansion area.",
  63. )
  64. subparser.add_argument(
  65. "--input_shape",
  66. nargs=3,
  67. type=int,
  68. metavar=("C", "H", "W"),
  69. help="Input shape of the model.",
  70. )