_common_args.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
  1. # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from paddlex.inference import PaddlePredictorOption
  15. from paddlex.utils.device import get_default_device, parse_device
  16. from ._constants import (
  17. DEFAULT_CPU_THREADS,
  18. DEFAULT_DEVICE,
  19. DEFAULT_ENABLE_MKLDNN,
  20. DEFAULT_MKLDNN_CACHE_CAPACITY,
  21. DEFAULT_PRECISION,
  22. DEFAULT_USE_TENSORRT,
  23. SUPPORTED_PRECISION_LIST,
  24. DEFAULT_USE_CINN,
  25. )
  26. from ._utils.cli import str2bool
  27. def parse_common_args(kwargs, *, default_enable_hpi):
  28. default_vals = {
  29. "device": DEFAULT_DEVICE,
  30. "enable_hpi": default_enable_hpi,
  31. "use_tensorrt": DEFAULT_USE_TENSORRT,
  32. "precision": DEFAULT_PRECISION,
  33. "enable_mkldnn": DEFAULT_ENABLE_MKLDNN,
  34. "mkldnn_cache_capacity": DEFAULT_MKLDNN_CACHE_CAPACITY,
  35. "cpu_threads": DEFAULT_CPU_THREADS,
  36. "enable_cinn": DEFAULT_USE_CINN,
  37. }
  38. unknown_names = kwargs.keys() - default_vals.keys()
  39. for name in unknown_names:
  40. raise ValueError(f"Unknown argument: {name}")
  41. kwargs = {**default_vals, **kwargs}
  42. if kwargs["precision"] not in SUPPORTED_PRECISION_LIST:
  43. raise ValueError(
  44. f"Invalid precision: {kwargs['precision']}. Supported values are: {SUPPORTED_PRECISION_LIST}."
  45. )
  46. kwargs["use_pptrt"] = kwargs.pop("use_tensorrt")
  47. kwargs["pptrt_precision"] = kwargs.pop("precision")
  48. return kwargs
  49. def prepare_common_init_args(model_name, common_args):
  50. device = common_args["device"]
  51. if device is None:
  52. device = get_default_device()
  53. device_type, _ = parse_device(device)
  54. init_kwargs = {}
  55. init_kwargs["device"] = device
  56. init_kwargs["use_hpip"] = common_args["enable_hpi"]
  57. pp_option = PaddlePredictorOption()
  58. if device_type == "gpu":
  59. if common_args["use_pptrt"]:
  60. if common_args["pptrt_precision"] == "fp32":
  61. pp_option.run_mode = "trt_fp32"
  62. else:
  63. assert common_args["pptrt_precision"] == "fp16", common_args[
  64. "pptrt_precision"
  65. ]
  66. pp_option.run_mode = "trt_fp16"
  67. else:
  68. pp_option.run_mode = "paddle"
  69. elif device_type == "cpu":
  70. enable_mkldnn = common_args["enable_mkldnn"]
  71. if enable_mkldnn:
  72. pp_option.mkldnn_cache_capacity = common_args["mkldnn_cache_capacity"]
  73. else:
  74. pp_option.run_mode = "paddle"
  75. pp_option.cpu_threads = common_args["cpu_threads"]
  76. else:
  77. pp_option.run_mode = "paddle"
  78. pp_option.enable_cinn = common_args["enable_cinn"]
  79. init_kwargs["pp_option"] = pp_option
  80. return init_kwargs
  81. def add_common_cli_opts(parser, *, default_enable_hpi, allow_multiple_devices):
  82. if allow_multiple_devices:
  83. help_ = "Device(s) to use for inference, e.g., `cpu`, `gpu`, `npu`, `gpu:0`, `gpu:0,1`. If multiple devices are specified, inference will be performed in parallel. Note that parallel inference is not always supported. By default, GPU 0 will be used if available; otherwise, the CPU will be used."
  84. else:
  85. help_ = "Device to use for inference, e.g., `cpu`, `gpu`, `npu`, `gpu:0`. By default, GPU 0 will be used if available; otherwise, the CPU will be used."
  86. parser.add_argument(
  87. "--device",
  88. type=str,
  89. default=DEFAULT_DEVICE,
  90. help=help_,
  91. )
  92. parser.add_argument(
  93. "--enable_hpi",
  94. type=str2bool,
  95. default=default_enable_hpi,
  96. help="Enable the high performance inference.",
  97. )
  98. parser.add_argument(
  99. "--use_tensorrt",
  100. type=str2bool,
  101. default=DEFAULT_USE_TENSORRT,
  102. help="Whether to use the Paddle Inference TensorRT subgraph engine. If the model does not support TensorRT acceleration, even if this flag is set, acceleration will not be used.",
  103. )
  104. parser.add_argument(
  105. "--precision",
  106. type=str,
  107. default=DEFAULT_PRECISION,
  108. choices=SUPPORTED_PRECISION_LIST,
  109. help="Precision for TensorRT when using the Paddle Inference TensorRT subgraph engine.",
  110. )
  111. parser.add_argument(
  112. "--enable_mkldnn",
  113. type=str2bool,
  114. default=DEFAULT_ENABLE_MKLDNN,
  115. help="Enable MKL-DNN acceleration for inference. If MKL-DNN is unavailable or the model does not support it, acceleration will not be used even if this flag is set.",
  116. )
  117. parser.add_argument(
  118. "--mkldnn_cache_capacity",
  119. type=int,
  120. default=DEFAULT_MKLDNN_CACHE_CAPACITY,
  121. help="MKL-DNN cache capacity.",
  122. )
  123. parser.add_argument(
  124. "--cpu_threads",
  125. type=int,
  126. default=DEFAULT_CPU_THREADS,
  127. help="Number of threads to use for inference on CPUs.",
  128. )
  129. parser.add_argument(
  130. "--enable_cinn",
  131. type=str2bool,
  132. default=DEFAULT_USE_CINN,
  133. help="Whether to use the CINN compiler.",
  134. )