# eval_squad.py
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
#
# This script evaluates the accuracy of ONNX models for the question-answering task on the SQuAD data set.
# Example of evaluating the raw and the optimized model with CUDA on Linux:
#   pip3 install datasets evaluate optimum transformers onnxruntime-gpu
#
#   python3 eval_squad.py -m bert-large-uncased-whole-word-masking-finetuned-squad -s 384 -b 1 --use_io_binding
#
#   python3 -m onnxruntime.transformers.optimizer \
#       --input ./bert-large-uncased-whole-word-masking-finetuned-squad/model.onnx \
#       --output ./bert-large-uncased-whole-word-masking-finetuned-squad/optimized_model.onnx
#
#   python3 eval_squad.py -m bert-large-uncased-whole-word-masking-finetuned-squad -s 384 -b 1 --use_io_binding \
#       --onnx ./bert-large-uncased-whole-word-masking-finetuned-squad/optimized_model.onnx
#
# Snippet of example output on A100:
#   {'exact': 86.65089877010406, 'f1': 92.99433524952254, 'total': 10570, 'HasAns_exact': 86.65089877010406
#    'total_time_in_seconds': 81.69239814393222, 'samples_per_second': 129.387804008115,
#    'latency_in_seconds': 0.007728703703304846, 'provider': 'CUDAExecutionProvider',
#    'pretrained_model_name': 'bert-large-uncased-whole-word-masking-finetuned-squad',
#    'batch_size': 1, 'sequence_length': 384, 'use_io_binding': True}
  25. import argparse
  26. import csv
  27. import os
  28. import time
  29. try:
  30. from importlib.metadata import PackageNotFoundError, version
  31. except ImportError:
  32. from importlib_metadata import PackageNotFoundError, version
  33. from pathlib import Path
  34. from typing import Any
  35. from datasets import load_dataset
  36. from evaluate import evaluator
  37. from optimum.onnxruntime import ORTModelForQuestionAnswering
  38. from optimum.version import __version__ as optimum_version
  39. from packaging import version as version_check
  40. from transformers import AutoTokenizer, pipeline
  41. if version_check.parse(optimum_version) < version_check.parse("1.13.1"):
  42. raise ImportError(f"Please install optimum>=1.13.1. Current version: {optimum_version}.")
  43. PRETRAINED_SQUAD_MODELS = [
  44. "bert-large-uncased-whole-word-masking-finetuned-squad",
  45. "deepset/roberta-base-squad2",
  46. "distilbert-base-cased-distilled-squad",
  47. ]
  48. def get_package_version(package_name: str):
  49. try:
  50. return version(package_name)
  51. except PackageNotFoundError:
  52. return None
  53. def load_onnx_model(
  54. model_id: str, onnx_path: str | None = None, provider="CUDAExecutionProvider", use_io_binding: bool = False
  55. ):
  56. """Load onnx model given pretrained model name and optional ONNX model path. If onnx_path is None,
  57. the default onnx model from optimum will be used.
  58. Args:
  59. model_id (str): pretrained model name or checkpoint path
  60. onnx_path (Optional[str], optional): path of onnx model to evaluate. Defaults to None.
  61. Returns:
  62. model: ORTModel for the onnx model
  63. onnx_path: the path of onnx model
  64. """
  65. if onnx_path is None:
  66. # Export onnx to a sub-directory named by the model id
  67. model = ORTModelForQuestionAnswering.from_pretrained(
  68. model_id, export=True, provider=provider, use_io_binding=use_io_binding
  69. )
  70. save_onnx_dir = os.path.join(".", model_id)
  71. model.save_pretrained(save_onnx_dir)
  72. onnx_path = os.path.join(save_onnx_dir, "model.onnx")
  73. print("Model is exported to onnx file:", onnx_path)
  74. else:
  75. model = ORTModelForQuestionAnswering.from_pretrained(
  76. os.path.dirname(onnx_path),
  77. file_name=Path(onnx_path).name,
  78. provider=provider,
  79. use_io_binding=use_io_binding,
  80. # provider_options={"enable_skip_layer_norm_strict_mode": True},
  81. )
  82. return model, onnx_path
  83. def output_details(results: list[dict[str, Any]], csv_filename: str):
  84. """Output a CSV file with detail of each test results.
  85. Args:
  86. results (List[Dict[str, Any]]): list of JSON results.
  87. csv_filename (str): path of output CSV file
  88. """
  89. with open(csv_filename, mode="a", newline="", encoding="ascii") as csv_file:
  90. column_names = [
  91. "pretrained_model_name",
  92. "onnx_path",
  93. "provider",
  94. "disable_fused_attention",
  95. "batch_size",
  96. "sequence_length",
  97. "use_io_binding",
  98. "exact",
  99. "f1",
  100. "total",
  101. "HasAns_exact",
  102. "HasAns_f1",
  103. "HasAns_total",
  104. "best_exact",
  105. "best_exact_thresh",
  106. "best_f1",
  107. "best_f1_thresh",
  108. "total_time_in_seconds",
  109. "samples_per_second",
  110. "latency_in_seconds",
  111. ]
  112. csv_writer = csv.DictWriter(csv_file, fieldnames=column_names)
  113. csv_writer.writeheader()
  114. for result in results:
  115. csv_writer.writerow(result)
  116. csv_file.flush()
  117. print(f"Detail results are saved to csv file: {csv_filename}")
  118. def output_summary(results: list[dict[str, Any]], csv_filename: str, metric_name: str):
  119. """Output a CSV file with summary of a metric on combinations of batch_size and sequence_length.
  120. Args:
  121. results (List[Dict[str, Any]]): list of JSON results.
  122. csv_filename (str): path of output CSV file
  123. metric_name (str): the metric to summarize
  124. """
  125. with open(csv_filename, mode="a", newline="", encoding="ascii") as csv_file:
  126. header_names = [
  127. "pretrained_model_name",
  128. "onnx_path",
  129. "provider",
  130. "disable_fused_attention",
  131. "use_io_binding",
  132. ]
  133. model_list = list({result["onnx_path"] for result in results})
  134. model_list.sort()
  135. batch_sizes = list({result["batch_size"] for result in results})
  136. batch_sizes.sort()
  137. sequence_lengths = list({result["sequence_length"] for result in results})
  138. sequence_lengths.sort()
  139. key_names = []
  140. for sequence_length in sequence_lengths:
  141. for batch_size in batch_sizes:
  142. key_names.append(f"b{batch_size}_s{sequence_length}")
  143. csv_writer = csv.DictWriter(csv_file, fieldnames=header_names + key_names)
  144. csv_writer.writeheader()
  145. for model in model_list:
  146. row = {}
  147. # Metric value for given pair of batch_size and sequence_length.
  148. # Assume that (onnx_path, batch_size and sequence_length) are unique so keep first occurrence only.
  149. values = {}
  150. values.update(dict.fromkeys(key_names, ""))
  151. for result in results:
  152. if result["onnx_path"] == model and result[metric_name]:
  153. headers = {k: v for k, v in result.items() if k in header_names}
  154. if not row:
  155. row.update(headers)
  156. batch_size = result["batch_size"]
  157. sequence_length = result["sequence_length"]
  158. key = f"b{batch_size}_s{sequence_length}"
  159. if key in key_names:
  160. values[key] = result[metric_name]
  161. if row:
  162. for key in key_names:
  163. row[key] = values.get(key, "")
  164. csv_writer.writerow(row)
  165. csv_file.flush()
  166. print(f"Summary results for {metric_name} are saved to csv file: {csv_filename}")
  167. def main():
  168. args = parse_arguments()
  169. print(args)
  170. for name in ["onnxruntime-gpu", "onnxruntime", "onnx", "torch", "transformers", "optimum", "datasets", "evaluate"]:
  171. package_version = get_package_version(name)
  172. if package_version:
  173. print(f"{name} version", package_version)
  174. pretrained_model_name = args.model_name
  175. if args.onnx and not os.path.exists(args.onnx):
  176. raise RuntimeError(f"Onnx model path does not exist: {args.onnx}")
  177. disable_fused_attention = os.environ.get("ORT_DISABLE_FUSED_ATTENTION", "0") == "1"
  178. all_results = []
  179. tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name)
  180. for sequence_length in args.sequence_lengths:
  181. tokenizer.model_max_length = sequence_length
  182. tokenizer.doc_stride = min(sequence_length // 2, 128)
  183. if args.onnx is None:
  184. print("Exporting onnx model. It might take a few minutes...")
  185. start_time = time.time()
  186. ort_model, onnx_path = load_onnx_model(pretrained_model_name, args.onnx, args.provider, args.use_io_binding)
  187. latency = time.time() - start_time
  188. print(f"Onnx model exported or loaded in {latency:.1f} seconds")
  189. print(ort_model.config)
  190. if sequence_length > ort_model.config.max_position_embeddings:
  191. raise RuntimeError("sequence length should not be larger than {ort_model.config.max_position_embeddings}")
  192. qa_pipeline = pipeline(
  193. "question-answering", model=ort_model, tokenizer=tokenizer, question_first=True, batch_size=args.batch_size
  194. )
  195. task_evaluator = evaluator("question-answering")
  196. print("Loading dataset...")
  197. start_time = time.time()
  198. squad_dataset = load_dataset("squad", split=f"validation[:{args.total}]" if args.total > 0 else "validation")
  199. latency = time.time() - start_time
  200. print(f"Dataset loaded in {latency:.1f} seconds")
  201. print("Evaluating squad_v2 with ORT. It might take a few minutes...")
  202. start_time = time.time()
  203. result = task_evaluator.compute(
  204. model_or_pipeline=qa_pipeline,
  205. data=squad_dataset,
  206. metric="squad_v2",
  207. squad_v2_format=True,
  208. )
  209. latency = time.time() - start_time
  210. print(f"Evaluation done in {latency:.1f} seconds")
  211. result["provider"] = args.provider
  212. result["disable_fused_attention"] = disable_fused_attention
  213. result["pretrained_model_name"] = pretrained_model_name
  214. result["onnx_path"] = onnx_path
  215. result["batch_size"] = args.batch_size
  216. result["sequence_length"] = sequence_length
  217. result["use_io_binding"] = args.use_io_binding
  218. print(result)
  219. all_results.append(result)
  220. output_details(all_results, "detail.csv")
  221. for metric_name in ["f1", "exact", "samples_per_second"]:
  222. output_summary(all_results, f"{metric_name}.csv", metric_name)
  223. def parse_arguments(argv=None):
  224. parser = argparse.ArgumentParser()
  225. parser.add_argument(
  226. "-m",
  227. "--model_name",
  228. required=False,
  229. type=str,
  230. default=PRETRAINED_SQUAD_MODELS[0],
  231. help=f"Checkpoint directory or pre-trained model names in the list: {PRETRAINED_SQUAD_MODELS}",
  232. )
  233. parser.add_argument(
  234. "-s",
  235. "--sequence_lengths",
  236. nargs="+",
  237. type=int,
  238. default=[384],
  239. help="Sequence lengths for onnx model inputs. It could have multiple values.",
  240. )
  241. parser.add_argument(
  242. "-b",
  243. "--batch_size",
  244. type=int,
  245. default=1,
  246. help="batch size for inference.",
  247. )
  248. parser.add_argument("-t", "--total", type=int, default=0, help="Total samples to test. 0 means all samples.")
  249. parser.add_argument(
  250. "--onnx",
  251. required=False,
  252. type=str,
  253. default=None,
  254. help="Optional onnx model path. If not specified, optimum will be used to export onnx model for testing.",
  255. )
  256. parser.add_argument(
  257. "--provider",
  258. required=False,
  259. default="CUDAExecutionProvider",
  260. help="Select which Execution Provider to use for runs. Default is CUDAExecutionProvider.",
  261. )
  262. parser.add_argument("--use_io_binding", required=False, action="store_true", help="Use IO Binding for GPU.")
  263. parser.set_defaults(use_io_binding=False)
  264. args = parser.parse_args(argv)
  265. return args
  266. if __name__ == "__main__":
  267. main()