program.py 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962
  1. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. import os
  18. import gc
  19. import sys
  20. import platform
  21. import yaml
  22. import time
  23. import datetime
  24. import paddle
  25. import paddle.distributed as dist
  26. from tqdm import tqdm
  27. import cv2
  28. import numpy as np
  29. import copy
  30. from argparse import ArgumentParser, RawDescriptionHelpFormatter
  31. from ppocr.utils.stats import TrainingStats
  32. from ppocr.utils.save_load import save_model
  33. from ppocr.utils.utility import print_dict, AverageMeter
  34. from ppocr.utils.logging import get_logger
  35. from ppocr.utils.loggers import WandbLogger, Loggers
  36. from ppocr.utils import profiler
  37. from ppocr.data import build_dataloader
  38. from ppocr.utils.export_model import export
  39. class ArgsParser(ArgumentParser):
  40. def __init__(self):
  41. super(ArgsParser, self).__init__(formatter_class=RawDescriptionHelpFormatter)
  42. self.add_argument("-c", "--config", help="configuration file to use")
  43. self.add_argument("-o", "--opt", nargs="+", help="set configuration options")
  44. self.add_argument(
  45. "-p",
  46. "--profiler_options",
  47. type=str,
  48. default=None,
  49. help="The option of profiler, which should be in format "
  50. '"key1=value1;key2=value2;key3=value3".',
  51. )
  52. def parse_args(self, argv=None):
  53. args = super(ArgsParser, self).parse_args(argv)
  54. assert args.config is not None, "Please specify --config=configure_file_path."
  55. args.opt = self._parse_opt(args.opt)
  56. return args
  57. def _parse_opt(self, opts):
  58. config = {}
  59. if not opts:
  60. return config
  61. for s in opts:
  62. s = s.strip()
  63. k, v = s.split("=")
  64. config[k] = yaml.load(v, Loader=yaml.Loader)
  65. return config
  66. def load_config(file_path):
  67. """
  68. Load config from yml/yaml file.
  69. Args:
  70. file_path (str): Path of the config file to be loaded.
  71. Returns: global config
  72. """
  73. _, ext = os.path.splitext(file_path)
  74. assert ext in [".yml", ".yaml"], "only support yaml files for now"
  75. config = yaml.load(open(file_path, "rb"), Loader=yaml.Loader)
  76. return config
  77. def merge_config(config, opts):
  78. """
  79. Merge config into global config.
  80. Args:
  81. config (dict): Config to be merged.
  82. Returns: global config
  83. """
  84. for key, value in opts.items():
  85. if "." not in key:
  86. if isinstance(value, dict) and key in config:
  87. config[key].update(value)
  88. else:
  89. config[key] = value
  90. else:
  91. sub_keys = key.split(".")
  92. assert sub_keys[0] in config, (
  93. "the sub_keys can only be one of global_config: {}, but get: "
  94. "{}, please check your running command".format(
  95. config.keys(), sub_keys[0]
  96. )
  97. )
  98. cur = config[sub_keys[0]]
  99. for idx, sub_key in enumerate(sub_keys[1:]):
  100. if idx == len(sub_keys) - 2:
  101. cur[sub_key] = value
  102. else:
  103. cur = cur[sub_key]
  104. return config
  105. def check_device(
  106. use_gpu,
  107. use_xpu=False,
  108. use_npu=False,
  109. use_mlu=False,
  110. use_gcu=False,
  111. use_iluvatar_gpu=False,
  112. use_metax_gpu=False,
  113. ):
  114. """
  115. Log error and exit when set use_gpu=true in paddlepaddle
  116. cpu version.
  117. """
  118. err = (
  119. "Config {} cannot be set as true while your paddle "
  120. "is not compiled with {} ! \nPlease try: \n"
  121. "\t1. Install paddlepaddle to run model on {} \n"
  122. "\t2. Set {} as false in config file to run "
  123. "model on CPU"
  124. )
  125. try:
  126. if use_gpu and use_xpu:
  127. print("use_xpu and use_gpu can not both be true.")
  128. if use_gpu and not paddle.is_compiled_with_cuda():
  129. print(err.format("use_gpu", "cuda", "gpu", "use_gpu"))
  130. sys.exit(1)
  131. if use_xpu and not paddle.device.is_compiled_with_xpu():
  132. print(err.format("use_xpu", "xpu", "xpu", "use_xpu"))
  133. sys.exit(1)
  134. if use_npu:
  135. if (
  136. int(paddle.version.major) != 0
  137. and int(paddle.version.major) <= 2
  138. and int(paddle.version.minor) <= 4
  139. ):
  140. if not paddle.device.is_compiled_with_npu():
  141. print(err.format("use_npu", "npu", "npu", "use_npu"))
  142. sys.exit(1)
  143. # is_compiled_with_npu() has been updated after paddle-2.4
  144. else:
  145. if not paddle.device.is_compiled_with_custom_device("npu"):
  146. print(err.format("use_npu", "npu", "npu", "use_npu"))
  147. sys.exit(1)
  148. if use_mlu and not paddle.device.is_compiled_with_mlu():
  149. print(err.format("use_mlu", "mlu", "mlu", "use_mlu"))
  150. sys.exit(1)
  151. if use_gcu and not paddle.device.is_compiled_with_custom_device("gcu"):
  152. print(err.format("use_gcu", "gcu", "gcu", "use_gcu"))
  153. sys.exit(1)
  154. if use_metax_gpu and not paddle.device.is_compiled_with_custom_device(
  155. "metax_gpu"
  156. ):
  157. print(
  158. err.format("use_metax_gpu", "metax_gpu", "metax_gpu", "use_metax_gpu")
  159. )
  160. sys.exit(1)
  161. except Exception as e:
  162. pass
  163. def to_float32(preds):
  164. if isinstance(preds, dict):
  165. for k in preds:
  166. if isinstance(preds[k], dict) or isinstance(preds[k], list):
  167. preds[k] = to_float32(preds[k])
  168. elif isinstance(preds[k], paddle.Tensor):
  169. preds[k] = preds[k].astype(paddle.float32)
  170. elif isinstance(preds, list):
  171. for k in range(len(preds)):
  172. if isinstance(preds[k], dict):
  173. preds[k] = to_float32(preds[k])
  174. elif isinstance(preds[k], list):
  175. preds[k] = to_float32(preds[k])
  176. elif isinstance(preds[k], paddle.Tensor):
  177. preds[k] = preds[k].astype(paddle.float32)
  178. elif isinstance(preds, paddle.Tensor):
  179. preds = preds.astype(paddle.float32)
  180. return preds
  181. def train(
  182. config,
  183. train_dataloader,
  184. valid_dataloader,
  185. device,
  186. model,
  187. loss_class,
  188. optimizer,
  189. lr_scheduler,
  190. post_process_class,
  191. eval_class,
  192. pre_best_model_dict,
  193. logger,
  194. step_pre_epoch,
  195. log_writer=None,
  196. scaler=None,
  197. amp_level="O2",
  198. amp_custom_black_list=[],
  199. amp_custom_white_list=[],
  200. amp_dtype="float16",
  201. ):
  202. cal_metric_during_train = config["Global"].get("cal_metric_during_train", False)
  203. calc_epoch_interval = config["Global"].get("calc_epoch_interval", 1)
  204. log_smooth_window = config["Global"]["log_smooth_window"]
  205. epoch_num = config["Global"]["epoch_num"]
  206. print_batch_step = config["Global"]["print_batch_step"]
  207. eval_batch_step = config["Global"]["eval_batch_step"]
  208. eval_batch_epoch = config["Global"].get("eval_batch_epoch", None)
  209. profiler_options = config["profiler_options"]
  210. print_mem_info = config["Global"].get("print_mem_info", True)
  211. uniform_output_enabled = config["Global"].get("uniform_output_enabled", False)
  212. global_step = 0
  213. if "global_step" in pre_best_model_dict:
  214. global_step = pre_best_model_dict["global_step"]
  215. start_eval_step = 0
  216. if isinstance(eval_batch_step, list) and len(eval_batch_step) >= 2:
  217. start_eval_step = eval_batch_step[0] if not eval_batch_epoch else 0
  218. eval_batch_step = (
  219. eval_batch_step[1]
  220. if not eval_batch_epoch
  221. else step_pre_epoch * eval_batch_epoch
  222. )
  223. if len(valid_dataloader) == 0:
  224. logger.info(
  225. "No Images in eval dataset, evaluation during training "
  226. "will be disabled"
  227. )
  228. start_eval_step = 1e111
  229. logger.info(
  230. "During the training process, after the {}th iteration, "
  231. "an evaluation is run every {} iterations".format(
  232. start_eval_step, eval_batch_step
  233. )
  234. )
  235. save_epoch_step = config["Global"]["save_epoch_step"]
  236. save_model_dir = config["Global"]["save_model_dir"]
  237. if not os.path.exists(save_model_dir):
  238. os.makedirs(save_model_dir)
  239. main_indicator = eval_class.main_indicator
  240. best_model_dict = {main_indicator: 0}
  241. best_model_dict.update(pre_best_model_dict)
  242. train_stats = TrainingStats(log_smooth_window, ["lr"])
  243. model_average = False
  244. model.train()
  245. use_srn = config["Architecture"]["algorithm"] == "SRN"
  246. extra_input_models = [
  247. "SRN",
  248. "NRTR",
  249. "SAR",
  250. "SEED",
  251. "SVTR",
  252. "SVTR_LCNet",
  253. "SPIN",
  254. "VisionLAN",
  255. "RobustScanner",
  256. "RFL",
  257. "DRRG",
  258. "SATRN",
  259. "SVTR_HGNet",
  260. "ParseQ",
  261. "CPPD",
  262. ]
  263. extra_input = False
  264. if config["Architecture"]["algorithm"] == "Distillation":
  265. for key in config["Architecture"]["Models"]:
  266. extra_input = (
  267. extra_input
  268. or config["Architecture"]["Models"][key]["algorithm"]
  269. in extra_input_models
  270. )
  271. else:
  272. extra_input = config["Architecture"]["algorithm"] in extra_input_models
  273. try:
  274. model_type = config["Architecture"]["model_type"]
  275. except:
  276. model_type = None
  277. algorithm = config["Architecture"]["algorithm"]
  278. start_epoch = (
  279. best_model_dict["start_epoch"] if "start_epoch" in best_model_dict else 1
  280. )
  281. total_samples = 0
  282. train_reader_cost = 0.0
  283. train_batch_cost = 0.0
  284. reader_start = time.time()
  285. eta_meter = AverageMeter()
  286. max_iter = (
  287. len(train_dataloader) - 1
  288. if platform.system() == "Windows"
  289. else len(train_dataloader)
  290. )
  291. for epoch in range(start_epoch, epoch_num + 1):
  292. if train_dataloader.dataset.need_reset:
  293. train_dataloader = build_dataloader(
  294. config, "Train", device, logger, seed=epoch
  295. )
  296. max_iter = (
  297. len(train_dataloader) - 1
  298. if platform.system() == "Windows"
  299. else len(train_dataloader)
  300. )
  301. for idx, batch in enumerate(train_dataloader):
  302. model.train()
  303. profiler.add_profiler_step(profiler_options)
  304. train_reader_cost += time.time() - reader_start
  305. if idx >= max_iter:
  306. break
  307. lr = optimizer.get_lr()
  308. images = batch[0]
  309. if use_srn:
  310. model_average = True
  311. # use amp
  312. if scaler:
  313. with paddle.amp.auto_cast(
  314. level=amp_level,
  315. custom_black_list=amp_custom_black_list,
  316. custom_white_list=amp_custom_white_list,
  317. dtype=amp_dtype,
  318. ):
  319. if model_type == "table" or extra_input:
  320. preds = model(images, data=batch[1:])
  321. elif model_type in ["kie"]:
  322. preds = model(batch)
  323. elif algorithm in ["CAN"]:
  324. preds = model(batch[:3])
  325. elif algorithm in [
  326. "LaTeXOCR",
  327. "UniMERNet",
  328. "PP-FormulaNet-S",
  329. "PP-FormulaNet-L",
  330. "PP-FormulaNet_plus-S",
  331. "PP-FormulaNet_plus-M",
  332. "PP-FormulaNet_plus-L",
  333. ]:
  334. preds = model(batch)
  335. else:
  336. preds = model(images)
  337. preds = to_float32(preds)
  338. loss = loss_class(preds, batch)
  339. avg_loss = loss["loss"]
  340. scaled_avg_loss = scaler.scale(avg_loss)
  341. scaled_avg_loss.backward()
  342. scaler.minimize(optimizer, scaled_avg_loss)
  343. else:
  344. if model_type == "table" or extra_input:
  345. preds = model(images, data=batch[1:])
  346. elif model_type in ["kie", "sr"]:
  347. preds = model(batch)
  348. elif algorithm in ["CAN"]:
  349. preds = model(batch[:3])
  350. elif algorithm in [
  351. "LaTeXOCR",
  352. "UniMERNet",
  353. "PP-FormulaNet-S",
  354. "PP-FormulaNet-L",
  355. "PP-FormulaNet_plus-S",
  356. "PP-FormulaNet_plus-M",
  357. "PP-FormulaNet_plus-L",
  358. ]:
  359. preds = model(batch)
  360. else:
  361. preds = model(images)
  362. loss = loss_class(preds, batch)
  363. avg_loss = loss["loss"]
  364. avg_loss.backward()
  365. optimizer.step()
  366. optimizer.clear_grad()
  367. if (
  368. cal_metric_during_train and epoch % calc_epoch_interval == 0
  369. ): # only rec and cls need
  370. batch = [item.numpy() for item in batch]
  371. if model_type in ["kie", "sr"]:
  372. eval_class(preds, batch)
  373. elif model_type in ["table"]:
  374. post_result = post_process_class(preds, batch)
  375. eval_class(post_result, batch)
  376. elif algorithm in ["CAN"]:
  377. model_type = "can"
  378. eval_class(preds[0], batch[2:], epoch_reset=(idx == 0))
  379. elif algorithm in ["LaTeXOCR"]:
  380. model_type = "latexocr"
  381. post_result = post_process_class(preds, batch[1], mode="train")
  382. eval_class(post_result[0], post_result[1], epoch_reset=(idx == 0))
  383. elif algorithm in ["UniMERNet"]:
  384. model_type = "unimernet"
  385. post_result = post_process_class(preds[0], batch[1], mode="train")
  386. eval_class(post_result[0], post_result[1], epoch_reset=(idx == 0))
  387. elif algorithm in [
  388. "PP-FormulaNet-S",
  389. "PP-FormulaNet-L",
  390. "PP-FormulaNet_plus-S",
  391. "PP-FormulaNet_plus-M",
  392. "PP-FormulaNet_plus-L",
  393. ]:
  394. model_type = "pp_formulanet"
  395. post_result = post_process_class(preds[0], batch[1], mode="train")
  396. eval_class(post_result[0], post_result[1], epoch_reset=(idx == 0))
  397. else:
  398. if config["Loss"]["name"] in [
  399. "MultiLoss",
  400. "MultiLoss_v2",
  401. ]: # for multi head loss
  402. post_result = post_process_class(
  403. preds["ctc"], batch[1]
  404. ) # for CTC head out
  405. elif config["Loss"]["name"] in ["VLLoss"]:
  406. post_result = post_process_class(preds, batch[1], batch[-1])
  407. else:
  408. post_result = post_process_class(preds, batch[1])
  409. eval_class(post_result, batch)
  410. metric = eval_class.get_metric()
  411. train_stats.update(metric)
  412. train_batch_time = time.time() - reader_start
  413. train_batch_cost += train_batch_time
  414. eta_meter.update(train_batch_time)
  415. global_step += 1
  416. total_samples += len(images)
  417. if not isinstance(lr_scheduler, float):
  418. lr_scheduler.step()
  419. # logger and visualdl
  420. stats = {
  421. k: float(v) if v.shape == [] else v.numpy().mean()
  422. for k, v in loss.items()
  423. }
  424. stats["lr"] = lr
  425. train_stats.update(stats)
  426. if log_writer is not None and dist.get_rank() == 0:
  427. log_writer.log_metrics(
  428. metrics=train_stats.get(), prefix="TRAIN", step=global_step
  429. )
  430. if (global_step > 0 and global_step % print_batch_step == 0) or (
  431. idx >= len(train_dataloader) - 1
  432. ):
  433. logs = train_stats.log()
  434. eta_sec = (
  435. (epoch_num + 1 - epoch) * len(train_dataloader) - idx - 1
  436. ) * eta_meter.avg
  437. eta_sec_format = str(datetime.timedelta(seconds=int(eta_sec)))
  438. max_mem_reserved_str = ""
  439. max_mem_allocated_str = ""
  440. if paddle.device.is_compiled_with_cuda() and print_mem_info:
  441. max_mem_reserved_str = f", max_mem_reserved: {paddle.device.cuda.max_memory_reserved() // (1024 ** 2)} MB,"
  442. max_mem_allocated_str = f" max_mem_allocated: {paddle.device.cuda.max_memory_allocated() // (1024 ** 2)} MB"
  443. strs = (
  444. "epoch: [{}/{}], global_step: {}, {}, avg_reader_cost: "
  445. "{:.5f} s, avg_batch_cost: {:.5f} s, avg_samples: {}, "
  446. "ips: {:.5f} samples/s, eta: {}{}{}".format(
  447. epoch,
  448. epoch_num,
  449. global_step,
  450. logs,
  451. train_reader_cost / print_batch_step,
  452. train_batch_cost / print_batch_step,
  453. total_samples / print_batch_step,
  454. total_samples / train_batch_cost,
  455. eta_sec_format,
  456. max_mem_reserved_str,
  457. max_mem_allocated_str,
  458. )
  459. )
  460. logger.info(strs)
  461. total_samples = 0
  462. train_reader_cost = 0.0
  463. train_batch_cost = 0.0
  464. # eval
  465. if (
  466. global_step > start_eval_step
  467. and (global_step - start_eval_step) % eval_batch_step == 0
  468. and dist.get_rank() == 0
  469. ):
  470. if model_average:
  471. Model_Average = paddle.incubate.ModelAverage(
  472. 0.15,
  473. parameters=model.parameters(),
  474. min_average_window=10000,
  475. max_average_window=15625,
  476. )
  477. Model_Average.apply()
  478. cur_metric = eval(
  479. model,
  480. valid_dataloader,
  481. post_process_class,
  482. eval_class,
  483. model_type,
  484. extra_input=extra_input,
  485. scaler=scaler,
  486. amp_level=amp_level,
  487. amp_custom_black_list=amp_custom_black_list,
  488. amp_custom_white_list=amp_custom_white_list,
  489. amp_dtype=amp_dtype,
  490. )
  491. cur_metric_str = "cur metric, {}".format(
  492. ", ".join(["{}: {}".format(k, v) for k, v in cur_metric.items()])
  493. )
  494. logger.info(cur_metric_str)
  495. # logger metric
  496. if log_writer is not None:
  497. log_writer.log_metrics(
  498. metrics=cur_metric, prefix="EVAL", step=global_step
  499. )
  500. if cur_metric[main_indicator] >= best_model_dict[main_indicator]:
  501. best_model_dict.update(cur_metric)
  502. best_model_dict["best_epoch"] = epoch
  503. prefix = "best_accuracy"
  504. if uniform_output_enabled:
  505. export(
  506. config,
  507. model,
  508. os.path.join(save_model_dir, prefix, "inference"),
  509. )
  510. gc.collect()
  511. model_info = {"epoch": epoch, "metric": best_model_dict}
  512. else:
  513. model_info = None
  514. save_model(
  515. model,
  516. optimizer,
  517. (
  518. os.path.join(save_model_dir, prefix)
  519. if uniform_output_enabled
  520. else save_model_dir
  521. ),
  522. logger,
  523. config,
  524. is_best=True,
  525. prefix=prefix,
  526. save_model_info=model_info,
  527. best_model_dict=best_model_dict,
  528. epoch=epoch,
  529. global_step=global_step,
  530. )
  531. best_str = "best metric, {}".format(
  532. ", ".join(
  533. ["{}: {}".format(k, v) for k, v in best_model_dict.items()]
  534. )
  535. )
  536. logger.info(best_str)
  537. # logger best metric
  538. if log_writer is not None:
  539. log_writer.log_metrics(
  540. metrics={
  541. "best_{}".format(main_indicator): best_model_dict[
  542. main_indicator
  543. ]
  544. },
  545. prefix="EVAL",
  546. step=global_step,
  547. )
  548. log_writer.log_model(
  549. is_best=True, prefix="best_accuracy", metadata=best_model_dict
  550. )
  551. reader_start = time.time()
  552. if dist.get_rank() == 0:
  553. prefix = "latest"
  554. if uniform_output_enabled:
  555. export(config, model, os.path.join(save_model_dir, prefix, "inference"))
  556. gc.collect()
  557. model_info = {"epoch": epoch, "metric": best_model_dict}
  558. else:
  559. model_info = None
  560. save_model(
  561. model,
  562. optimizer,
  563. (
  564. os.path.join(save_model_dir, prefix)
  565. if uniform_output_enabled
  566. else save_model_dir
  567. ),
  568. logger,
  569. config,
  570. is_best=False,
  571. prefix=prefix,
  572. save_model_info=model_info,
  573. best_model_dict=best_model_dict,
  574. epoch=epoch,
  575. global_step=global_step,
  576. )
  577. if log_writer is not None:
  578. log_writer.log_model(is_best=False, prefix="latest")
  579. if dist.get_rank() == 0 and epoch > 0 and epoch % save_epoch_step == 0:
  580. prefix = "iter_epoch_{}".format(epoch)
  581. if uniform_output_enabled:
  582. export(config, model, os.path.join(save_model_dir, prefix, "inference"))
  583. gc.collect()
  584. model_info = {"epoch": epoch, "metric": best_model_dict}
  585. else:
  586. model_info = None
  587. save_model(
  588. model,
  589. optimizer,
  590. (
  591. os.path.join(save_model_dir, prefix)
  592. if uniform_output_enabled
  593. else save_model_dir
  594. ),
  595. logger,
  596. config,
  597. is_best=False,
  598. prefix=prefix,
  599. save_model_info=model_info,
  600. best_model_dict=best_model_dict,
  601. epoch=epoch,
  602. global_step=global_step,
  603. done_flag=epoch == config["Global"]["epoch_num"],
  604. )
  605. if log_writer is not None:
  606. log_writer.log_model(
  607. is_best=False, prefix="iter_epoch_{}".format(epoch)
  608. )
  609. best_str = "best metric, {}".format(
  610. ", ".join(["{}: {}".format(k, v) for k, v in best_model_dict.items()])
  611. )
  612. logger.info(best_str)
  613. if dist.get_rank() == 0 and log_writer is not None:
  614. log_writer.close()
  615. return
  616. def eval(
  617. model,
  618. valid_dataloader,
  619. post_process_class,
  620. eval_class,
  621. model_type=None,
  622. extra_input=False,
  623. scaler=None,
  624. amp_level="O2",
  625. amp_custom_black_list=[],
  626. amp_custom_white_list=[],
  627. amp_dtype="float16",
  628. ):
  629. model.eval()
  630. with paddle.no_grad():
  631. total_frame = 0.0
  632. total_time = 0.0
  633. pbar = tqdm(
  634. total=len(valid_dataloader), desc="eval model:", position=0, leave=True
  635. )
  636. max_iter = (
  637. len(valid_dataloader) - 1
  638. if platform.system() == "Windows"
  639. else len(valid_dataloader)
  640. )
  641. sum_images = 0
  642. for idx, batch in enumerate(valid_dataloader):
  643. if idx >= max_iter:
  644. break
  645. images = batch[0]
  646. start = time.time()
  647. # use amp
  648. if scaler:
  649. with paddle.amp.auto_cast(
  650. level=amp_level,
  651. custom_black_list=amp_custom_black_list,
  652. dtype=amp_dtype,
  653. ):
  654. if model_type == "table" or extra_input:
  655. preds = model(images, data=batch[1:])
  656. elif model_type in ["kie"]:
  657. preds = model(batch)
  658. elif model_type in ["can"]:
  659. preds = model(batch[:3])
  660. elif model_type in ["latexocr"]:
  661. preds = model(batch)
  662. elif model_type in ["sr"]:
  663. preds = model(batch)
  664. sr_img = preds["sr_img"]
  665. lr_img = preds["lr_img"]
  666. else:
  667. preds = model(images)
  668. preds = to_float32(preds)
  669. else:
  670. if model_type == "table" or extra_input:
  671. preds = model(images, data=batch[1:])
  672. elif model_type in ["kie"]:
  673. preds = model(batch)
  674. elif model_type in ["can"]:
  675. preds = model(batch[:3])
  676. elif model_type in ["latexocr", "unimernet", "pp_formulanet"]:
  677. preds = model(batch)
  678. elif model_type in ["sr"]:
  679. preds = model(batch)
  680. sr_img = preds["sr_img"]
  681. lr_img = preds["lr_img"]
  682. else:
  683. preds = model(images)
  684. batch_numpy = []
  685. for item in batch:
  686. if isinstance(item, paddle.Tensor):
  687. batch_numpy.append(item.numpy())
  688. else:
  689. batch_numpy.append(item)
  690. # Obtain usable results from post-processing methods
  691. total_time += time.time() - start
  692. # Evaluate the results of the current batch
  693. if model_type in ["table", "kie"]:
  694. if post_process_class is None:
  695. eval_class(preds, batch_numpy)
  696. else:
  697. post_result = post_process_class(preds, batch_numpy)
  698. eval_class(post_result, batch_numpy)
  699. elif model_type in ["sr"]:
  700. eval_class(preds, batch_numpy)
  701. elif model_type in ["can"]:
  702. eval_class(preds[0], batch_numpy[2:], epoch_reset=(idx == 0))
  703. elif model_type in ["latexocr", "unimernet", "pp_formulanet"]:
  704. post_result = post_process_class(preds, batch[1], "eval")
  705. eval_class(post_result[0], post_result[1], epoch_reset=(idx == 0))
  706. else:
  707. post_result = post_process_class(preds, batch_numpy[1])
  708. eval_class(post_result, batch_numpy)
  709. pbar.update(1)
  710. total_frame += len(images)
  711. sum_images += 1
  712. # Get final metric,eg. acc or hmean
  713. metric = eval_class.get_metric()
  714. pbar.close()
  715. model.train()
  716. # Avoid ZeroDivisionError
  717. if total_time > 0:
  718. metric["fps"] = total_frame / total_time
  719. else:
  720. metric["fps"] = 0 # or set to a fallback value
  721. return metric
  722. def update_center(char_center, post_result, preds):
  723. result, label = post_result
  724. feats, logits = preds
  725. logits = paddle.argmax(logits, axis=-1)
  726. feats = feats.numpy()
  727. logits = logits.numpy()
  728. for idx_sample in range(len(label)):
  729. if result[idx_sample][0] == label[idx_sample][0]:
  730. feat = feats[idx_sample]
  731. logit = logits[idx_sample]
  732. for idx_time in range(len(logit)):
  733. index = logit[idx_time]
  734. if index in char_center.keys():
  735. char_center[index][0] = (
  736. char_center[index][0] * char_center[index][1] + feat[idx_time]
  737. ) / (char_center[index][1] + 1)
  738. char_center[index][1] += 1
  739. else:
  740. char_center[index] = [feat[idx_time], 1]
  741. return char_center
  742. def get_center(model, eval_dataloader, post_process_class):
  743. pbar = tqdm(total=len(eval_dataloader), desc="get center:")
  744. max_iter = (
  745. len(eval_dataloader) - 1
  746. if platform.system() == "Windows"
  747. else len(eval_dataloader)
  748. )
  749. char_center = dict()
  750. for idx, batch in enumerate(eval_dataloader):
  751. if idx >= max_iter:
  752. break
  753. images = batch[0]
  754. start = time.time()
  755. preds = model(images)
  756. batch = [item.numpy() for item in batch]
  757. # Obtain usable results from post-processing methods
  758. post_result = post_process_class(preds, batch[1])
  759. # update char_center
  760. char_center = update_center(char_center, post_result, preds)
  761. pbar.update(1)
  762. pbar.close()
  763. for key in char_center.keys():
  764. char_center[key] = char_center[key][0]
  765. return char_center
  766. def preprocess(is_train=False):
  767. FLAGS = ArgsParser().parse_args()
  768. profiler_options = FLAGS.profiler_options
  769. config = load_config(FLAGS.config)
  770. config = merge_config(config, FLAGS.opt)
  771. profile_dic = {"profiler_options": FLAGS.profiler_options}
  772. config = merge_config(config, profile_dic)
  773. if is_train:
  774. # save_config
  775. save_model_dir = config["Global"]["save_model_dir"]
  776. os.makedirs(save_model_dir, exist_ok=True)
  777. with open(os.path.join(save_model_dir, "config.yml"), "w") as f:
  778. yaml.dump(dict(config), f, default_flow_style=False, sort_keys=False)
  779. log_file = "{}/train.log".format(save_model_dir)
  780. else:
  781. log_file = None
  782. log_ranks = config["Global"].get("log_ranks", "0")
  783. logger = get_logger(log_file=log_file, log_ranks=log_ranks)
  784. # check if set use_gpu=True in paddlepaddle cpu version
  785. use_gpu = config["Global"].get("use_gpu", False)
  786. use_xpu = config["Global"].get("use_xpu", False)
  787. use_npu = config["Global"].get("use_npu", False)
  788. use_mlu = config["Global"].get("use_mlu", False)
  789. use_gcu = config["Global"].get("use_gcu", False)
  790. use_metax_gpu = config["Global"].get("use_metax_gpu", False)
  791. use_iluvatar_gpu = config["Global"].get("use_iluvatar_gpu", False)
  792. alg = config["Architecture"]["algorithm"]
  793. assert alg in [
  794. "EAST",
  795. "DB",
  796. "SAST",
  797. "Rosetta",
  798. "CRNN",
  799. "STARNet",
  800. "RARE",
  801. "SRN",
  802. "CLS",
  803. "PGNet",
  804. "Distillation",
  805. "NRTR",
  806. "TableAttn",
  807. "SAR",
  808. "PSE",
  809. "SEED",
  810. "SDMGR",
  811. "LayoutXLM",
  812. "LayoutLM",
  813. "LayoutLMv2",
  814. "PREN",
  815. "FCE",
  816. "SVTR",
  817. "SVTR_LCNet",
  818. "ViTSTR",
  819. "ABINet",
  820. "DB++",
  821. "TableMaster",
  822. "SPIN",
  823. "VisionLAN",
  824. "Gestalt",
  825. "SLANet",
  826. "RobustScanner",
  827. "CT",
  828. "RFL",
  829. "DRRG",
  830. "CAN",
  831. "Telescope",
  832. "SATRN",
  833. "SVTR_HGNet",
  834. "ParseQ",
  835. "CPPD",
  836. "LaTeXOCR",
  837. "UniMERNet",
  838. "SLANeXt",
  839. "PP-FormulaNet-S",
  840. "PP-FormulaNet-L",
  841. "PP-FormulaNet_plus-S",
  842. "PP-FormulaNet_plus-M",
  843. "PP-FormulaNet_plus-L",
  844. ]
  845. if use_xpu:
  846. device = "xpu:{0}".format(os.getenv("FLAGS_selected_xpus", 0))
  847. elif use_npu:
  848. device = "npu:{0}".format(os.getenv("FLAGS_selected_npus", 0))
  849. elif use_mlu:
  850. device = "mlu:{0}".format(os.getenv("FLAGS_selected_mlus", 0))
  851. elif use_gcu: # Use Enflame GCU(General Compute Unit)
  852. device = "gcu:{0}".format(os.getenv("FLAGS_selected_gcus", 0))
  853. elif use_metax_gpu: # Use Enflame GCU(General Compute Unit)
  854. device = "metax:{0}".format(os.getenv("FLAGS_selected_metaxs", 0))
  855. elif use_iluvatar_gpu:
  856. device = "iluvatar_gpu:{0}".format(dist.ParallelEnv().dev_id)
  857. else:
  858. device = "gpu:{}".format(dist.ParallelEnv().dev_id) if use_gpu else "cpu"
  859. check_device(
  860. use_gpu, use_xpu, use_npu, use_mlu, use_gcu, use_iluvatar_gpu, use_metax_gpu
  861. )
  862. device = paddle.set_device(device)
  863. config["Global"]["distributed"] = dist.get_world_size() != 1
  864. loggers = []
  865. if "use_visualdl" in config["Global"] and config["Global"]["use_visualdl"]:
  866. logger.warning(
  867. "You are using VisualDL, the VisualDL is deprecated and "
  868. "removed in ppocr!"
  869. )
  870. log_writer = None
  871. if (
  872. "use_wandb" in config["Global"] and config["Global"]["use_wandb"]
  873. ) or "wandb" in config:
  874. save_dir = config["Global"]["save_model_dir"]
  875. wandb_writer_path = "{}/wandb".format(save_dir)
  876. if "wandb" in config:
  877. wandb_params = config["wandb"]
  878. else:
  879. wandb_params = dict()
  880. wandb_params.update({"save_dir": save_dir})
  881. log_writer = WandbLogger(**wandb_params, config=config)
  882. loggers.append(log_writer)
  883. else:
  884. log_writer = None
  885. print_dict(config, logger)
  886. if loggers:
  887. log_writer = Loggers(loggers)
  888. else:
  889. log_writer = None
  890. logger.info("train with paddle {} and device {}".format(paddle.__version__, device))
  891. return config, device, logger, log_writer