# rec_svtrnet_cppd_base_en.yml (2.8 KB)

  # Global: run-wide settings (device, epochs, logging cadence, checkpoint and
  # output locations, and label/character handling).
  Global:
    use_gpu: True
    epoch_num: 20
    log_smooth_window: 20
    print_batch_step: 10
    save_model_dir: ./output/rec/svtr_cppd_base/
    save_epoch_step: 1
    # evaluation is run every 2000 iterations after the 0th iteration
    eval_batch_step: [0, 2000]
    cal_metric_during_train: True
    # The next three keys carry no value, so they parse as null.
    pretrained_model:
    checkpoints:
    save_inference_dir:
    use_visualdl: False
    infer_img: doc/imgs_words_en/word_10.png
    # for data or label process
    # character_dict_path carries no value (parses as null).
    # NOTE(review): presumably the consumer falls back to a built-in
    # dictionary selected by character_type -- confirm.
    character_dict_path:
    character_type: en
    max_text_length: 25
    infer_mode: False
    use_space_char: False
    save_res_path: ./output/rec/predicts_svtr_cppd_base.txt
  # Optimizer: AdamW hyper-parameters plus the nested learning-rate schedule.
  Optimizer:
    name: AdamW
    beta1: 0.9
    beta2: 0.99
    epsilon: 1.e-8
    weight_decay: 0.05
    # Space-separated list of name fragments kept as one plain string.
    # NOTE(review): presumably parameters whose names match these fragments
    # are excluded from weight decay -- confirm against the optimizer builder.
    no_weight_decay_name: norm pos_embed char_node_embed pos_node_embed char_pos_embed vis_pos_embed
    one_dim_param_no_weight_decay: True
    # lr is a nested mapping; its `name` is distinct from Optimizer.name above.
    lr:
      name: Cosine
      learning_rate: 0.0005  # 4gpus 256bs
      warmup_epoch: 2
  # Architecture: recognition model assembled from an SVTRNet backbone and a
  # CPPD head.
  Architecture:
    model_type: rec
    algorithm: CPPD
    # Transform carries no value (parses as null).
    Transform:
    Backbone:
      name: SVTRNet
      img_size: [32, 100]
      patch_merging: 'Conv'
      embed_dim: [128, 256, 384]
      depth: [6, 6, 4]
      num_heads: [4, 8, 12]
      # NOTE(review): 18 mixer entries are listed (8 'Conv' + 10 'Global')
      # while depth sums to 16 -- confirm whether the two trailing entries are
      # deliberately unused before trimming or extending this list.
      mixer: ['Conv', 'Conv', 'Conv', 'Conv', 'Conv', 'Conv', 'Conv', 'Conv',
              'Global', 'Global', 'Global', 'Global', 'Global',
              'Global', 'Global', 'Global', 'Global', 'Global']
      local_mixer: [[5, 5], [5, 5], [5, 5]]
      last_stage: False
      prenorm: True
    Head:
      name: CPPDHead
      # dim equals the last entry of Backbone.embed_dim (384).
      dim: 384
      vis_seq: 50
      num_layer: 3
  # Loss: CPPD loss settings. The `&ignore_index` anchor defined here is
  # re-used through `*ignore_index` by the CPPDLabelEncode transforms in the
  # Train and Eval sections, so the three values always agree.
  Loss:
    name: CPPDLoss
    ignore_index: &ignore_index 100  # must be greater than the number of character classes
    smoothing: True
    sideloss_weight: 1.0
  # PostProcess: names the decoder applied to the model output.
  PostProcess:
    name: CPPDLabelDecode
  # Metric: evaluation metric and the indicator reported as the main score.
  Metric:
    name: RecMetric
    main_indicator: acc
  # Train: training dataset, its per-sample transform pipeline, and the data
  # loader settings.
  Train:
    dataset:
      name: LMDBDataSet
      data_dir: ./train_data/data_lmdb_release/training/
      # Transforms are listed in order; each item is a single-key mapping
      # from the transform name to that transform's arguments.
      transforms:
        - DecodeImage:  # load image
            img_mode: BGR
            channel_first: False
        - CPPDLabelEncode:  # Class handling label
            # Alias of the anchor declared on Loss.ignore_index.
            ignore_index: *ignore_index
        - SVTRRecResizeImg:
            image_shape: [3, 32, 100]
            padding: False
        - KeepKeys:
            keep_keys: ['image', 'label', 'label_node', 'length']  # dataloader will return list in this order
    loader:
      shuffle: True
      batch_size_per_card: 256
      drop_last: True
      num_workers: 8
  # Eval: evaluation dataset, its per-sample transform pipeline, and the data
  # loader settings. The transform list matches the one in Train; only the
  # data directory and loader options differ.
  Eval:
    dataset:
      name: LMDBDataSet
      data_dir: ./train_data/data_lmdb_release/evaluation/
      # Transforms are listed in order; each item is a single-key mapping
      # from the transform name to that transform's arguments.
      transforms:
        - DecodeImage:  # load image
            img_mode: BGR
            channel_first: False
        - CPPDLabelEncode:  # Class handling label
            # Alias of the anchor declared on Loss.ignore_index.
            ignore_index: *ignore_index
        - SVTRRecResizeImg:
            image_shape: [3, 32, 100]
            padding: False
        - KeepKeys:
            keep_keys: ['image', 'label', 'label_node', 'length']  # dataloader will return list in this order
    loader:
      shuffle: False
      drop_last: False
      batch_size_per_card: 256
      num_workers: 2
  104. num_workers: 2