# SLANet_lcnetv2.yml
  # Global: run-wide settings. Two values below carry anchors
  # (&max_text_length, &box_format) that other sections of this file alias,
  # so those anchors must stay defined here.
  Global:
    use_gpu: true
    epoch_num: 50
    log_smooth_window: 20
    print_batch_step: 10
    save_model_dir: ./output/SLANet_lcnetv2
    save_epoch_step: 20
    # evaluation is run every 1000 iterations after the 0th iteration
    eval_batch_step: [0, 1000]
    cal_metric_during_train: true
    # Explicit null: no value is set for either key (same parse as a bare key).
    pretrained_model: null
    checkpoints: null
    save_inference_dir: ./SLANet_lcnetv2_infer
    use_visualdl: false
    infer_img: ppstructure/docs/table/table.jpg
    # for data or label process
    character_dict_path: ppocr/utils/dict/table_structure_dict.txt
    character_type: en
    max_text_length: &max_text_length 500
    box_format: &box_format 'xyxy'  # 'xywh', 'xyxy', 'xyxyxyxy'
    infer_mode: false
    use_sync_bn: true
    save_res_path: 'output/infer'
    d2s_train_image_shape: [3, -1, -1]
    amp_custom_white_list: ['concat', 'elementwise_sub', 'set_value']
  # Optimizer: Adam settings, with the learning rate and the regularizer each
  # given as a nested mapping. The two `name` keys live in different mappings
  # (optimizer vs. regularizer), so they do not collide.
  Optimizer:
    name: Adam
    beta1: 0.9
    beta2: 0.999
    clip_norm: 5.0
    lr:
      learning_rate: 0.001
    regularizer:
      name: 'L2'
      factor: 0.00000
  # Architecture: SLANet table model described as Backbone, Neck and Head
  # sub-mappings, each selected by its own `name`.
  Architecture:
    model_type: table
    algorithm: SLANet
    Backbone:
      name: PPLCNetV2_base
    Neck:
      name: CSPPAN
      out_channels: 96
    Head:
      name: SLAHead
      hidden_size: 256
      # Alias of the anchor defined on Global.max_text_length.
      max_text_length: *max_text_length
      # Anchor definition: aliased by the Metric, Train and Eval sections.
      loc_reg_num: &loc_reg_num 4
  # Loss: SLALoss with separate weights for the structure and location terms.
  Loss:
    name: SLALoss
    structure_weight: 1.0
    loc_weight: 2.0
    loc_loss: smooth_l1
  # PostProcess: decoder selection. The flag below carries an anchor that the
  # TableLabelEncode transforms in this file alias.
  PostProcess:
    name: TableLabelDecode
    merge_no_span_structure: &merge_no_span_structure true
  # Metric: TableMetric, with `acc` as the main indicator.
  Metric:
    name: TableMetric
    main_indicator: acc
    compute_bbox_metric: false
    # Aliases: values come from the &loc_reg_num and &box_format anchors.
    loc_reg_num: *loc_reg_num
    box_format: *box_format
  # Train: training dataset location, the ordered list of transforms applied
  # to each sample, and the loader settings.
  Train:
    dataset:
      name: PubTabDataSet
      data_dir: ../table_data/pubtabnet/train/
      label_file_list: [../table_data/pubtabnet/PubTabNet_2.0.0_train.jsonl]
      # A YAML sequence: entries are listed in the order given.
      transforms:
        - DecodeImage:  # load image
            img_mode: BGR
            channel_first: false
        - TableLabelEncode:
            learn_empty_box: false
            merge_no_span_structure: *merge_no_span_structure
            replace_empty_cell_token: false
            loc_reg_num: *loc_reg_num
            max_text_length: *max_text_length
        - TableBoxEncode:
            # Input and output formats alias the same &box_format anchor.
            in_box_format: *box_format
            out_box_format: *box_format
        - ResizeTableImage:
            max_len: 488
        - NormalizeImage:
            # Not a YAML number: this scalar parses as the string "1./255.".
            # Quoted to make that explicit. Presumably evaluated by the
            # consumer -- TODO confirm.
            scale: '1./255.'
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: 'hwc'
        - PaddingTableImage:
            size: [488, 488]
        # Explicit null: no parameters are configured for this transform.
        - ToCHWImage: null
        - KeepKeys:
            keep_keys: ['image', 'structure', 'bboxes', 'bbox_masks', 'length', 'shape']
    loader:
      shuffle: true
      batch_size_per_card: 24
      drop_last: true
      num_workers: 8
  # Eval: validation dataset location, the ordered list of transforms applied
  # to each sample, and the loader settings.
  Eval:
    dataset:
      name: PubTabDataSet
      data_dir: ../table_data/pubtabnet/val/
      label_file_list: [../table_data/pubtabnet/PubTabNet_2.0.0_val.jsonl]
      # A YAML sequence: entries are listed in the order given.
      transforms:
        - DecodeImage:  # load image
            img_mode: BGR
            channel_first: false
        - TableLabelEncode:
            learn_empty_box: false
            merge_no_span_structure: *merge_no_span_structure
            replace_empty_cell_token: false
            loc_reg_num: *loc_reg_num
            max_text_length: *max_text_length
        - TableBoxEncode:
            # Input and output formats alias the same &box_format anchor.
            in_box_format: *box_format
            out_box_format: *box_format
        - ResizeTableImage:
            max_len: 488
        - NormalizeImage:
            # Not a YAML number: this scalar parses as the string "1./255.".
            # Quoted to make that explicit. Presumably evaluated by the
            # consumer -- TODO confirm.
            scale: '1./255.'
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: 'hwc'
        - PaddingTableImage:
            size: [488, 488]
        # Explicit null: no parameters are configured for this transform.
        - ToCHWImage: null
        - KeepKeys:
            keep_keys: ['image', 'structure', 'bboxes', 'bbox_masks', 'shape']
    loader:
      shuffle: false
      drop_last: false
      batch_size_per_card: 48
      num_workers: 4
  132. num_workers: 4