# SLANet.yml (3.9 KB)
  # Global settings: run control, logging, model save/load paths, and the
  # label-processing values that other sections of this file reuse via
  # YAML anchors.
  Global:
    model_name: SLANet  # To use static model for inference.
    use_gpu: true
    epoch_num: 100
    log_smooth_window: 20
    print_batch_step: 20
    save_model_dir: ./output/SLANet
    save_epoch_step: 400
    # evaluation is run every 1000 iterations after the 0th iteration
    eval_batch_step: [0, 1000]
    cal_metric_during_train: true
    pretrained_model: "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/SLANet_pretrained.pdparams"
    checkpoints: null  # explicitly empty; same parsed value as a bare `checkpoints:`
    save_inference_dir: ./output/SLANet/infer
    use_visualdl: false
    infer_img: ppstructure/docs/table/table.jpg
    # for data or label process
    character_dict_path: ppocr/utils/dict/table_structure_dict.txt
    character_type: en
    # Anchor: aliased as *max_text_length elsewhere in this file.
    max_text_length: &max_text_length 500
    # Anchor: aliased as *box_format elsewhere in this file.
    box_format: &box_format 'xyxy'  # 'xywh', 'xyxy', 'xyxyxyxy'
    infer_mode: false
    use_sync_bn: true
    save_res_path: 'output/infer'
    d2s_train_image_shape: [3, -1, -1]
    amp_custom_white_list: ['concat', 'elementwise_sub', 'set_value']
  # Optimizer settings. `lr` and `regularizer` are nested mappings, each
  # with its own `name`; keeping them flat would create duplicate `name`
  # keys in a single mapping.
  Optimizer:
    name: Adam
    beta1: 0.9
    beta2: 0.999
    clip_norm: 5.0
    lr:
      name: Piecewise
      learning_rate: 0.001
      # Two boundaries paired with three values (one more value than boundaries).
      decay_epochs: [40, 50]
      values: [0.001, 0.0001, 0.00005]
    regularizer:
      name: 'L2'
      factor: 0.00000  # parses as the float 0.0
  # Model architecture: one sub-mapping each for Backbone, Neck and Head.
  Architecture:
    model_type: table
    algorithm: SLANet
    Backbone:
      name: PPLCNet
      scale: 1.0
      pretrained: true
      use_ssld: true
    Neck:
      name: CSPPAN
      out_channels: 96
    Head:
      name: SLAHead
      hidden_size: 256
      # Alias of the anchor defined under Global.max_text_length.
      max_text_length: *max_text_length
      # Anchor: aliased as *loc_reg_num by Metric and the TableLabelEncode
      # transforms in this file.
      loc_reg_num: &loc_reg_num 4
  # Loss settings: separate weights for the structure and location terms.
  Loss:
    name: SLALoss
    structure_weight: 1.0
    loc_weight: 2.0
    loc_loss: smooth_l1
  # Post-processing settings.
  PostProcess:
    name: TableLabelDecode
    # Anchor: aliased as *merge_no_span_structure by the TableLabelEncode
    # transforms in this file, so encode and decode share one value.
    merge_no_span_structure: &merge_no_span_structure true
  # Evaluation metric settings.
  Metric:
    name: TableMetric
    main_indicator: acc
    compute_bbox_metric: false
    # Aliases of the anchors defined under Architecture.Head.loc_reg_num and
    # Global.box_format.
    loc_reg_num: *loc_reg_num
    box_format: *box_format
  # Training data settings: dataset source, the ordered transform list, and
  # data-loader options. Each `transforms` entry is a single-key mapping of
  # a transform name to its parameters.
  Train:
    dataset:
      name: PubTabDataSet
      data_dir: train_data/table/pubtabnet/train/
      label_file_list: [train_data/table/pubtabnet/PubTabNet_2.0.0_train.jsonl]
      transforms:
        - DecodeImage:  # load image
            img_mode: BGR
            channel_first: false
        - TableLabelEncode:
            learn_empty_box: false
            merge_no_span_structure: *merge_no_span_structure
            replace_empty_cell_token: false
            loc_reg_num: *loc_reg_num
            max_text_length: *max_text_length
        - TableBoxEncode:
            # Input and output formats both alias Global.box_format.
            in_box_format: *box_format
            out_box_format: *box_format
        - ResizeTableImage:
            max_len: 488
        - NormalizeImage:
            # Plain scalar `1./255.` parses as a string, not a number; kept
            # verbatim. NOTE(review): presumably evaluated by the consumer.
            scale: 1./255.
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: 'hwc'
        - PaddingTableImage:
            # Both sides match ResizeTableImage.max_len above.
            size: [488, 488]
        - ToCHWImage: null  # no parameters; same parsed value as a bare key
        - KeepKeys:
            keep_keys: ['image', 'structure', 'bboxes', 'bbox_masks', 'length', 'shape']
    loader:
      shuffle: true
      batch_size_per_card: 48
      drop_last: true
      num_workers: 1
  # Evaluation data settings. Uses the same transform list as Train, but
  # reads the validation split and neither shuffles nor drops the last batch.
  Eval:
    dataset:
      name: PubTabDataSet
      data_dir: train_data/table/pubtabnet/val/
      label_file_list: [train_data/table/pubtabnet/PubTabNet_2.0.0_val.jsonl]
      transforms:
        - DecodeImage:  # load image
            img_mode: BGR
            channel_first: false
        - TableLabelEncode:
            learn_empty_box: false
            merge_no_span_structure: *merge_no_span_structure
            replace_empty_cell_token: false
            loc_reg_num: *loc_reg_num
            max_text_length: *max_text_length
        - TableBoxEncode:
            # Input and output formats both alias Global.box_format.
            in_box_format: *box_format
            out_box_format: *box_format
        - ResizeTableImage:
            max_len: 488
        - NormalizeImage:
            # Plain scalar `1./255.` parses as a string, not a number; kept
            # verbatim. NOTE(review): presumably evaluated by the consumer.
            scale: 1./255.
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: 'hwc'
        - PaddingTableImage:
            # Both sides match ResizeTableImage.max_len above.
            size: [488, 488]
        - ToCHWImage: null  # no parameters; same parsed value as a bare key
        - KeepKeys:
            keep_keys: ['image', 'structure', 'bboxes', 'bbox_masks', 'length', 'shape']
    loader:
      shuffle: false
      drop_last: false
      batch_size_per_card: 48
      num_workers: 1
  139. num_workers: 1