# table_master.yml (3.7 KB)

  # Run-wide settings: device selection, schedule lengths, logging cadence,
  # and the input/output paths used for training, evaluation, and inference.
  Global:
    use_gpu: true
    epoch_num: 17
    log_smooth_window: 20
    print_batch_step: 100
    save_model_dir: ./output/table_master/
    save_epoch_step: 17
    eval_batch_step: [0, 6259]
    cal_metric_during_train: true
    pretrained_model: null
    # Explicit null instead of a bare `checkpoints:` (same value, unambiguous).
    checkpoints: null
    save_inference_dir: output/table_master/infer
    use_visualdl: false
    infer_img: ppstructure/docs/table/table.jpg
    save_res_path: ./output/table_master
    character_dict_path: ppocr/utils/dict/table_master_structure_dict.txt
    infer_mode: false
    # The two values below are anchored so that later sections can alias them
    # (`*max_text_length`, `*box_format`) instead of repeating the literals.
    max_text_length: &max_text_length 500
    box_format: &box_format 'xywh'  # 'xywh', 'xyxy', 'xyxyxyxy'
    d2s_train_image_shape: [3, 480, 480]
  # Optimizer settings: algorithm, learning-rate schedule, and weight
  # regularization.
  Optimizer:
    name: Adam
    beta1: 0.9
    beta2: 0.999
    # Learning-rate schedule; `milestones` and `gamma` belong to this nested
    # mapping, not to the optimizer itself.
    lr:
      name: MultiStepDecay
      learning_rate: 0.001
      milestones: [12, 15]
      gamma: 0.1
      warmup_epoch: 0.02
    regularizer:
      name: L2
      # A factor of 0.0 is kept as in the original.
      # NOTE(review): this presumably disables the L2 penalty - confirm.
      factor: 0.0
  # Model definition: a backbone plus a head, each selected by `name`.
  Architecture:
    model_type: table
    algorithm: TableMaster
    Backbone:
      name: TableResNetExtra
      gcb_config:
        ratio: 0.0625
        headers: 1
        att_scale: false
        fusion_type: channel_add
        # One flag per backbone stage (four entries, matching Backbone.layers).
        layers: [false, true, true, true]
      # Sibling of `gcb_config`, not a member of it: a second `layers` key
      # inside `gcb_config` would be a duplicate key.
      layers: [1, 2, 5, 3]
    Head:
      name: TableMasterHead
      hidden_size: 512
      headers: 8
      dropout: 0
      # NOTE(review): 2024 is kept verbatim; 2048 is the more common
      # feed-forward width - confirm this is intentional before changing it.
      d_ff: 2024
      max_text_length: *max_text_length
      # Anchored here; aliased by the TableMasterLabelEncode transforms.
      loc_reg_num: &loc_reg_num 4
  # Training loss selection.
  Loss:
    name: TableMasterLoss
    ignore_index: 42  # set to len of dict + 3
  # Decoding of raw model output into table structure.
  PostProcess:
    name: TableMasterLabelDecode
    box_shape: pad
    # Anchored so the label-encode transforms alias the same setting and
    # cannot drift from the decoder.
    merge_no_span_structure: &merge_no_span_structure true
  # Evaluation metric; `main_indicator` names the value reported as primary.
  Metric:
    name: TableMetric
    main_indicator: acc
    compute_bbox_metric: false
    # Aliases Global.box_format so metric and data pipeline agree.
    box_format: *box_format
  # Training data: dataset location, the ordered preprocessing pipeline, and
  # data-loader settings.
  Train:
    dataset:
      name: PubTabDataSet
      data_dir: train_data/table/pubtabnet/train/
      label_file_list: [train_data/table/pubtabnet/PubTabNet_2.0.0_train.jsonl]
      # `transforms` is a sequence, so the order of the steps is preserved.
      # Each item is a single-key mapping: operator name -> its arguments.
      transforms:
        - DecodeImage:
            img_mode: BGR
            channel_first: false
        - TableMasterLabelEncode:
            learn_empty_box: false
            merge_no_span_structure: *merge_no_span_structure
            replace_empty_cell_token: true
            loc_reg_num: *loc_reg_num
            max_text_length: *max_text_length
        - ResizeTableImage:
            max_len: 480
            resize_bboxes: true
        - PaddingTableImage:
            size: [480, 480]
        - TableBoxEncode:
            in_box_format: *box_format
            out_box_format: *box_format
        - NormalizeImage:
            # Quoted on purpose: `1./255.` is not a YAML float, so it loads as
            # a string either way. NOTE(review): presumably evaluated by the
            # consumer - confirm.
            scale: '1./255.'
            mean: [0.5, 0.5, 0.5]
            std: [0.5, 0.5, 0.5]
            order: hwc
        - ToCHWImage: null
        - KeepKeys:
            keep_keys: [image, structure, bboxes, bbox_masks, shape]
    loader:
      shuffle: true
      batch_size_per_card: 10
      drop_last: true
      num_workers: 8
  # Evaluation data: same preprocessing pipeline as training, pointed at the
  # validation split, with shuffling and last-batch dropping turned off.
  Eval:
    dataset:
      name: PubTabDataSet
      data_dir: train_data/table/pubtabnet/val/
      label_file_list: [train_data/table/pubtabnet/PubTabNet_2.0.0_val.jsonl]
      # `transforms` is a sequence, so the order of the steps is preserved.
      # Each item is a single-key mapping: operator name -> its arguments.
      transforms:
        - DecodeImage:
            img_mode: BGR
            channel_first: false
        - TableMasterLabelEncode:
            learn_empty_box: false
            merge_no_span_structure: *merge_no_span_structure
            replace_empty_cell_token: true
            loc_reg_num: *loc_reg_num
            max_text_length: *max_text_length
        - ResizeTableImage:
            max_len: 480
            resize_bboxes: true
        - PaddingTableImage:
            size: [480, 480]
        - TableBoxEncode:
            in_box_format: *box_format
            out_box_format: *box_format
        - NormalizeImage:
            # Quoted on purpose: `1./255.` is not a YAML float, so it loads as
            # a string either way. NOTE(review): presumably evaluated by the
            # consumer - confirm.
            scale: '1./255.'
            mean: [0.5, 0.5, 0.5]
            std: [0.5, 0.5, 0.5]
            order: hwc
        - ToCHWImage: null
        - KeepKeys:
            keep_keys: [image, structure, bboxes, bbox_masks, shape]
    loader:
      shuffle: false
      drop_last: false
      batch_size_per_card: 10
      num_workers: 8