# SLANeXt_wireless.yml
#
# Training / evaluation configuration for the SLANeXt_wireless table-structure
# model.
#
# NOTE(review): this file was rebuilt from a flattened listing in which the
# original indentation had been lost. The nesting below is inferred from key
# order and the usual Global / Optimizer / Architecture / Loss / PostProcess /
# Metric / Train / Eval layout -- confirm against the upstream config before
# relying on it.
---
Global:
  model_name: SLANeXt_wireless  # To use static model for inference.
  use_gpu: true
  epoch_num: 400
  log_smooth_window: 20
  print_batch_step: 20
  save_model_dir: ./output/SLANeXt_wireless
  save_epoch_step: 400
  eval_batch_step:
    - 0
    - 331
  cal_metric_during_train: true
  pretrained_model: "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/SLANeXt_wireless_pretrained.pdparams"
  checkpoints: null
  save_inference_dir: ./output/SLANeXt_wireless/infer
  use_visualdl: false
  infer_img: ppstructure/docs/table/table.jpg
  character_dict_path: ppocr/utils/dict/table_structure_dict_ch.txt
  character_type: en
  max_text_length: 500
  box_format: xyxyxyxy
  infer_mode: false
  use_sync_bn: true
  save_res_path: output/infer
  d2s_train_image_shape: [3, 512, 512]

Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  clip_norm: 5.0
  lr:
    name: Cosine
    learning_rate: 0.0001
    warmup_epoch: 1
  regularizer:
    name: L2
    factor: 0.0

Architecture:
  model_type: table
  algorithm: SLANeXt
  Backbone:
    name: Vary_VIT_B
    image_size: 512
    encoder_embed_dim: 768
    encoder_depth: 12
    encoder_num_heads: 12
    encoder_global_attn_indexes: [2, 5, 8, 11]
  Head:
    name: SLAHead
    hidden_size: 512
    max_text_length: 500
    loc_reg_num: 8

Loss:
  name: SLALoss
  structure_weight: 1.0
  # SLANeXt does not train the cell location task by default, set the
  # loc_weight if needed.
  loc_weight: 0.0
  loc_loss: smooth_l1

PostProcess:
  name: TableLabelDecode
  merge_no_span_structure: true

Metric:
  name: TableMetric
  main_indicator: acc
  compute_bbox_metric: false
  loc_reg_num: 8
  box_format: xyxyxyxy
  del_thead_tbody: true

Train:
  dataset:
    name: PubTabDataSet
    data_dir: train_data/table/train/
    label_file_list:
      - train_data/table/train.txt
    ratio_list:
      - 1
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: false
      - TableLabelEncode:
          learn_empty_box: false
          merge_no_span_structure: true
          replace_empty_cell_token: false
          loc_reg_num: 8
          max_text_length: 500
      - TableBoxEncode:
          in_box_format: xyxyxyxy
          out_box_format: xyxyxyxy
      - ResizeTableImage:
          max_len: 512
          resize_bboxes: true
      - NormalizeImage:
          # Plain scalar: YAML loads this as the string "1./255.", not a
          # number. NOTE(review): presumably the consumer evaluates the
          # expression itself -- confirm before changing it to a float.
          scale: 1./255.
          mean:
            - 0.485
            - 0.456
            - 0.406
          std:
            - 0.229
            - 0.224
            - 0.225
          order: hwc
      - PaddingTableImage:
          size:
            - 512
            - 512
      - ToCHWImage: null
      - KeepKeys:
          keep_keys:
            - image
            - structure
            - bboxes
            - bbox_masks
            - length
            - shape
  loader:
    shuffle: true
    batch_size_per_card: 48
    drop_last: true
    num_workers: 1

# The Eval transforms repeat the Train transforms entry for entry; only the
# dataset paths (no ratio_list here) and the loader settings differ.
Eval:
  dataset:
    name: PubTabDataSet
    data_dir: train_data/table/val/
    label_file_list:
      - train_data/table/val.txt
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: false
      - TableLabelEncode:
          learn_empty_box: false
          merge_no_span_structure: true
          replace_empty_cell_token: false
          loc_reg_num: 8
          max_text_length: 500
      - TableBoxEncode:
          in_box_format: xyxyxyxy
          out_box_format: xyxyxyxy
      - ResizeTableImage:
          max_len: 512
          resize_bboxes: true
      - NormalizeImage:
          # Same string-valued scale expression as in Train (see the
          # NOTE(review) there is not required to read this: the value loads
          # as the string "1./255.").
          scale: 1./255.
          mean:
            - 0.485
            - 0.456
            - 0.406
          std:
            - 0.229
            - 0.224
            - 0.225
          order: hwc
      - PaddingTableImage:
          size:
            - 512
            - 512
      - ToCHWImage: null
      - KeepKeys:
          keep_keys:
            - image
            - structure
            - bboxes
            - bbox_masks
            - length
            - shape
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 48
    num_workers: 1

# NOTE(review): placed at the top level; the flattened listing does not show
# whether it originally belonged under another section -- confirm.
profiler_options: null