---
# vit_busnet.yml
#
# Training / evaluation configuration for a text-recognition model that pairs
# a ViT encoder with a BUSDecoder.
#
# NOTE(review): this file was recovered from a flattened listing in which the
# original indentation was lost. The nesting below was rebuilt from key order
# and list markers; confirm it against the consuming framework's schema
# (in particular that `loader` is a sibling of `dataset`, and that
# `ignore_index` / `pretraining` belong to `Decoder`).

Global:
  device: gpu
  epoch_num: 10
  log_smooth_window: 20
  print_batch_step: 10
  output_dir: ./output/rec/u14m_filter/vit_busnet/
  eval_epoch_step: [0, 1]
  eval_batch_step: [0, 500]
  cal_metric_during_train: true
  # The next two keys are intentionally unset.
  pretrained_model: null
  checkpoints: null
  use_tensorboard: false
  infer_img: null
  # for data or label process
  character_dict_path: ./tools/utils/EN_symbol_dict.txt
  max_text_length: 25
  use_space_char: false
  save_res_path: ./output/rec/u14m_filter/predicts_vit_busnet.txt
  grad_clip_val: 20
  use_amp: true

Optimizer:
  name: Adam
  lr: 0.00053  # 4gpus bs256/gpu
  weight_decay: 0.0
  filter_bias_and_bn: false

LRScheduler:
  name: MultiStepLR
  milestones: [6]
  gamma: 0.1

Architecture:
  model_type: rec
  # NOTE(review): "BUSBet" looks like a typo for "BUSNet" (cf. the file name).
  # Left unchanged because it is a runtime string; verify whether any consumer
  # matches on this value before renaming.
  algorithm: BUSBet
  Transform: null
  Encoder:
    name: ViT
    img_size: [32, 128]
    patch_size: [4, 8]
    embed_dim: 384
    depth: 12
    num_heads: 6
    mlp_ratio: 4
    qkv_bias: true
  Decoder:
    name: BUSDecoder
    nhead: 6
    num_layers: 6
    dim_feedforward: 1536
    # Anchor reused by Loss and by the label-encode transforms below so that
    # all of them share one ignore_index value.
    ignore_index: &ignore_index 100
    pretraining: false

Loss:
  name: ABINetLoss
  ignore_index: *ignore_index

PostProcess:
  name: ABINetLabelDecode

Metric:
  name: RecMetric
  main_indicator: acc
  is_filter: true

Train:
  dataset:
    name: LMDBDataSet
    data_dir: ../Union14M-L-LMDB-Filtered
    transforms:
      # load image
      - DecodeImagePIL:
          img_mode: RGB
      - PARSeqAugPIL: null
      - ABINetLabelEncode:
          ignore_index: *ignore_index
      - RecTVResize:
          image_shape: [32, 128]
          padding: false
      - KeepKeys:
          # dataloader will return list in this order
          keep_keys: ['image', 'label', 'length']
  loader:
    shuffle: true
    batch_size_per_card: 256
    drop_last: true
    num_workers: 4

Eval:
  dataset:
    name: LMDBDataSet
    data_dir: ../evaluation
    transforms:
      # load image
      - DecodeImagePIL:
          img_mode: RGB
      - ABINetLabelEncode:
          ignore_index: *ignore_index
      - RecTVResize:
          image_shape: [32, 128]
          padding: false
      - KeepKeys:
          # dataloader will return list in this order
          keep_keys: ['image', 'label', 'length']
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 256
    num_workers: 2