# convnextv2_tiny_cam_tps_on.yml
  1. Global:
  2. device: gpu
  3. epoch_num: 20
  4. log_smooth_window: 20
  5. print_batch_step: 10
  6. output_dir: ./output/rec/u14m_filter/convnextv2_tiny_cam_tps_on
  7. eval_epoch_step: [0, 1]
  8. eval_batch_step: [0, 500]
  9. cal_metric_during_train: False
  10. pretrained_model:
  11. checkpoints:
  12. use_tensorboard: false
  13. infer_img:
  14. # for data or label process
  15. character_dict_path: ./tools/utils/EN_symbol_dict.txt
  16. max_text_length: &max_text_length 25
  17. use_space_char: False
  18. save_res_path: ./output/rec/u14m_filter/predicts_convnextv2_cam_tps_on.txt
  19. use_amp: True
  20. Optimizer:
  21. name: AdamW
  22. lr: 0.0008 # for 4gpus bs256/gpu
  23. weight_decay: 0.05
  24. filter_bias_and_bn: True
  25. eps: 1.e-8
  26. LRScheduler:
  27. name: OneCycleLR
  28. warmup_epoch: 1.5 # pct_start 0.075*20 : 1.5ep
  29. cycle_momentum: False
  30. Architecture:
  31. model_type: rec
  32. algorithm: CAM
  33. Transform:
  34. name: Aster_TPS
  35. tps_inputsize: [32, 64]
  36. tps_outputsize: &img_shape [32, 128]
  37. Encoder:
  38. name: CAMEncoder
  39. encoder_config:
  40. name: ConvNeXtV2
  41. depths: [3, 3, 9, 3]
  42. dims: [96, 192, 384, 768]
  43. strides: [[4,4], [2,1], [2,1], [1,1]]
  44. drop_path_rate: 0.2
  45. feat2d: True
  46. nb_classes: 97
  47. strides: [[4,4], [2,1], [2,1], [1,1]]
  48. deform_stride: 2
  49. stage_idx: 2
  50. use_depthwise_unet: True
  51. use_more_unet: False
  52. binary_loss_type: BanlanceMultiClassCrossEntropyLoss
  53. mid_size: False
  54. d_embedding: 512
  55. Decoder:
  56. name: CAMDecoder
  57. num_encoder_layers: -1
  58. beam_size: 0
  59. num_decoder_layers: 2
  60. nhead: 8
  61. max_len: *max_text_length
  62. Loss:
  63. name: CAMLoss
  64. loss_weight_binary: 1.5
  65. label_smoothing: 0.
  66. Metric:
  67. name: RecMetric
  68. main_indicator: acc
  69. is_filter: True
  70. PostProcess:
  71. name: ARLabelDecode
  72. Train:
  73. dataset:
  74. name: LMDBDataSet
  75. data_dir: ../Union14M-L-LMDB-Filtered
  76. transforms:
  77. - DecodeImagePIL: # load image
  78. img_mode: RGB
  79. - PARSeqAugPIL:
  80. - CAMLabelEncode: # Class handling label
  81. font_path: ./arial.ttf
  82. image_shape: *img_shape
  83. - RecTVResize:
  84. image_shape: [64, 256]
  85. padding: False
  86. - KeepKeys:
  87. keep_keys: ['image', 'label', 'length', 'binary_mask'] # dataloader will return list in this order
  88. loader:
  89. shuffle: True
  90. batch_size_per_card: 256
  91. drop_last: True
  92. num_workers: 4
  93. Eval:
  94. dataset:
  95. name: LMDBDataSet
  96. data_dir: ../evaluation
  97. transforms:
  98. - DecodeImagePIL: # load image
  99. img_mode: RGB
  100. - ARLabelEncode: # Class handling label
  101. - RecTVResize:
  102. image_shape: [64, 256]
  103. padding: False
  104. - KeepKeys:
  105. keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
  106. loader:
  107. shuffle: False
  108. drop_last: False
  109. batch_size_per_card: 256
  110. num_workers: 2