svtrv2_ch.yml 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. Global: # run-wide settings; the &anchors defined in this section are reused by later sections via *aliases
  2. device: gpu
  3. epoch_num: 100
  4. log_smooth_window: 20
  5. print_batch_step: 10
  6. output_dir: ./output/rec/ch/svtrv2_ctc_ch/
  7. save_epoch_step: [150, 10] # NOTE(review): the first value (150) is larger than epoch_num (100) — confirm this is intended
  8. # evaluation is run every 2000 iterations (see eval_batch_step below)
  9. eval_epoch_step: [0, 1]
  10. eval_batch_step: [0, 2000]
  11. cal_metric_during_train: True
  12. pretrained_model: ./openocr_svtrv2_ch.pth
  13. checkpoints: # left empty in this config
  14. use_tensorboard: false
  15. infer_img: # left empty in this config
  16. # settings shared by data and label processing
  17. character_dict_path: &character_dict_path ./tools/utils/ppocr_keys_v1.txt
  18. max_text_length: &max_text_length 25
  19. use_space_char: &use_space_char True
  20. save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_ctc.txt # NOTE(review): this path is under u14m_filter/, unlike output_dir above — confirm
  21. use_amp: True
  22. project_name: svtrv2_ctc_ch
  23. Optimizer: # optimizer selection and its hyper-parameters
  24. name: AdamW
  25. lr: 0.0001 # for 4 GPUs with a batch size of 256 per GPU
  26. weight_decay: 0.05
  27. filter_bias_and_bn: True # presumably excludes bias and normalization parameters from weight decay — confirm against the optimizer builder
  28. LRScheduler: # learning-rate schedule
  29. name: CosineAnnealingLR
  30. warmup_epoch: 5 # presumably the number of warm-up epochs before the cosine schedule — confirm against the scheduler implementation
  31. Architecture: # model definition: Transform, Encoder and Decoder sub-sections
  32. model_type: rec
  33. algorithm: SVTRv2_server
  34. Transform: # left empty in this config
  35. Encoder:
  36. name: SVTRv2LNConvTwo33
  37. use_pos_embed: False
  38. out_channels: 256
  39. dims: [128, 256, 384] # three entries, one per stage, as are depths and num_heads below
  40. depths: [6, 6, 6]
  41. num_heads: [4, 8, 12]
  42. mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']] # three lists of six entries each, matching depths
  43. kernel_sizes: [[5,5,5,5,5,5], [5,5,5,5,5,5], [-1]] # the third entry is [-1] where the third mixer list is all 'Global'; presumably -1 means "not applicable" — confirm
  44. num_convs: [[1,1,1,1,1,1], [1,1,1,1,1,1], [-1]] # same layout as kernel_sizes
  45. sub_k: [[2, 1], [2, 1], [-1, -1]]
  46. last_stage: False
  47. feat2d: True
  48. pope_bias: True
  49. Decoder:
  50. name: CTCDecoder
  51. svtr_encoder: # the keys that follow (dims .. use_guide) presumably belong to this nested section — the indentation is not visible here, confirm
  52. dims: 256
  53. depth: 2
  54. hidden_dims: 256
  55. kernel_size: [1, 3]
  56. use_guide: True
  57. Loss: # training loss selection
  58. name: CTCLoss
  59. zero_infinity: True # presumably forwarded to the CTC loss so that infinite losses are zeroed — confirm against the loss implementation
  60. PostProcess: # decoding of model output
  61. name: CTCLabelDecode
  62. character_dict_path: *character_dict_path # alias of the &character_dict_path anchor defined under Global
  63. Metric: # evaluation metric settings
  64. name: RecMetric
  65. main_indicator: acc
  66. ignore_space: False
  67. # is_filter: True (option currently commented out)
  68. Train: # training dataset, transform pipeline and data loader
  69. dataset:
  70. name: SimpleDataSet
  71. data_dir: ../ic15_data/
  72. label_file_list:
  73. - ../ic15_data/rec_gt_train.txt
  74. transforms: # the list items below are listed in pipeline order
  75. - DecodeImagePIL: # load image; NOTE(review): Train uses DecodeImagePIL with RGB while Eval uses DecodeImage with BGR — confirm the channel order seen by the model is consistent
  76. img_mode: RGB
  77. - PARSeqAugPIL: # no options given
  78. - CTCLabelEncode: # class that handles label encoding; its three options are aliases of the anchors defined under Global
  79. character_dict_path: *character_dict_path
  80. use_space_char: *use_space_char
  81. max_text_length: *max_text_length
  82. - RecTVResize:
  83. image_shape: [48, 320]
  84. padding: True
  85. - KeepKeys:
  86. keep_keys: ['image', 'label', 'length']
  87. loader:
  88. shuffle: True
  89. batch_size_per_card: 256 # matches the "256 per GPU" noted next to Optimizer.lr
  90. drop_last: True
  91. num_workers: 4
  92. Eval: # evaluation dataset, transform pipeline and data loader
  93. dataset:
  94. name: SimpleDataSet
  95. data_dir: ../ic15_data/
  96. label_file_list:
  97. - ../ic15_data/rec_gt_test.txt
  98. transforms: # the list items below are listed in pipeline order
  99. - DecodeImage: # load image; NOTE(review): Eval uses DecodeImage with BGR while Train uses DecodeImagePIL with RGB — confirm the channel order seen by the model is consistent
  100. img_mode: BGR
  101. - CTCLabelEncode: # class that handles label encoding; its three options are aliases of the anchors defined under Global
  102. character_dict_path: *character_dict_path
  103. use_space_char: *use_space_char
  104. max_text_length: *max_text_length
  105. - RecDynamicResize:
  106. image_shape: [48, 320]
  107. padding: False
  108. # - RecTVResize: (commented-out alternative with the same settings as the Train resize step)
  109. # image_shape: [48, 320]
  110. # padding: True
  111. - KeepKeys:
  112. keep_keys: ['image', 'label', 'length']
  113. loader:
  114. shuffle: False
  115. drop_last: False
  116. batch_size_per_card: 1
  117. num_workers: 4