# repvit_db.yml (3.3 KB)
  1. Global:
  2. device: gpu
  3. epoch_num: &epoch_num 500
  4. log_smooth_window: 20
  5. print_batch_step: 100
  6. output_dir: ./output/det_repsvtr_db
  7. save_epoch_step: [400, 25]
  8. eval_batch_step:
  9. - 0
  10. - 1000
  11. cal_metric_during_train: false
  12. checkpoints:
  13. pretrained_model: openocr_det_repvit_ch.pth
  14. save_inference_dir: null
  15. use_tensorboard: false
  16. infer_img:
  17. save_res_path: ./checkpoints/det_db/predicts_db.txt
  18. distributed: true
  19. model_type: det
  20. Architecture:
  21. algorithm: DB_mobile
  22. Backbone:
  23. name: RepSVTR_det
  24. Neck:
  25. name: RSEFPN
  26. out_channels: 96
  27. shortcut: True
  28. Head:
  29. name: DBHead
  30. k: 50
  31. Loss:
  32. name: DBLoss
  33. balance_loss: true
  34. main_loss_type: DiceLoss
  35. alpha: 5
  36. beta: 10
  37. ohem_ratio: 3
  38. Optimizer:
  39. name: Adam
  40. lr: 0.001
  41. weight_decay: 5.0e-05
  42. filter_bias_and_bn: False
  43. LRScheduler:
  44. name: CosineAnnealingLR
  45. warmup_epoch: 2
  46. PostProcess:
  47. name: DBPostProcess
  48. thresh: 0.3
  49. box_thresh: 0.6
  50. max_candidates: 1000
  51. unclip_ratio: 1.5
  52. score_mode: 'slow'
  53. Metric:
  54. name: DetMetric
  55. main_indicator: hmean
  56. Train:
  57. dataset:
  58. name: SimpleDataSet
  59. data_dir: ../icdar2015/text_localization/
  60. label_file_list:
  61. - ../icdar2015/text_localization/train_icdar2015_label.txt
  62. ratio_list: [1.0]
  63. transforms:
  64. - DecodeImage:
  65. img_mode: BGR
  66. channel_first: false
  67. - DetLabelEncode: null
  68. - CopyPaste: null
  69. - IaaAugment:
  70. augmenter_args:
  71. - type: Fliplr
  72. args:
  73. p: 0.5
  74. - type: Affine
  75. args:
  76. rotate:
  77. - -10
  78. - 10
  79. - type: Resize
  80. args:
  81. size:
  82. - 0.5
  83. - 3
  84. - EastRandomCropData:
  85. size:
  86. - 640
  87. - 640
  88. max_tries: 50
  89. keep_ratio: true
  90. - MakeBorderMap:
  91. shrink_ratio: 0.4
  92. thresh_min: 0.3
  93. thresh_max: 0.7
  94. total_epoch: *epoch_num
  95. - MakeShrinkMap:
  96. shrink_ratio: 0.4
  97. min_text_size: 8
  98. total_epoch: *epoch_num
  99. - NormalizeImage:
  100. scale: 1./255.
  101. mean:
  102. - 0.485
  103. - 0.456
  104. - 0.406
  105. std:
  106. - 0.229
  107. - 0.224
  108. - 0.225
  109. order: hwc
  110. - ToCHWImage: null
  111. - KeepKeys:
  112. keep_keys:
  113. - image
  114. - threshold_map
  115. - threshold_mask
  116. - shrink_map
  117. - shrink_mask
  118. loader:
  119. shuffle: true
  120. drop_last: false
  121. batch_size_per_card: 8
  122. num_workers: 8
  123. Eval:
  124. dataset:
  125. name: SimpleDataSet
  126. data_dir: ../icdar2015/text_localization/
  127. label_file_list:
  128. - ../icdar2015/text_localization/test_icdar2015_label.txt
  129. transforms:
  130. - DecodeImage:
  131. img_mode: BGR
  132. channel_first: false
  133. - DetLabelEncode: null
  134. - DetResizeForTest:
  135. # image_shape: [1280, 1280]
  136. # keep_ratio: True
  137. # padding: True
  138. limit_side_len: 960
  139. limit_type: max
  140. - NormalizeImage:
  141. scale: 1./255.
  142. mean:
  143. - 0.485
  144. - 0.456
  145. - 0.406
  146. std:
  147. - 0.229
  148. - 0.224
  149. - 0.225
  150. order: hwc
  151. - ToCHWImage: null
  152. - KeepKeys:
  153. keep_keys:
  154. - image
  155. - shape
  156. - polys
  157. - ignore_tags
  158. loader:
  159. shuffle: false
  160. drop_last: false
  161. batch_size_per_card: 1
  162. num_workers: 2
  163. profiler_options: null