# dptr_parseq_pretrain.yml
# Pre-training configuration for the DPTR recognizer with the DptrParseq decoder.
  # Run-wide settings: device, schedule length, logging/eval cadence, output
  # locations, and the label-processing values that are anchored here and
  # aliased by the PostProcess, Train and Eval sections.
  Global:
    device: gpu
    epoch_num: 20
    log_smooth_window: 20
    print_batch_step: 10
    # NOTE(review): absolute, user-specific path; adjust per environment.
    output_dir: /share/ckpt/zhaoshuai/openocr/dptr_parseq/
    # NOTE(review): presumably [start, interval] pairs; confirm against the trainer.
    eval_epoch_step: [0, 1]
    eval_batch_step: [0, 500]
    cal_metric_during_train: true
    # These keys had no value in the original (parsed as null); written
    # explicitly so the intent is unambiguous.
    pretrained_model: null
    checkpoints: null
    use_tensorboard: false
    infer_img: null
    # for data or label process
    character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt
    max_text_length: &max_text_length 25
    use_space_char: &use_space_char false
    use_amp: true
    # NOTE(review): absolute, user-specific path; adjust per environment.
    save_res_path: /share/ckpt/zhaoshuai/openocr/dptr_parseq/predicts_dptr_parseq.txt
    # NOTE(review): placed under Global based on its position in the file; the
    # original indentation was lost, so confirm this key belongs here.
    grad_clip_val: 20
  # Optimizer selection and hyper-parameters.
  Optimizer:
    name: AdamW
    # NOTE(review): the remark below mentions 384 samples per GPU on 2 GPUs,
    # while Train.loader.batch_size_per_card is 256; confirm the learning rate
    # matches the batch size actually used.
    lr: 0.001485  # 2gpus 384bs/gpu
    weight_decay: 0.0
    filter_bias_and_bn: false
  # Learning-rate schedule.
  LRScheduler:
    name: OneCycleLR
    # 0.075 of the 20 epochs set in Global.epoch_num.
    warmup_epoch: 1.5  # pct_start 0.075*20 = 1.5ep
    cycle_momentum: false
  # Model definition: recognition model using the DPTR algorithm with a single
  # Decoder component.
  Architecture:
    model_type: rec
    algorithm: DPTR
    # NOTE(review): every key from `name` through `ORP_path` is nested under
    # Decoder based on ordering; the original indentation was lost, so confirm.
    Decoder:
      name: DptrParseq
      decode_ar: true
      refine_iters: 1
      is_pretrain: true
      # NOTE(review): absolute, user-specific checkpoint path; adjust per environment.
      ORP_path: /share/ckpt/zhaoshuai/parseq/clip_background.pth
  # Training loss.
  Loss:
    name: PARSeqLoss
  # Prediction decoding; reuses the dictionary path and space-character flag
  # anchored in the Global section.
  PostProcess:
    name: ARLabelDecode
    character_dict_path: *character_dict_path
    use_space_char: *use_space_char
  # Evaluation metric; `acc` is the main indicator.
  Metric:
    name: RecMetric
    main_indicator: acc
    is_filter: true
  # Training data pipeline: an LMDB text dataset, its transforms, and the
  # loader settings.
  Train:
    dataset:
      name: TextLMDBDataSet
      # NOTE(review): absolute, user-specific path; adjust per environment.
      data_dir: /share/test/zhaoshuai/parseq-data/data/train/real/ArT
      transforms:
        - DPTRLabelEncode:  # Class handling label
            character_dict_path: *character_dict_path
            use_space_char: *use_space_char
            max_text_length: *max_text_length
        - KeepKeys:
            keep_keys: ['clip_label', 'label']  # dataloader will return list in this order
    loader:
      shuffle: true
      batch_size_per_card: 256
      drop_last: true
      num_workers: 4
  # Evaluation data pipeline: same dataset type and transforms as Train, read
  # from the validation directory, without shuffling or dropping the last batch.
  Eval:
    dataset:
      name: TextLMDBDataSet
      # NOTE(review): absolute, user-specific path; adjust per environment.
      data_dir: /share/test/zhaoshuai/parseq-data/data/val
      transforms:
        - DPTRLabelEncode:  # Class handling label
            character_dict_path: *character_dict_path
            use_space_char: *use_space_char
            max_text_length: *max_text_length
        - KeepKeys:
            keep_keys: ['clip_label', 'label']  # dataloader will return list in this order
    loader:
      shuffle: false
      drop_last: false
      batch_size_per_card: 256
      num_workers: 2
  80. num_workers: 2