# visionlan_loss.py
import torch
import torch.nn.functional as F
from torch import nn
  4. def flatten_label(target):
  5. label_flatten = []
  6. label_length = []
  7. for i in range(0, target.size()[0]):
  8. cur_label = target[i].tolist()
  9. label_flatten += cur_label[:cur_label.index(0) + 1]
  10. label_length.append(cur_label.index(0) + 1)
  11. label_flatten = torch.LongTensor(label_flatten)
  12. label_length = torch.IntTensor(label_length)
  13. return (label_flatten, label_length)
  14. def _flatten(sources, lengths):
  15. return torch.cat([t[:l] for t, l in zip(sources, lengths)])
  16. class VisionLANLoss(nn.Module):
  17. def __init__(self,
  18. training_step='LA',
  19. ratio_res=0.5,
  20. ratio_sub=0.5,
  21. **kwargs):
  22. super(VisionLANLoss, self).__init__()
  23. self.loss_func = nn.CrossEntropyLoss(reduction='mean')
  24. self.ratio_res = ratio_res
  25. self.ratio_sub = ratio_sub
  26. assert training_step in ['LF_1', 'LF_2', 'LA']
  27. self.training_step = training_step
  28. def forward(self, pred, batch):
  29. text_pre, text_rem, text_mas, _ = pred
  30. target = batch[1].to(dtype=torch.int64)
  31. label_flatten, length = flatten_label(target)
  32. text_pre = _flatten(text_pre, length)
  33. if self.training_step == 'LF_1':
  34. loss = self.loss_func(text_pre, label_flatten.to(text_pre.device))
  35. else:
  36. target_res = batch[2].to(dtype=torch.int64)
  37. target_sub = batch[3].to(dtype=torch.int64)
  38. label_flatten_res, length_res = flatten_label(target_res)
  39. label_flatten_sub, length_sub = flatten_label(target_sub)
  40. text_rem = _flatten(text_rem, length_res)
  41. text_mas = _flatten(text_mas, length_sub)
  42. loss_ori = self.loss_func(text_pre,
  43. label_flatten.to(text_pre.device))
  44. loss_res = self.loss_func(text_rem,
  45. label_flatten_res.to(text_rem.device))
  46. loss_mas = self.loss_func(text_mas,
  47. label_flatten_sub.to(text_mas.device))
  48. loss = loss_ori + loss_res * self.ratio_res + loss_mas * self.ratio_sub
  49. return {'loss': loss}