vqa_token_layoutlm_loss.py 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. # copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. from paddle import nn
  18. from ppocr.losses.basic_loss import DMLLoss
  19. class VQASerTokenLayoutLMLoss(nn.Layer):
  20. def __init__(self, num_classes, key=None):
  21. super().__init__()
  22. self.loss_class = nn.CrossEntropyLoss()
  23. self.num_classes = num_classes
  24. self.ignore_index = self.loss_class.ignore_index
  25. self.key = key
  26. def forward(self, predicts, batch):
  27. if isinstance(predicts, dict) and self.key is not None:
  28. predicts = predicts[self.key]
  29. labels = batch[5]
  30. attention_mask = batch[2]
  31. if attention_mask is not None:
  32. active_loss = (
  33. attention_mask.reshape(
  34. [
  35. -1,
  36. ]
  37. )
  38. == 1
  39. )
  40. active_output = predicts.reshape([-1, self.num_classes])[active_loss]
  41. active_label = labels.reshape(
  42. [
  43. -1,
  44. ]
  45. )[active_loss]
  46. loss = self.loss_class(active_output, active_label)
  47. else:
  48. loss = self.loss_class(
  49. predicts.reshape([-1, self.num_classes]),
  50. labels.reshape(
  51. [
  52. -1,
  53. ]
  54. ),
  55. )
  56. return {"loss": loss}