modeling_shieldgemma2.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. # coding=utf-8
  2. # Copyright 2025 Google Inc. HuggingFace Inc. team. All rights reserved.
  3. #
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. from dataclasses import dataclass
  17. from typing import Optional, Union
  18. import torch
  19. from ...cache_utils import Cache
  20. from ...modeling_outputs import ImageClassifierOutputWithNoAttention
  21. from ...modeling_utils import PreTrainedModel
  22. from ...utils import (
  23. auto_docstring,
  24. logging,
  25. )
  26. from ..auto import AutoModelForImageTextToText
  27. from .configuration_shieldgemma2 import ShieldGemma2Config
# Module-level logger, named after this module via `__name__`.
logger = logging.get_logger(__name__)
  29. @dataclass
  30. class ShieldGemma2ImageClassifierOutputWithNoAttention(ImageClassifierOutputWithNoAttention):
  31. """ShieldGemma2 classifies imags as violative or not relative to a specific policy
  32. Args:
  33. """
  34. probabilities: Optional[torch.Tensor] = None
  35. @auto_docstring
  36. class ShieldGemma2ForImageClassification(PreTrainedModel):
  37. config: ShieldGemma2Config
  38. _checkpoint_conversion_mapping = {
  39. "model.language_model.model": "model.model.language_model",
  40. "model.vision_tower": "model.model.vision_tower",
  41. "model.multi_modal_projector": "model.model.multi_modal_projector",
  42. "model.language_model.lm_head": "model.lm_head",
  43. }
  44. def __init__(self, config: ShieldGemma2Config):
  45. super().__init__(config=config)
  46. self.yes_token_index = getattr(config, "yes_token_index", 10_784)
  47. self.no_token_index = getattr(config, "no_token_index", 3771)
  48. self.model = AutoModelForImageTextToText.from_config(config=config)
  49. def get_input_embeddings(self):
  50. return self.model.language_model.get_input_embeddings()
  51. def set_input_embeddings(self, value):
  52. self.model.language_model.set_input_embeddings(value)
  53. def get_output_embeddings(self):
  54. return self.model.language_model.get_output_embeddings()
  55. def set_output_embeddings(self, new_embeddings):
  56. self.model.language_model.set_output_embeddings(new_embeddings)
  57. def set_decoder(self, decoder):
  58. self.model.language_model.set_decoder(decoder)
  59. def get_decoder(self):
  60. return self.model.language_model.get_decoder()
  61. def tie_weights(self):
  62. return self.model.language_model.tie_weights()
  63. @auto_docstring
  64. def forward(
  65. self,
  66. input_ids: Optional[torch.LongTensor] = None,
  67. pixel_values: Optional[torch.FloatTensor] = None,
  68. attention_mask: Optional[torch.Tensor] = None,
  69. position_ids: Optional[torch.LongTensor] = None,
  70. past_key_values: Optional[Cache] = None,
  71. token_type_ids: Optional[torch.LongTensor] = None,
  72. cache_position: Optional[torch.LongTensor] = None,
  73. inputs_embeds: Optional[torch.FloatTensor] = None,
  74. labels: Optional[torch.LongTensor] = None,
  75. use_cache: Optional[bool] = None,
  76. output_attentions: Optional[bool] = None,
  77. output_hidden_states: Optional[bool] = None,
  78. return_dict: Optional[bool] = None,
  79. logits_to_keep: Union[int, torch.Tensor] = 0,
  80. **lm_kwargs,
  81. ) -> ShieldGemma2ImageClassifierOutputWithNoAttention:
  82. r"""
  83. Returns:
  84. A `ShieldGemma2ImageClassifierOutputWithNoAttention` instance containing the logits and probabilities
  85. associated with the model predicting the `Yes` or `No` token as the response to that prompt, captured in the
  86. following properties.
  87. * `logits` (`torch.Tensor` of shape `(batch_size, 2)`):
  88. The first position along dim=1 is the logits for the `Yes` token and the second position along dim=1 is
  89. the logits for the `No` token.
  90. * `probabilities` (`torch.Tensor` of shape `(batch_size, 2)`):
  91. The first position along dim=1 is the probability of predicting the `Yes` token and the second position
  92. along dim=1 is the probability of predicting the `No` token.
  93. ShieldGemma prompts are constructed such that predicting the `Yes` token means the content *does violate* the
  94. policy as described. If you are only interested in the violative condition, use
  95. `violated = outputs.probabilities[:, 1]` to extract that slice from the output tensors.
  96. When used with the `ShieldGemma2Processor`, the `batch_size` will be equal to `len(images) * len(policies)`,
  97. and the order within the batch will be img1_policy1, ... img1_policyN, ... imgM_policyN.
  98. """
  99. outputs = self.model(
  100. input_ids=input_ids,
  101. pixel_values=pixel_values,
  102. attention_mask=attention_mask,
  103. position_ids=position_ids,
  104. past_key_values=past_key_values,
  105. token_type_ids=token_type_ids,
  106. cache_position=cache_position,
  107. inputs_embeds=inputs_embeds,
  108. labels=labels,
  109. use_cache=use_cache,
  110. output_attentions=output_attentions,
  111. output_hidden_states=output_hidden_states,
  112. return_dict=return_dict,
  113. logits_to_keep=logits_to_keep,
  114. **lm_kwargs,
  115. )
  116. logits = outputs.logits
  117. selected_logits = logits[:, -1, [self.yes_token_index, self.no_token_index]]
  118. probabilities = torch.softmax(selected_logits, dim=-1)
  119. return ShieldGemma2ImageClassifierOutputWithNoAttention(
  120. logits=selected_logits,
  121. probabilities=probabilities,
  122. )
# Names exported by `from <module> import *`.
__all__ = [
    "ShieldGemma2ForImageClassification",
]