processing_siglip2.py 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
# coding=utf-8
# Copyright 2025 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Image/Text processor class for SigLIP2.
"""
  18. from typing import Optional
  19. from ...processing_utils import ImagesKwargs, ProcessingKwargs, ProcessorMixin
  20. class Siglip2ImagesKwargs(ImagesKwargs, total=False):
  21. max_num_patches: Optional[int]
  22. patch_size: Optional[int]
  23. class Siglip2ProcessorKwargs(ProcessingKwargs, total=False):
  24. images_kwargs: Siglip2ImagesKwargs
  25. _defaults = {
  26. "text_kwargs": {
  27. "padding": "max_length",
  28. "truncation": True,
  29. "max_length": 64,
  30. },
  31. "images_kwargs": {
  32. "max_num_patches": 256,
  33. "patch_size": 16,
  34. },
  35. }
  36. class Siglip2Processor(ProcessorMixin):
  37. r"""
  38. Constructs a Siglip2 processor which wraps a Siglip2 image processor and a Gemma tokenizer into a single processor.
  39. [`Siglip2Processor`] offers all the functionalities of [`Siglip2ImageProcessor`] and [`GemmaTokenizerFast`]. See the
  40. [`~Siglip2Processor.__call__`] and [`~Siglip2Processor.decode`] for more information.
  41. Args:
  42. image_processor ([`Siglip2ImageProcessor`]):
  43. The image processor is a required input.
  44. tokenizer ([`GemmaTokenizerFast`]):
  45. The tokenizer is a required input.
  46. """
  47. attributes = ["image_processor", "tokenizer"]
  48. image_processor_class = "AutoImageProcessor"
  49. tokenizer_class = "AutoTokenizer"
  50. valid_processor_kwargs = Siglip2ProcessorKwargs
  51. def __init__(self, image_processor, tokenizer):
  52. super().__init__(image_processor, tokenizer)
  53. __all__ = ["Siglip2Processor"]