| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071 |
- # coding=utf-8
- # Copyright 2023 The HuggingFace Inc. team.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- """
- Image/Text processor class for ALIGN
- """
- from ...processing_utils import ProcessingKwargs, ProcessorMixin
- class AlignProcessorKwargs(ProcessingKwargs, total=False):
- # see processing_utils.ProcessingKwargs documentation for usage.
- _defaults = {
- "text_kwargs": {
- "padding": "max_length",
- "max_length": 64,
- },
- }
- class AlignProcessor(ProcessorMixin):
- r"""
- Constructs an ALIGN processor which wraps [`EfficientNetImageProcessor`] and
- [`BertTokenizer`]/[`BertTokenizerFast`] into a single processor that inherits both the image processor and
- tokenizer functionalities. See the [`~AlignProcessor.__call__`] and [`~OwlViTProcessor.decode`] for more
- information.
- The preferred way of passing kwargs is as a dictionary per modality, see usage example below.
- ```python
- from transformers import AlignProcessor
- from PIL import Image
- model_id = "kakaobrain/align-base"
- processor = AlignProcessor.from_pretrained(model_id)
- processor(
- images=your_pil_image,
- text=["What is that?"],
- images_kwargs = {"crop_size": {"height": 224, "width": 224}},
- text_kwargs = {"padding": "do_not_pad"},
- common_kwargs = {"return_tensors": "pt"},
- )
- ```
- Args:
- image_processor ([`EfficientNetImageProcessor`]):
- The image processor is a required input.
- tokenizer ([`BertTokenizer`, `BertTokenizerFast`]):
- The tokenizer is a required input.
- """
- attributes = ["image_processor", "tokenizer"]
- image_processor_class = "EfficientNetImageProcessor"
- tokenizer_class = ("BertTokenizer", "BertTokenizerFast")
- valid_processor_kwargs = AlignProcessorKwargs
- def __init__(self, image_processor, tokenizer):
- super().__init__(image_processor, tokenizer)
- __all__ = ["AlignProcessor"]
|