| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475 |
- # coding=utf-8
- # Copyright 2023 The HuggingFace Inc. team.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- """
- Audio/Text processor class for CLAP
- """
- from typing import Optional, Union
- from ...audio_utils import AudioInput
- from ...processing_utils import ProcessingKwargs, ProcessorMixin, Unpack
- from ...tokenization_utils_base import PreTokenizedInput, TextInput
- from ...utils import logging
- from ...utils.deprecation import deprecate_kwarg
- logger = logging.get_logger(__name__)
class ClapProcessor(ProcessorMixin):
    r"""
    Single entry point bundling a CLAP feature extractor with a RoBERTa tokenizer.

    The processor is a thin layer over [`ProcessorMixin`]: it declares which feature extractor and tokenizer
    classes it pairs together and resolves the deprecated `audios` argument of [`~ClapProcessor.__call__`]
    before handing everything to the parent implementation.

    Args:
        feature_extractor ([`ClapFeatureExtractor`]):
            Audio feature extractor. Required.
        tokenizer ([`RobertaTokenizerFast`]):
            Text tokenizer. Required.
    """

    # Class names (as strings) of the components this processor pairs together.
    feature_extractor_class = "ClapFeatureExtractor"
    tokenizer_class = ("RobertaTokenizer", "RobertaTokenizerFast")

    def __init__(self, feature_extractor, tokenizer):
        # Component registration is left entirely to the parent class.
        super().__init__(feature_extractor, tokenizer)

    @deprecate_kwarg("audios", version="v4.59.0", new_name="audio")
    def __call__(
        self,
        text: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
        audios: Optional[AudioInput] = None,
        audio: Optional[AudioInput] = None,
        **kwargs: Unpack[ProcessingKwargs],
    ):
        """
        Prepare text and/or audio inputs by delegating to [`ProcessorMixin.__call__`].

        Args:
            text:
                Text input(s), passed through unchanged as `text`.
            audios:
                Deprecated alias of `audio`. Within this method it is only used when `audio` is `None`, in
                which case a deprecation warning is logged.
            audio:
                Audio input(s), passed through as `audio`.
            **kwargs:
                Extra processing options, forwarded untouched.

        Returns:
            Whatever the parent `__call__` returns for the resolved `text` and `audio`.
        """
        # `deprecate_kwarg` only intercepts keyword usage; a caller passing the audio
        # positionally still lands in `audios`, so the legacy slot is checked here too.
        legacy_slot_only = audio is None and audios is not None
        if legacy_slot_only:
            logger.warning(
                "Using `audios` keyword argument is deprecated when calling ClapProcessor, instead use `audio`."
            )
            audio = audios

        return super().__call__(text=text, audio=audio, **kwargs)
- __all__ = ["ClapProcessor"]
|