| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109 |
- # Copyright 2021 The HuggingFace Team. All rights reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- from ctypes import c_float, sizeof
- from enum import Enum
- from typing import TYPE_CHECKING, Optional, Union
- if TYPE_CHECKING:
- from .. import AutoFeatureExtractor, AutoProcessor, AutoTokenizer # tests_ignore
class ParameterFormat(Enum):
    """Storage formats for model parameters; each member's value is a ctypes type."""

    Float = c_float

    @property
    def size(self) -> int:
        """
        Number of bytes required to store one value of this data type.

        Returns:
            Integer > 0
        """
        # The member value is the underlying ctypes type, so `sizeof` gives its byte width.
        underlying_ctype = self.value
        return sizeof(underlying_ctype)
def compute_effective_axis_dimension(dimension: int, fixed_dimension: int, num_token_to_add: int = 0) -> int:
    """
    Resolve the size to use for an axis, falling back to a fixed size when the requested one is not positive.

    Args:
        dimension: Requested axis size. A value <= 0 (possible when the axis is dynamic) is replaced by
            `fixed_dimension`.
        fixed_dimension: Size used in place of `dimension` when `dimension` is <= 0.
        num_token_to_add: Amount subtracted from the resolved size (presumably room reserved for tokens added
            later; confirm against callers).

    Returns:
        The resolved axis size minus `num_token_to_add`.
    """
    # < 0 is possible if using a dynamic axis
    resolved = fixed_dimension if dimension <= 0 else dimension
    return resolved - num_token_to_add
def compute_serialized_parameters_size(num_parameters: int, dtype: ParameterFormat) -> int:
    """
    Compute the size taken by all the parameters in the given storage format when serializing the model.

    Args:
        num_parameters: Number of parameters to be saved
        dtype: The data format in which each parameter will be saved

    Returns:
        Size (in bytes) taken to save all the parameters
    """
    bytes_per_parameter = dtype.size
    return num_parameters * bytes_per_parameter
def get_preprocessor(model_name: str) -> Optional[Union["AutoTokenizer", "AutoFeatureExtractor", "AutoProcessor"]]:
    """
    Gets a preprocessor (tokenizer, feature extractor or processor) that is available for `model_name`.

    Args:
        model_name (`str`): Name of the model for which a preprocessor is loaded.

    Returns:
        `Optional[Union[AutoTokenizer, AutoFeatureExtractor, AutoProcessor]]`:
            If a processor can be loaded, it is returned. Otherwise a tokenizer and a feature extractor are each
            tried: if exactly one of them loads, it is returned; if both load, a `ValueError` is raised; if
            neither loads, `None` is returned.
    """
    # Avoid circular imports by only importing this here.
    from .. import AutoFeatureExtractor, AutoProcessor, AutoTokenizer  # tests_ignore

    try:
        return AutoProcessor.from_pretrained(model_name)
    except (ValueError, OSError, KeyError):
        # No processor could be loaded: probe the two simpler preprocessors independently. A failed load
        # (OSError / KeyError) just leaves the corresponding candidate as None.
        loaded_tokenizer = None
        loaded_feature_extractor = None

        try:
            loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
        except (OSError, KeyError):
            pass

        try:
            loaded_feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
        except (OSError, KeyError):
            pass

        has_tokenizer = loaded_tokenizer is not None
        has_feature_extractor = loaded_feature_extractor is not None

        # Two candidates means the choice is ambiguous, so refuse to guess.
        if has_tokenizer and has_feature_extractor:
            raise ValueError(
                f"Couldn't auto-detect preprocessor for {model_name}. Found both a tokenizer and a feature extractor."
            )

        if has_tokenizer:
            return loaded_tokenizer

        # Either the feature extractor, or None when nothing could be loaded.
        return loaded_feature_extractor
|