| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637 |
# Copyright (c) Alibaba, Inc. and its affiliates.
import enum
import os
class Fields(object):
    """Names for different application fields.

    Each attribute is a plain string constant.
    """
    hub = 'hub'
    datasets = 'datasets'
    framework = 'framework'
    cv = 'cv'
    nlp = 'nlp'
    audio = 'audio'
    multi_modal = 'multi-modal'
    science = 'science'
    server = 'server'
- class CVTasks(object):
- # ocr
- ocr_detection = 'ocr-detection'
- ocr_recognition = 'ocr-recognition'
- table_recognition = 'table-recognition'
- lineless_table_recognition = 'lineless-table-recognition'
- license_plate_detection = 'license-plate-detection'
- card_detection_correction = 'card-detection-correction'
- # human face body related
- animal_recognition = 'animal-recognition'
- face_detection = 'face-detection'
- face_liveness = 'face-liveness'
- face_quality_assessment = 'face-quality-assessment'
- card_detection = 'card-detection'
- face_recognition = 'face-recognition'
- facial_expression_recognition = 'facial-expression-recognition'
- face_processing_base = 'face-processing-base'
- face_attribute_recognition = 'face-attribute-recognition'
- face_2d_keypoints = 'face-2d-keypoints'
- facial_68ldk_detection = 'facial-68ldk-detection'
- human_detection = 'human-detection'
- human_object_interaction = 'human-object-interaction'
- face_image_generation = 'face-image-generation'
- body_2d_keypoints = 'body-2d-keypoints'
- body_3d_keypoints = 'body-3d-keypoints'
- hand_2d_keypoints = 'hand-2d-keypoints'
- general_recognition = 'general-recognition'
- human_wholebody_keypoint = 'human-wholebody-keypoint'
- pedestrian_attribute_recognition = 'pedestrian-attribute-recognition'
- image_classification = 'image-classification'
- image_multilabel_classification = 'image-multilabel-classification'
- image_classification_imagenet = 'image-classification-imagenet'
- image_classification_dailylife = 'image-classification-dailylife'
- image_object_detection = 'image-object-detection'
- video_object_detection = 'video-object-detection'
- image_fewshot_detection = 'image-fewshot-detection'
- open_vocabulary_detection = 'open-vocabulary-detection'
- object_detection_3d = 'object-detection-3d'
- image_segmentation = 'image-segmentation'
- semantic_segmentation = 'semantic-segmentation'
- image_driving_perception = 'image-driving-perception'
- image_depth_estimation = 'image-depth-estimation'
- dense_optical_flow_estimation = 'dense-optical-flow-estimation'
- image_normal_estimation = 'image-normal-estimation'
- indoor_layout_estimation = 'indoor-layout-estimation'
- video_depth_estimation = 'video-depth-estimation'
- panorama_depth_estimation = 'panorama-depth-estimation'
- portrait_matting = 'portrait-matting'
- universal_matting = 'universal-matting'
- text_driven_segmentation = 'text-driven-segmentation'
- shop_segmentation = 'shop-segmentation'
- hand_static = 'hand-static'
- face_human_hand_detection = 'face-human-hand-detection'
- face_emotion = 'face-emotion'
- product_segmentation = 'product-segmentation'
- image_matching = 'image-matching'
- image_local_feature_matching = 'image-local-feature-matching'
- image_quality_assessment_degradation = 'image-quality-assessment-degradation'
- human_normal_estimation = 'human-normal-estimation'
- crowd_counting = 'crowd-counting'
- # image editing
- skin_retouching = 'skin-retouching'
- image_super_resolution = 'image-super-resolution'
- image_super_resolution_pasd = 'image-super-resolution-pasd'
- image_debanding = 'image-debanding'
- image_colorization = 'image-colorization'
- image_color_enhancement = 'image-color-enhancement'
- image_denoising = 'image-denoising'
- image_deblurring = 'image-deblurring'
- image_portrait_enhancement = 'image-portrait-enhancement'
- image_inpainting = 'image-inpainting'
- image_paintbyexample = 'image-paintbyexample'
- image_skychange = 'image-skychange'
- image_demoireing = 'image-demoireing'
- image_editing = 'image-editing'
- # image generation
- image_to_image_translation = 'image-to-image-translation'
- image_to_image_generation = 'image-to-image-generation'
- image_style_transfer = 'image-style-transfer'
- image_portrait_stylization = 'image-portrait-stylization'
- image_body_reshaping = 'image-body-reshaping'
- image_embedding = 'image-embedding'
- image_face_fusion = 'image-face-fusion'
- product_retrieval_embedding = 'product-retrieval-embedding'
- controllable_image_generation = 'controllable-image-generation'
- text_to_360panorama_image = 'text-to-360panorama-image'
- image_try_on = 'image-try-on'
- human_image_generation = 'human-image-generation'
- image_view_transform = 'image-view-transform'
- # video recognition
- live_category = 'live-category'
- action_recognition = 'action-recognition'
- action_detection = 'action-detection'
- video_category = 'video-category'
- video_embedding = 'video-embedding'
- virtual_try_on = 'virtual-try-on'
- movie_scene_segmentation = 'movie-scene-segmentation'
- language_guided_video_summarization = 'language-guided-video-summarization'
- vop_retrieval = 'video-text-retrieval'
- # video segmentation
- video_object_segmentation = 'video-object-segmentation'
- referring_video_object_segmentation = 'referring-video-object-segmentation'
- video_human_matting = 'video-human-matting'
- video_panoptic_segmentation = 'video-panoptic-segmentation'
- video_instance_segmentation = 'video-instance-segmentation'
- # video editing
- video_inpainting = 'video-inpainting'
- video_frame_interpolation = 'video-frame-interpolation'
- video_stabilization = 'video-stabilization'
- video_super_resolution = 'video-super-resolution'
- video_deinterlace = 'video-deinterlace'
- video_colorization = 'video-colorization'
- # reid and tracking
- video_single_object_tracking = 'video-single-object-tracking'
- video_multi_object_tracking = 'video-multi-object-tracking'
- video_summarization = 'video-summarization'
- image_reid_person = 'image-reid-person'
- # pointcloud task
- pointcloud_sceneflow_estimation = 'pointcloud-sceneflow-estimation'
- # image multi-view depth estimation
- image_multi_view_depth_estimation = 'image-multi-view-depth-estimation'
- # domain specific object detection
- domain_specific_object_detection = 'domain-specific-object-detection'
- # content check
- content_check = 'content-check'
- # 3d face reconstruction
- face_reconstruction = 'face-reconstruction'
- head_reconstruction = 'head-reconstruction'
- text_to_head = 'text-to-head'
- # 3d human reconstruction
- human_reconstruction = 'human-reconstruction'
- text_texture_generation = 'text-texture-generation'
- # image quality assessment mos
- image_quality_assessment_mos = 'image-quality-assessment-mos'
- # motion generation
- motion_generation = 'motion-generation'
- # 3d reconstruction
- nerf_recon_acc = 'nerf-recon-acc'
- nerf_recon_4k = 'nerf-recon-4k'
- nerf_recon_vq_compression = 'nerf-recon-vq-compression'
- surface_recon_common = 'surface-recon-common'
- human3d_render = 'human3d-render'
- human3d_animation = 'human3d-animation'
- image_control_3d_portrait = 'image-control-3d-portrait'
- self_supervised_depth_completion = 'self-supervised-depth-completion'
- # 3d generation
- image_to_3d = 'image-to-3d'
- # vision efficient tuning
- vision_efficient_tuning = 'vision-efficient-tuning'
- # bad image detecting
- bad_image_detecting = 'bad-image-detecting'
- class NLPTasks(object):
- # chat
- chat = 'chat'
- # nlp tasks
- word_segmentation = 'word-segmentation'
- part_of_speech = 'part-of-speech'
- named_entity_recognition = 'named-entity-recognition'
- nli = 'nli'
- sentiment_classification = 'sentiment-classification'
- sentiment_analysis = 'sentiment-analysis'
- sentence_similarity = 'sentence-similarity'
- text_classification = 'text-classification'
- sentence_embedding = 'sentence-embedding'
- text_ranking = 'text-ranking'
- relation_extraction = 'relation-extraction'
- zero_shot = 'zero-shot'
- translation = 'translation'
- competency_aware_translation = 'competency-aware-translation'
- token_classification = 'token-classification'
- transformer_crf = 'transformer-crf'
- conversational = 'conversational'
- text_generation = 'text-generation'
- fid_dialogue = 'fid-dialogue'
- text2text_generation = 'text2text-generation'
- task_oriented_conversation = 'task-oriented-conversation'
- dialog_intent_prediction = 'dialog-intent-prediction'
- dialog_state_tracking = 'dialog-state-tracking'
- table_question_answering = 'table-question-answering'
- fill_mask = 'fill-mask'
- text_summarization = 'text-summarization'
- question_answering = 'question-answering'
- code_translation = 'code-translation'
- code_generation = 'code-generation'
- zero_shot_classification = 'zero-shot-classification'
- backbone = 'backbone'
- text_error_correction = 'text-error-correction'
- word_alignment = 'word-alignment'
- faq_question_answering = 'faq-question-answering'
- information_extraction = 'information-extraction'
- document_segmentation = 'document-segmentation'
- extractive_summarization = 'extractive-summarization'
- feature_extraction = 'feature-extraction'
- translation_evaluation = 'translation-evaluation'
- sudoku = 'sudoku'
- text2sql = 'text2sql'
- siamese_uie = 'siamese-uie'
- document_grounded_dialog_retrieval = 'document-grounded-dialog-retrieval'
- document_grounded_dialog_rerank = 'document-grounded-dialog-rerank'
- document_grounded_dialog_generate = 'document-grounded-dialog-generate'
- machine_reading_comprehension = 'machine-reading-comprehension'
class AudioTasks(object):
    """Task-name string constants for audio tasks.

    Attribute names use underscores; the values are the hyphenated task
    names.
    """
    # audio tasks
    auto_speech_recognition = 'auto-speech-recognition'
    text_to_speech = 'text-to-speech'
    speech_signal_process = 'speech-signal-process'
    speech_separation = 'speech-separation'
    acoustic_echo_cancellation = 'acoustic-echo-cancellation'
    acoustic_noise_suppression = 'acoustic-noise-suppression'
    keyword_spotting = 'keyword-spotting'
    inverse_text_processing = 'inverse-text-processing'
    punctuation = 'punctuation'
    speaker_verification = 'speaker-verification'
    speech_language_recognition = 'speech-language-recognition'
    speaker_diarization = 'speaker-diarization'
    audio_quantization = 'audio-quantization'
    voice_activity_detection = 'voice-activity-detection'
    language_score_prediction = 'language-score-prediction'
    speech_timestamp = 'speech-timestamp'
    speaker_diarization_dialogue_detection = 'speaker-diarization-dialogue-detection'
    speaker_diarization_semantic_speaker_turn_detection = 'speaker-diarization-semantic-speaker-turn-detection'
    emotion_recognition = 'emotion-recognition'
    speech_super_resolution = 'speech-super-resolution'
    voice_conversion = 'voice-conversion'
class MultiModalTasks(object):
    """Task-name string constants for multi-modal tasks.

    Attribute names use underscores; the values are the hyphenated task
    names.
    """
    # multi-modal tasks
    image_captioning = 'image-captioning'
    visual_grounding = 'visual-grounding'
    text_to_image_synthesis = 'text-to-image-synthesis'
    multi_modal_embedding = 'multi-modal-embedding'
    text_video_retrieval = 'text-video-retrieval'
    generative_multi_modal_embedding = 'generative-multi-modal-embedding'
    multi_modal_similarity = 'multi-modal-similarity'
    visual_question_answering = 'visual-question-answering'
    visual_entailment = 'visual-entailment'
    video_multi_modal_embedding = 'video-multi-modal-embedding'
    image_text_retrieval = 'image-text-retrieval'
    document_vl_embedding = 'document-vl-embedding'
    video_captioning = 'video-captioning'
    video_question_answering = 'video-question-answering'
    video_temporal_grounding = 'video-temporal-grounding'
    text_to_video_synthesis = 'text-to-video-synthesis'
    efficient_diffusion_tuning = 'efficient-diffusion-tuning'
    multimodal_dialogue = 'multimodal-dialogue'
    image_to_video = 'image-to-video'
    video_to_video = 'video-to-video'
class ScienceTasks(object):
    """Task-name string constants for science tasks."""
    protein_structure = 'protein-structure'
class Other(object):
    """Holds the catch-all task name ``'other'``."""
    other = 'other'
class TasksIODescriptions(object):
    """String identifiers describing the input/output shape of a task.

    Every attribute is a plain ``str``. Earlier revisions ended most of
    these assignments with a stray trailing comma, which silently made the
    value a 1-tuple (``('image_to_image',)``) while a few others stayed
    strings; the commas are removed so all members share one type.
    """
    image_to_image = 'image_to_image'
    images_to_image = 'images_to_image'
    image_to_text = 'image_to_text'
    seed_to_image = 'seed_to_image'
    text_to_speech = 'text_to_speech'
    text_to_text = 'text_to_text'
    speech_to_text = 'speech_to_text'
    speech_to_speech = 'speech_to_speech'
    speeches_to_speech = 'speeches_to_speech'
    visual_grounding = 'visual_grounding'
    visual_question_answering = 'visual_question_answering'
    visual_entailment = 'visual_entailment'
    generative_multi_modal_embedding = 'generative_multi_modal_embedding'
    efficient_diffusion_tuning = 'efficient_diffusion_tuning'
class Tasks(CVTasks, NLPTasks, AudioTasks, MultiModalTasks, ScienceTasks,
            Other):
    """ Names for tasks supported by modelscope.

    Holds the standard task name to use for identifying different tasks.
    This should be used to register models, pipelines, trainers.
    """
    # Cache mapping task name -> field name; filled by the first call to
    # `find_field_by_task`.
    reverse_field_index = {}
    task_template = 'task-template'

    @staticmethod
    def find_field_by_task(task_name):
        """Return the field that `task_name` belongs to.

        The task -> field index is built on first use from the public
        attributes of CVTasks, NLPTasks, AudioTasks, MultiModalTasks and
        ScienceTasks (tasks from `Other` and `task_template` are not
        indexed).

        Args:
            task_name: The task name to look up.

        Returns:
            The matching `Fields` value, or None when the task is not
            indexed.

        Raises:
            ValueError: If the same task name is found more than once
                while building the index.
        """
        if not Tasks.reverse_field_index:
            # Lazy init, not thread safe
            field_classes = {
                Fields.cv: CVTasks,
                Fields.nlp: NLPTasks,
                Fields.audio: AudioTasks,
                Fields.multi_modal: MultiModalTasks,
                Fields.science: ScienceTasks,
            }
            # Build into a local dict first so that a duplicate-task error
            # never leaves a half-filled cache behind for later calls.
            index = {}
            for field, task_cls in field_classes.items():
                for attr in dir(task_cls):
                    if attr.startswith('__'):
                        continue
                    task = getattr(Tasks, attr)
                    if task in index:
                        raise ValueError(f'Duplicate task: {task}')
                    index[task] = field
            # Fill the existing dict in place so references to it stay valid.
            Tasks.reverse_field_index.update(index)
        return Tasks.reverse_field_index.get(task_name)
class InputFields(object):
    """Names for input data fields in the input data for pipelines."""
    img = 'img'
    text = 'text'
    audio = 'audio'
class Hubs(enum.Enum):
    """Source from which an entity (such as a Dataset or Model) is stored."""
    modelscope = 'modelscope'
    huggingface = 'huggingface'
    virgo = 'virgo'
class DownloadMode(enum.Enum):
    """How to treat existing datasets."""
    REUSE_DATASET_IF_EXISTS = 'reuse_dataset_if_exists'
    FORCE_REDOWNLOAD = 'force_redownload'
class DownloadChannel(enum.Enum):
    """Channels of datasets downloading for uv/pv counting."""
    LOCAL = 'local'
    DSW = 'dsw'
    EAIS = 'eais'
class UploadMode(enum.Enum):
    """How to upload object to remote."""
    # Upload all objects from local, existing remote objects may be overwritten. (Default)
    OVERWRITE = 'overwrite'
    # Upload local objects in append mode, skipping all existing remote objects.
    APPEND = 'append'
class DatasetFormations(enum.Enum):
    """How a dataset is organized and interpreted."""
    # formation that is compatible with official huggingface dataset, which
    # organizes whole dataset into one single (zip) file.
    hf_compatible = 1
    # native modelscope formation that supports, among other things,
    # multiple files in a dataset
    native = 2
    # general formation for datasets
    general = 4
    # for local meta cache mark
    # NOTE(review): because this is assigned inside the Enum body it is
    # itself an enum member (read the string via `.value`), not a plain
    # class attribute - confirm that is intended.
    formation_mark_ext = '.formation_mark'
# File extensions listed for each dataset formation.
DatasetMetaFormats = {
    DatasetFormations.native: ['.json'],
    DatasetFormations.hf_compatible: ['.py'],
    DatasetFormations.general: ['.py'],
}
class ModelFile(object):
    """File and folder name constants for model artifacts."""
    CONFIGURATION = 'configuration.json'
    README = 'README.md'
    TF_SAVED_MODEL_FILE = 'saved_model.pb'
    TF_GRAPH_FILE = 'tf_graph.pb'
    TF_CHECKPOINT_FOLDER = 'tf_ckpts'
    TF_CKPT_PREFIX = 'ckpt-'
    TORCH_MODEL_FILE = 'pytorch_model.pt'
    TORCH_MODEL_BIN_FILE = 'pytorch_model.bin'
    VOCAB_FILE = 'vocab.txt'
    ONNX_MODEL_FILE = 'model.onnx'
    LABEL_MAPPING = 'label_mapping.json'
    TRAIN_OUTPUT_DIR = 'output'
    TRAIN_BEST_OUTPUT_DIR = 'output_best'
    TS_MODEL_FILE = 'model.ts'
    YAML_FILE = 'model.yaml'
    TOKENIZER_FOLDER = 'tokenizer'
    CONFIG = 'config.json'
class Invoke(object):
    """String constants: the ``'invoked_by'`` key and the invoker names."""
    KEY = 'invoked_by'
    PRETRAINED = 'from_pretrained'
    PIPELINE = 'pipeline'
    TRAINER = 'trainer'
    LOCAL_TRAINER = 'local_trainer'
    PREPROCESSOR = 'preprocessor'
class ThirdParty(object):
    """String constants: the ``'third_party'`` key and third-party names."""
    KEY = 'third_party'
    EASYCV = 'easycv'
    ADASEQ = 'adaseq'
    ADADET = 'adadet'
class ConfigFields(object):
    """First level keyword in configuration file."""
    framework = 'framework'
    task = 'task'
    pipeline = 'pipeline'
    model = 'model'
    dataset = 'dataset'
    preprocessor = 'preprocessor'
    train = 'train'
    evaluation = 'evaluation'
    postprocessor = 'postprocessor'
class ConfigKeys(object):
    """Fixed keywords in configuration file."""
    train = 'train'
    val = 'val'
    test = 'test'
class Requirements(object):
    """Requirement names for each module."""
    protobuf = 'protobuf'
    sentencepiece = 'sentencepiece'
    sklearn = 'sklearn'
    scipy = 'scipy'
    timm = 'timm'
    tokenizers = 'tokenizers'
    tf = 'tf'
    torch = 'torch'
class Frameworks(object):
    """Framework name constants."""
    tf = 'tensorflow'
    torch = 'pytorch'
    kaldi = 'kaldi'
# Repository types.
REPO_TYPE_MODEL = 'model'
REPO_TYPE_DATASET = 'dataset'
REPO_TYPE_SUPPORT = [REPO_TYPE_MODEL, REPO_TYPE_DATASET]

# Default revisions / namespace.
DEFAULT_MODEL_REVISION = 'master'
MASTER_MODEL_BRANCH = 'master'
DEFAULT_REPOSITORY_REVISION = 'master'
DEFAULT_DATASET_REVISION = 'master'
DEFAULT_DATASET_NAMESPACE = 'modelscope'

DEFAULT_DATA_ACCELERATION_ENDPOINT = 'https://oss-accelerate.aliyuncs.com'

# Both values are read from the environment once, at import time, and
# lower-cased. INTRA_CLOUD_ACCELERATION is kept as a string (it defaults to
# 'true'), not converted to a bool.
INTRA_CLOUD_ACCELERATION = str(
    os.environ.get('INTRA_CLOUD_ACCELERATION', 'True')).lower()
INTRA_CLOUD_ACCELERATION_REGION = str(
    os.environ.get('INTRA_CLOUD_ACCELERATION_REGION', '')).lower()
class ModeKeys:
    """Mode name constants: train, eval and inference."""
    TRAIN = 'train'
    EVAL = 'eval'
    INFERENCE = 'inference'
class LogKeys:
    """Key name constants for log entries."""
    ITER = 'iter'
    ITER_TIME = 'iter_time'
    EPOCH = 'epoch'
    LR = 'lr'  # learning rate
    MODE = 'mode'
    DATA_LOAD_TIME = 'data_load_time'
    ETA = 'eta'  # estimated time of arrival
    MEMORY = 'memory'
    LOSS = 'loss'
class TrainerStages:
    """Stage name constants; every value equals its attribute name."""
    after_init = 'after_init'
    before_run = 'before_run'
    before_val = 'before_val'
    before_train_epoch = 'before_train_epoch'
    before_train_iter = 'before_train_iter'
    after_train_iter = 'after_train_iter'
    after_train_epoch = 'after_train_epoch'
    before_val_epoch = 'before_val_epoch'
    before_val_iter = 'before_val_iter'
    after_val_iter = 'after_val_iter'
    after_val_epoch = 'after_val_epoch'
    after_run = 'after_run'
    after_val = 'after_val'
class ColorCodes:
    """ANSI escape sequences for colored terminal text.

    `END` is the reset sequence.
    """
    MAGENTA = '\033[95m'
    YELLOW = '\033[93m'
    GREEN = '\033[92m'
    RED = '\033[91m'
    END = '\033[0m'
class Devices:
    """device used for training and inference"""
    cpu = 'cpu'
    gpu = 'gpu'
# Supported extensions for text datasets.
# Maps a file extension (without the dot) to a loader format name.
EXTENSIONS_TO_LOAD = {
    'csv': 'csv',
    'tsv': 'csv',
    'json': 'json',
    'jsonl': 'json',
    'parquet': 'parquet',
    'txt': 'text'
}

META_FILES_FORMAT = ('.csv', '.jsonl')
class DatasetPathName:
    """Name constants for dataset paths and lock files."""
    META_NAME = 'meta'
    DATA_FILES_NAME = 'data_files'
    LOCK_FILE_NAME_ANY = 'any'
    LOCK_FILE_NAME_DELIMITER = '-'
class MetaDataFields:
    """Field name constants for metadata."""
    ARGS_BIG_DATA = 'big_data'
class DistributedParallelType(object):
    """Parallel Strategies for Distributed Models"""
    DP = 'data_parallel'
    TP = 'tensor_model_parallel'
    PP = 'pipeline_model_parallel'
class DatasetTensorflowConfig:
    """The ``'batch_size'`` key name and its default value (5)."""
    BATCH_SIZE = 'batch_size'
    DEFAULT_BATCH_SIZE_VALUE = 5
class VirgoDatasetConfig:
    """Default values and column-name constants for Virgo datasets."""
    default_virgo_namespace = 'default_namespace'
    default_dataset_version = '1'

    # Name of the environment variable (not the endpoint itself).
    env_virgo_endpoint = 'VIRGO_ENDPOINT'

    # Columns for meta request
    meta_content = 'metaContent'
    sampling_type = 'samplingType'

    # Columns for meta content
    col_id = 'id'
    col_meta_info = 'meta_info'
    col_analysis_result = 'analysis_result'
    col_external_info = 'external_info'
    col_cache_file = 'cache_file'
# Default MaxCompute endpoint URL.
DEFAULT_MAXCOMPUTE_ENDPOINT = 'http://service-corp.odps.aliyun-inc.com/api'
class MaxComputeEnvs:
    """``ODPS_*`` name constants for MaxCompute settings.

    NOTE(review): presumably environment-variable names, judging by the
    class name - confirm against callers.
    """
    ACCESS_ID = 'ODPS_ACCESS_ID'
    ACCESS_SECRET_KEY = 'ODPS_ACCESS_SECRET_KEY'
    PROJECT_NAME = 'ODPS_PROJECT_NAME'
    ENDPOINT = 'ODPS_ENDPOINT'
|