metainfo.py 62 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340
  1. # Copyright (c) Alibaba, Inc. and its affiliates.
  2. from modelscope.utils.constant import Fields, Tasks
  3. class Models(object):
  4. """ Names for different models.
  5. Holds the standard model name to use for identifying different model.
  6. This should be used to register models.
  7. Model name should only contain model information but not task information.
  8. """
  9. # tinynas models
  10. tinynas_detection = 'tinynas-detection'
  11. tinynas_damoyolo = 'tinynas-damoyolo'
  12. # vision models
  13. detection = 'detection'
  14. mask_scoring = 'MaskScoring'
  15. image_restoration = 'image-restoration'
  16. realtime_object_detection = 'realtime-object-detection'
  17. realtime_video_object_detection = 'realtime-video-object-detection'
  18. scrfd = 'scrfd'
  19. depe = 'depe'
  20. classification_model = 'ClassificationModel'
  21. easyrobust_model = 'EasyRobustModel'
  22. bnext = 'bnext'
  23. yolopv2 = 'yolopv2'
  24. nafnet = 'nafnet'
  25. csrnet = 'csrnet'
  26. adaint = 'adaint'
  27. deeplpfnet = 'deeplpfnet'
  28. rrdb = 'rrdb'
  29. cascade_mask_rcnn_swin = 'cascade_mask_rcnn_swin'
  30. maskdino_swin = 'maskdino_swin'
  31. gpen = 'gpen'
  32. product_retrieval_embedding = 'product-retrieval-embedding'
  33. body_2d_keypoints = 'body-2d-keypoints'
  34. body_3d_keypoints = 'body-3d-keypoints'
  35. body_3d_keypoints_hdformer = 'hdformer'
  36. crowd_counting = 'HRNetCrowdCounting'
  37. face_2d_keypoints = 'face-2d-keypoints'
  38. star_68ldk_detection = 'star-68ldk-detection'
  39. panoptic_segmentation = 'swinL-panoptic-segmentation'
  40. r50_panoptic_segmentation = 'r50-panoptic-segmentation'
  41. image_reid_person = 'passvitb'
  42. image_inpainting = 'FFTInpainting'
  43. image_paintbyexample = 'Stablediffusion-Paintbyexample'
  44. video_summarization = 'pgl-video-summarization'
  45. video_panoptic_segmentation = 'swinb-video-panoptic-segmentation'
  46. video_instance_segmentation = 'swinb-video-instance-segmentation'
  47. language_guided_video_summarization = 'clip-it-language-guided-video-summarization'
  48. swinL_semantic_segmentation = 'swinL-semantic-segmentation'
  49. vitadapter_semantic_segmentation = 'vitadapter-semantic-segmentation'
  50. text_driven_segmentation = 'text-driven-segmentation'
  51. newcrfs_depth_estimation = 'newcrfs-depth-estimation'
  52. omnidata_normal_estimation = 'omnidata-normal-estimation'
  53. panovit_layout_estimation = 'panovit-layout-estimation'
  54. unifuse_depth_estimation = 'unifuse-depth-estimation'
  55. s2net_depth_estimation = 's2net-depth-estimation'
  56. dro_resnet18_depth_estimation = 'dro-resnet18-depth-estimation'
  57. raft_dense_optical_flow_estimation = 'raft-dense-optical-flow-estimation'
  58. human_normal_estimation = 'human-normal-estimation'
  59. resnet50_bert = 'resnet50-bert'
  60. referring_video_object_segmentation = 'swinT-referring-video-object-segmentation'
  61. fer = 'fer'
  62. fairface = 'fairface'
  63. retinaface = 'retinaface'
  64. damofd = 'damofd'
  65. shop_segmentation = 'shop-segmentation'
  66. mogface = 'mogface'
  67. mtcnn = 'mtcnn'
  68. ulfd = 'ulfd'
  69. rts = 'rts'
  70. flir = 'flir'
  71. arcface = 'arcface'
  72. facemask = 'facemask'
  73. flc = 'flc'
  74. tinymog = 'tinymog'
  75. video_inpainting = 'video-inpainting'
  76. human_wholebody_keypoint = 'human-wholebody-keypoint'
  77. hand_static = 'hand-static'
  78. face_human_hand_detection = 'face-human-hand-detection'
  79. face_emotion = 'face-emotion'
  80. product_segmentation = 'product-segmentation'
  81. image_body_reshaping = 'image-body-reshaping'
  82. image_skychange = 'image-skychange'
  83. video_human_matting = 'video-human-matting'
  84. human_reconstruction = 'human-reconstruction'
  85. text_texture_generation = 'text-texture-generation'
  86. video_frame_interpolation = 'video-frame-interpolation'
  87. video_object_segmentation = 'video-object-segmentation'
  88. video_deinterlace = 'video-deinterlace'
  89. quadtree_attention_image_matching = 'quadtree-attention-image-matching'
  90. loftr_image_local_feature_matching = 'loftr-image-local-feature-matching'
  91. lightglue_image_matching = 'lightglue-image-matching'
  92. vision_middleware = 'vision-middleware'
  93. vidt = 'vidt'
  94. video_stabilization = 'video-stabilization'
  95. real_basicvsr = 'real-basicvsr'
  96. rcp_sceneflow_estimation = 'rcp-sceneflow-estimation'
  97. image_casmvs_depth_estimation = 'image-casmvs-depth-estimation'
  98. image_geomvsnet_depth_estimation = 'image-geomvsnet-depth-estimation'
  99. vop_retrieval_model = 'vop-retrieval-model'
  100. vop_retrieval_model_se = 'vop-retrieval-model-se'
  101. ddcolor = 'ddcolor'
  102. image_probing_model = 'image-probing-model'
  103. defrcn = 'defrcn'
  104. image_face_fusion = 'image-face-fusion'
  105. content_check = 'content-check'
  106. open_vocabulary_detection_vild = 'open-vocabulary-detection-vild'
  107. ecbsr = 'ecbsr'
  108. msrresnet_lite = 'msrresnet-lite'
  109. object_detection_3d = 'object_detection_3d'
  110. ddpm = 'ddpm'
  111. ocr_recognition = 'OCRRecognition'
  112. ocr_detection = 'OCRDetection'
  113. lineless_table_recognition = 'LoreModel'
  114. image_quality_assessment_mos = 'image-quality-assessment-mos'
  115. image_quality_assessment_man = 'image-quality-assessment-man'
  116. image_quality_assessment_degradation = 'image-quality-assessment-degradation'
  117. m2fp = 'm2fp'
  118. nerf_recon_acc = 'nerf-recon-acc'
  119. nerf_recon_4k = 'nerf-recon-4k'
  120. nerf_recon_vq_compression = 'nerf-recon-vq-compression'
  121. surface_recon_common = 'surface-recon-common'
  122. bts_depth_estimation = 'bts-depth-estimation'
  123. vision_efficient_tuning = 'vision-efficient-tuning'
  124. bad_image_detecting = 'bad-image-detecting'
  125. controllable_image_generation = 'controllable-image-generation'
  126. longshortnet = 'longshortnet'
  127. fastinst = 'fastinst'
  128. pedestrian_attribute_recognition = 'pedestrian-attribute-recognition'
  129. image_try_on = 'image-try-on'
  130. human_image_generation = 'human-image-generation'
  131. image_view_transform = 'image-view-transform'
  132. image_control_3d_portrait = 'image-control-3d-portrait'
  133. rife = 'rife'
  134. anydoor = 'anydoor'
  135. self_supervised_depth_completion = 'self-supervised-depth-completion'
  136. # nlp models
  137. bert = 'bert'
  138. palm = 'palm-v2'
  139. structbert = 'structbert'
  140. deberta_v2 = 'deberta_v2'
  141. veco = 'veco'
  142. translation = 'csanmt-translation'
  143. canmt = 'canmt'
  144. space_dst = 'space-dst'
  145. space_intent = 'space-intent'
  146. space_modeling = 'space-modeling'
  147. space_T_en = 'space-T-en'
  148. space_T_cn = 'space-T-cn'
  149. tcrf = 'transformer-crf'
  150. token_classification_for_ner = 'token-classification-for-ner'
  151. tcrf_wseg = 'transformer-crf-for-word-segmentation'
  152. transformer_softmax = 'transformer-softmax'
  153. lcrf = 'lstm-crf'
  154. lcrf_wseg = 'lstm-crf-for-word-segmentation'
  155. gcnncrf = 'gcnn-crf'
  156. bart = 'bart'
  157. gpt2 = 'gpt2'
  158. gpt3 = 'gpt3'
  159. gpt_moe = 'gpt-moe'
  160. gpt_neo = 'gpt-neo'
  161. plug = 'plug'
  162. bert_for_ds = 'bert-for-document-segmentation'
  163. ponet_for_ds = 'ponet-for-document-segmentation'
  164. ponet = 'ponet'
  165. polylm = 'polylm'
  166. T5 = 'T5'
  167. mglm = 'mglm'
  168. codegeex = 'codegeex'
  169. glm130b = 'glm130b'
  170. bloom = 'bloom'
  171. unite = 'unite'
  172. megatron_bert = 'megatron-bert'
  173. use = 'user-satisfaction-estimation'
  174. fid_plug = 'fid-plug'
  175. fid_T5 = 'fid-T5'
  176. lstm = 'lstm'
  177. xlm_roberta = 'xlm-roberta'
  178. transformers = 'transformers'
  179. plug_mental = 'plug-mental'
  180. doc2bot = 'doc2bot'
  181. peer = 'peer'
  182. llama = 'llama'
  183. llama2 = 'llama2'
  184. chatglm_6b = 'chatglm6b'
  185. chatglm2_6b = 'chatglm2-6b'
  186. qwen_7b = 'qwen-7b'
  187. # audio models
  188. sambert_hifigan = 'sambert-hifigan'
  189. speech_frcrn_ans_cirm_16k = 'speech_frcrn_ans_cirm_16k'
  190. speech_zipenhancer_ans_multiloss_16k_base = 'speech_zipenhancer_ans_multiloss_16k_base'
  191. speech_dfsmn_ans = 'speech_dfsmn_ans'
  192. speech_dfsmn_kws_char_farfield = 'speech_dfsmn_kws_char_farfield'
  193. speech_dfsmn_kws_char_farfield_iot = 'speech_dfsmn_kws_char_farfield_iot'
  194. speech_kws_fsmn_char_ctc_nearfield = 'speech_kws_fsmn_char_ctc_nearfield'
  195. speech_mossformer_separation_temporal_8k = 'speech_mossformer_separation_temporal_8k'
  196. speech_mossformer2_separation_temporal_8k = 'speech_mossformer2_separation_temporal_8k'
  197. kws_kwsbp = 'kws-kwsbp'
  198. generic_asr = 'generic-asr'
  199. wenet_asr = 'wenet-asr'
  200. generic_itn = 'generic-itn'
  201. generic_punc = 'generic-punc'
  202. generic_sv = 'generic-sv'
  203. tdnn_sv = 'tdnn-sv'
  204. ecapa_tdnn_sv = 'ecapa-tdnn-sv'
  205. campplus_sv = 'cam++-sv'
  206. eres2net_sv = 'eres2net-sv'
  207. eres2netv2_sv = 'eres2netv2-sv'
  208. resnet_sv = 'resnet-sv'
  209. res2net_sv = 'res2net-sv'
  210. eres2net_aug_sv = 'eres2net-aug-sv'
  211. scl_sd = 'scl-sd'
  212. scl_sd_xvector = 'scl-sd-xvector'
  213. campplus_lre = 'cam++-lre'
  214. eres2net_lre = 'eres2net-lre'
  215. cluster_backend = 'cluster-backend'
  216. rdino_tdnn_sv = 'rdino_ecapa-tdnn-sv'
  217. sdpn_sv = 'sdpn_ecapa-sv'
  218. generic_lm = 'generic-lm'
  219. audio_quantization = 'audio-quantization'
  220. laura_codec = 'laura-codec'
  221. funasr = 'funasr'
  222. hifissr = 'hifissr'
  223. unetvc_16k = 'unetvc_16k'
  224. # multi-modal models
  225. ofa = 'ofa'
  226. clip = 'clip-multi-modal-embedding'
  227. gemm = 'gemm-generative-multi-modal'
  228. rleg = 'rleg-generative-multi-modal'
  229. mplug = 'mplug'
  230. diffusion = 'diffusion-text-to-image-synthesis'
  231. multi_stage_diffusion = 'multi-stage-diffusion-text-to-image-synthesis'
  232. video_synthesis = 'latent-text-to-video-synthesis'
  233. team = 'team-multi-modal-similarity'
  234. video_clip = 'video-clip-multi-modal-embedding'
  235. prost = 'prost-clip-text-video-retrieval'
  236. mgeo = 'mgeo'
  237. vldoc = 'vldoc'
  238. hitea = 'hitea'
  239. soonet = 'soonet'
  240. efficient_diffusion_tuning = 'efficient-diffusion-tuning'
  241. cones2_inference = 'cones2-inference'
  242. mplug_owl = 'mplug-owl'
  243. clip_interrogator = 'clip-interrogator'
  244. stable_diffusion = 'stable-diffusion'
  245. stable_diffusion_xl = 'stable-diffusion-xl'
  246. videocomposer = 'videocomposer'
  247. text_to_360panorama_image = 'text-to-360panorama-image'
  248. image_to_video_model = 'image-to-video-model'
  249. video_to_video_model = 'video-to-video-model'
  250. # science models
  251. unifold = 'unifold'
  252. unifold_symmetry = 'unifold-symmetry'
  253. class TaskModels(object):
  254. # nlp task
  255. text_classification = 'text-classification'
  256. token_classification = 'token-classification'
  257. information_extraction = 'information-extraction'
  258. fill_mask = 'fill-mask'
  259. feature_extraction = 'feature-extraction'
  260. text_generation = 'text-generation'
  261. text_ranking = 'text-ranking'
  262. machine_reading_comprehension = 'machine-reading-comprehension'
  263. class Heads(object):
  264. # nlp heads
  265. # text cls
  266. text_classification = 'text-classification'
  267. # fill mask
  268. fill_mask = 'fill-mask'
  269. bert_mlm = 'bert-mlm'
  270. roberta_mlm = 'roberta-mlm'
  271. xlm_roberta_mlm = 'xlm-roberta-mlm'
  272. # token cls
  273. token_classification = 'token-classification'
  274. # extraction
  275. information_extraction = 'information-extraction'
  276. # text gen
  277. text_generation = 'text-generation'
  278. # text ranking
  279. text_ranking = 'text-ranking'
  280. # crf
  281. lstm_crf = 'lstm-crf'
  282. transformer_crf = 'transformer-crf'
  283. class Pipelines(object):
  284. """ Names for different pipelines.
  285. Holds the standard pipeline name to use for identifying different pipeline.
  286. This should be used to register pipelines.
  287. For pipeline which support different models and implements the common function, we
  288. should use task name for this pipeline.
  289. For pipeline which support only one model, we should use ${Model}-${Task} as its name.
  290. """
  291. pipeline_template = 'pipeline-template'
  292. # vision tasks
  293. portrait_matting = 'unet-image-matting'
  294. universal_matting = 'unet-universal-matting'
  295. image_denoise = 'nafnet-image-denoise'
  296. image_deblur = 'nafnet-image-deblur'
  297. image_editing = 'masactrl-image-editing'
  298. freeu_stable_diffusion_text2image = 'freeu-stable-diffusion-text2image'
  299. person_image_cartoon = 'unet-person-image-cartoon'
  300. ocr_detection = 'resnet18-ocr-detection'
  301. table_recognition = 'dla34-table-recognition'
  302. lineless_table_recognition = 'lore-lineless-table-recognition'
  303. license_plate_detection = 'resnet18-license-plate-detection'
  304. card_detection_correction = 'resnet18-card-detection-correction'
  305. action_recognition = 'TAdaConv_action-recognition'
  306. animal_recognition = 'resnet101-animal-recognition'
  307. general_recognition = 'resnet101-general-recognition'
  308. cmdssl_video_embedding = 'cmdssl-r2p1d_video_embedding'
  309. hicossl_video_embedding = 'hicossl-s3dg-video_embedding'
  310. body_2d_keypoints = 'hrnetv2w32_body-2d-keypoints_image'
  311. body_3d_keypoints = 'canonical_body-3d-keypoints_video'
  312. hand_2d_keypoints = 'hrnetv2w18_hand-2d-keypoints_image'
  313. human_detection = 'resnet18-human-detection'
  314. tbs_detection = 'tbs-detection'
  315. object_detection = 'vit-object-detection'
  316. abnormal_object_detection = 'abnormal-object-detection'
  317. face_2d_keypoints = 'mobilenet_face-2d-keypoints_alignment'
  318. salient_detection = 'u2net-salient-detection'
  319. salient_boudary_detection = 'res2net-salient-detection'
  320. camouflaged_detection = 'res2net-camouflaged-detection'
  321. image_demoire = 'uhdm-image-demoireing'
  322. image_classification = 'image-classification'
  323. face_detection = 'resnet-face-detection-scrfd10gkps'
  324. face_liveness_ir = 'manual-face-liveness-flir'
  325. face_liveness_rgb = 'manual-face-liveness-flir'
  326. face_liveness_xc = 'manual-face-liveness-flxc'
  327. card_detection = 'resnet-card-detection-scrfd34gkps'
  328. ulfd_face_detection = 'manual-face-detection-ulfd'
  329. tinymog_face_detection = 'manual-face-detection-tinymog'
  330. facial_expression_recognition = 'vgg19-facial-expression-recognition-fer'
  331. facial_landmark_confidence = 'manual-facial-landmark-confidence-flcm'
  332. facial_68ldk_detection = 'facial-68ldk-detection'
  333. face_attribute_recognition = 'resnet34-face-attribute-recognition-fairface'
  334. retina_face_detection = 'resnet50-face-detection-retinaface'
  335. mog_face_detection = 'resnet101-face-detection-cvpr22papermogface'
  336. mtcnn_face_detection = 'manual-face-detection-mtcnn'
  337. live_category = 'live-category'
  338. general_image_classification = 'vit-base_image-classification_ImageNet-labels'
  339. daily_image_classification = 'vit-base_image-classification_Dailylife-labels'
  340. nextvit_small_daily_image_classification = 'nextvit-small_image-classification_Dailylife-labels'
  341. convnext_base_image_classification_garbage = 'convnext-base_image-classification_garbage'
  342. bnext_small_image_classification = 'bnext-small_image-classification_ImageNet-labels'
  343. yolopv2_image_driving_percetion_bdd100k = 'yolopv2_image-driving-percetion_bdd100k'
  344. common_image_classification = 'common-image-classification'
  345. image_color_enhance = 'csrnet-image-color-enhance'
  346. adaint_image_color_enhance = 'adaint-image-color-enhance'
  347. deeplpf_image_color_enhance = 'deeplpf-image-color-enhance'
  348. virtual_try_on = 'virtual-try-on'
  349. image_colorization = 'unet-image-colorization'
  350. image_style_transfer = 'AAMS-style-transfer'
  351. image_super_resolution = 'rrdb-image-super-resolution'
  352. image_super_resolution_pasd = 'image-super-resolution-pasd'
  353. image_debanding = 'rrdb-image-debanding'
  354. face_image_generation = 'gan-face-image-generation'
  355. product_retrieval_embedding = 'resnet50-product-retrieval-embedding'
  356. realtime_video_object_detection = 'cspnet_realtime-video-object-detection_streamyolo'
  357. face_recognition = 'ir101-face-recognition-cfglint'
  358. face_recognition_ood = 'ir-face-recognition-ood-rts'
  359. face_quality_assessment = 'manual-face-quality-assessment-fqa'
  360. face_recognition_ood = 'ir-face-recognition-rts'
  361. face_recognition_onnx_ir = 'manual-face-recognition-frir'
  362. face_recognition_onnx_fm = 'manual-face-recognition-frfm'
  363. arc_face_recognition = 'ir50-face-recognition-arcface'
  364. mask_face_recognition = 'resnet-face-recognition-facemask'
  365. content_check = 'resnet50-image-classification-cc'
  366. image_instance_segmentation = 'cascade-mask-rcnn-swin-image-instance-segmentation'
  367. maskdino_instance_segmentation = 'maskdino-swin-image-instance-segmentation'
  368. image2image_translation = 'image-to-image-translation'
  369. live_category = 'live-category'
  370. video_category = 'video-category'
  371. ocr_recognition = 'convnextTiny-ocr-recognition'
  372. image_portrait_enhancement = 'gpen-image-portrait-enhancement'
  373. image_to_image_generation = 'image-to-image-generation'
  374. image_object_detection_auto = 'yolox_image-object-detection-auto'
  375. hand_detection = 'yolox-pai_hand-detection'
  376. skin_retouching = 'unet-skin-retouching'
  377. face_reconstruction = 'resnet50-face-reconstruction'
  378. head_reconstruction = 'HRN-head-reconstruction'
  379. text_to_head = 'HRN-text-to-head'
  380. tinynas_classification = 'tinynas-classification'
  381. easyrobust_classification = 'easyrobust-classification'
  382. tinynas_detection = 'tinynas-detection'
  383. crowd_counting = 'hrnet-crowd-counting'
  384. action_detection = 'ResNetC3D-action-detection'
  385. video_single_object_tracking = 'ostrack-vitb-video-single-object-tracking'
  386. video_single_object_tracking_procontext = 'procontext-vitb-video-single-object-tracking'
  387. video_multi_object_tracking = 'video-multi-object-tracking'
  388. image_panoptic_segmentation = 'image-panoptic-segmentation'
  389. video_summarization = 'googlenet_pgl_video_summarization'
  390. language_guided_video_summarization = 'clip-it-video-summarization'
  391. image_semantic_segmentation = 'image-semantic-segmentation'
  392. image_depth_estimation = 'image-depth-estimation'
  393. image_normal_estimation = 'image-normal-estimation'
  394. indoor_layout_estimation = 'indoor-layout-estimation'
  395. image_local_feature_matching = 'image-local-feature-matching'
  396. video_depth_estimation = 'video-depth-estimation'
  397. panorama_depth_estimation = 'panorama-depth-estimation'
  398. panorama_depth_estimation_s2net = 'panorama-depth-estimation-s2net'
  399. dense_optical_flow_estimation = 'dense-optical-flow-estimation'
  400. image_reid_person = 'passvitb-image-reid-person'
  401. image_inpainting = 'fft-inpainting'
  402. image_paintbyexample = 'stablediffusion-paintbyexample'
  403. image_inpainting_sdv2 = 'image-inpainting-sdv2'
  404. text_driven_segmentation = 'text-driven-segmentation'
  405. movie_scene_segmentation = 'resnet50-bert-movie-scene-segmentation'
  406. shop_segmentation = 'shop-segmentation'
  407. video_inpainting = 'video-inpainting'
  408. human_wholebody_keypoint = 'hrnetw48_human-wholebody-keypoint_image'
  409. pst_action_recognition = 'patchshift-action-recognition'
  410. hand_static = 'hand-static'
  411. face_human_hand_detection = 'face-human-hand-detection'
  412. face_emotion = 'face-emotion'
  413. product_segmentation = 'product-segmentation'
  414. image_body_reshaping = 'flow-based-body-reshaping'
  415. referring_video_object_segmentation = 'referring-video-object-segmentation'
  416. image_skychange = 'image-skychange'
  417. video_human_matting = 'video-human-matting'
  418. human_reconstruction = 'human-reconstruction'
  419. text_texture_generation = 'text-texture-generation'
  420. vision_middleware_multi_task = 'vision-middleware-multi-task'
  421. vidt = 'vidt'
  422. video_frame_interpolation = 'video-frame-interpolation'
  423. video_object_segmentation = 'video-object-segmentation'
  424. video_deinterlace = 'video-deinterlace'
  425. image_matching = 'image-matching'
  426. image_matching_fast = 'image-matching-fast'
  427. video_stabilization = 'video-stabilization'
  428. video_super_resolution = 'realbasicvsr-video-super-resolution'
  429. pointcloud_sceneflow_estimation = 'pointcloud-sceneflow-estimation'
  430. image_multi_view_depth_estimation = 'image-multi-view-depth-estimation'
  431. video_panoptic_segmentation = 'video-panoptic-segmentation'
  432. video_instance_segmentation = 'video-instance-segmentation'
  433. vop_retrieval = 'vop-video-text-retrieval'
  434. vop_retrieval_se = 'vop-video-text-retrieval-se'
  435. ddcolor_image_colorization = 'ddcolor-image-colorization'
  436. image_structured_model_probing = 'image-structured-model-probing'
  437. image_fewshot_detection = 'image-fewshot-detection'
  438. image_face_fusion = 'image-face-fusion'
  439. open_vocabulary_detection_vild = 'open-vocabulary-detection-vild'
  440. ddpm_image_semantic_segmentation = 'ddpm-image-semantic-segmentation'
  441. video_colorization = 'video-colorization'
  442. motion_generattion = 'mdm-motion-generation'
  443. mobile_image_super_resolution = 'mobile-image-super-resolution'
  444. image_human_parsing = 'm2fp-image-human-parsing'
  445. object_detection_3d_depe = 'object-detection-3d-depe'
  446. nerf_recon_acc = 'nerf-recon-acc'
  447. nerf_recon_4k = 'nerf-recon-4k'
  448. nerf_recon_vq_compression = 'nerf-recon-vq-compression'
  449. surface_recon_common = 'surface-recon-common'
  450. bad_image_detecting = 'bad-image-detecting'
  451. controllable_image_generation = 'controllable-image-generation'
  452. fast_instance_segmentation = 'fast-instance-segmentation'
  453. image_quality_assessment_mos = 'image-quality-assessment-mos'
  454. image_quality_assessment_man = 'image-quality-assessment-man'
  455. image_quality_assessment_degradation = 'image-quality-assessment-degradation'
  456. vision_efficient_tuning = 'vision-efficient-tuning'
  457. image_bts_depth_estimation = 'image-bts-depth-estimation'
  458. image_depth_estimation_marigold = 'image-depth-estimation-marigold'
  459. pedestrian_attribute_recognition = 'resnet50_pedestrian-attribute-recognition_image'
  460. text_to_360panorama_image = 'text-to-360panorama-image'
  461. image_try_on = 'image-try-on'
  462. human_image_generation = 'human-image-generation'
  463. human3d_render = 'human3d-render'
  464. human3d_animation = 'human3d-animation'
  465. image_view_transform = 'image-view-transform'
  466. image_control_3d_portrait = 'image-control-3d-portrait'
  467. rife_video_frame_interpolation = 'rife-video-frame-interpolation'
  468. anydoor = 'anydoor'
  469. image_to_3d = 'image-to-3d'
  470. self_supervised_depth_completion = 'self-supervised-depth-completion'
  471. human_normal_estimation = 'human-normal-estimation'
  472. # nlp tasks
  473. automatic_post_editing = 'automatic-post-editing'
  474. translation_quality_estimation = 'translation-quality-estimation'
  475. domain_classification = 'domain-classification'
  476. sentence_similarity = 'sentence-similarity'
  477. word_segmentation = 'word-segmentation'
  478. multilingual_word_segmentation = 'multilingual-word-segmentation'
  479. word_segmentation_thai = 'word-segmentation-thai'
  480. part_of_speech = 'part-of-speech'
  481. named_entity_recognition = 'named-entity-recognition'
  482. named_entity_recognition_thai = 'named-entity-recognition-thai'
  483. named_entity_recognition_viet = 'named-entity-recognition-viet'
  484. text_generation = 'text-generation'
  485. fid_dialogue = 'fid-dialogue'
  486. text2text_generation = 'text2text-generation'
  487. sentiment_analysis = 'sentiment-analysis'
  488. sentiment_classification = 'sentiment-classification'
  489. text_classification = 'text-classification'
  490. fill_mask = 'fill-mask'
  491. fill_mask_ponet = 'fill-mask-ponet'
  492. csanmt_translation = 'csanmt-translation'
  493. canmt_translation = 'canmt-translation'
  494. interactive_translation = 'interactive-translation'
  495. nli = 'nli'
  496. dialog_intent_prediction = 'dialog-intent-prediction'
  497. dialog_modeling = 'dialog-modeling'
  498. dialog_state_tracking = 'dialog-state-tracking'
  499. zero_shot_classification = 'zero-shot-classification'
  500. text_error_correction = 'text-error-correction'
  501. word_alignment = 'word-alignment'
  502. plug_generation = 'plug-generation'
  503. gpt3_generation = 'gpt3-generation'
  504. polylm_text_generation = 'polylm-text-generation'
  505. gpt_moe_generation = 'gpt-moe-generation'
  506. faq_question_answering = 'faq-question-answering'
  507. conversational_text_to_sql = 'conversational-text-to-sql'
  508. table_question_answering_pipeline = 'table-question-answering-pipeline'
  509. sentence_embedding = 'sentence-embedding'
  510. text_ranking = 'text-ranking'
  511. mgeo_ranking = 'mgeo-ranking'
  512. relation_extraction = 'relation-extraction'
  513. document_segmentation = 'document-segmentation'
  514. extractive_summarization = 'extractive-summarization'
  515. feature_extraction = 'feature-extraction'
  516. mglm_text_summarization = 'mglm-text-summarization'
  517. codegeex_code_translation = 'codegeex-code-translation'
  518. codegeex_code_generation = 'codegeex-code-generation'
  519. glm130b_text_generation = 'glm130b-text-generation'
  520. translation_en_to_de = 'translation_en_to_de' # keep it underscore
  521. translation_en_to_ro = 'translation_en_to_ro' # keep it underscore
  522. translation_en_to_fr = 'translation_en_to_fr' # keep it underscore
  523. token_classification = 'token-classification'
  524. translation_evaluation = 'translation-evaluation'
  525. user_satisfaction_estimation = 'user-satisfaction-estimation'
  526. siamese_uie = 'siamese-uie'
  527. document_grounded_dialog_retrieval = 'document-grounded-dialog-retrieval'
  528. document_grounded_dialog_rerank = 'document-grounded-dialog-rerank'
  529. document_grounded_dialog_generate = 'document-grounded-dialog-generate'
  530. language_identification = 'language_identification'
  531. machine_reading_comprehension_for_ner = 'machine-reading-comprehension-for-ner'
  532. llm = 'llm'
  533. # audio tasks
  534. sambert_hifigan_tts = 'sambert-hifigan-tts'
  535. speech_dfsmn_aec_psm_16k = 'speech-dfsmn-aec-psm-16k'
  536. speech_frcrn_ans_cirm_16k = 'speech_frcrn_ans_cirm_16k'
  537. speech_zipenhancer_ans_multiloss_16k_base = 'speech_zipenhancer_ans_multiloss_16k_base'
  538. speech_dfsmn_ans_psm_48k_causal = 'speech_dfsmn_ans_psm_48k_causal'
  539. speech_dfsmn_kws_char_farfield = 'speech_dfsmn_kws_char_farfield'
  540. speech_separation = 'speech-separation'
  541. kws_kwsbp = 'kws-kwsbp'
  542. asr_wenet_inference = 'asr-wenet-inference'
  543. itn_inference = 'itn-inference'
  544. speaker_diarization_inference = 'speaker-diarization-inference'
  545. vad_inference = 'vad-inference'
  546. funasr_speech_separation = 'funasr-speech-separation'
  547. speaker_verification = 'speaker-verification'
  548. speaker_verification_tdnn = 'speaker-verification-tdnn'
  549. speaker_verification_rdino = 'speaker-verification-rdino'
  550. speaker_verification_sdpn = 'speaker-verification-sdpn'
  551. speaker_verification_eres2net = 'speaker-verification-eres2net'
  552. speaker_verification_eres2netv2 = 'speaker-verification-eres2netv2'
  553. speaker_verification_resnet = 'speaker-verification-resnet'
  554. speaker_verification_res2net = 'speaker-verification-res2net'
  555. speech_language_recognition = 'speech-language-recognition'
  556. speech_language_recognition_eres2net = 'speech-language-recognition-eres2net'
  557. speaker_change_locating = 'speaker-change-locating'
  558. speaker_diarization_dialogue_detection = 'speaker-diarization-dialogue-detection'
  559. speaker_diarization_semantic_speaker_turn_detection = 'speaker-diarization-semantic-speaker-turn-detection'
  560. segmentation_clustering = 'segmentation-clustering'
  561. lm_inference = 'language-score-prediction'
  562. speech_timestamp_inference = 'speech-timestamp-inference'
  563. audio_quantization = 'audio-quantization'
  564. audio_quantization_inference = 'audio-quantization-inference'
  565. laura_codec_tts_inference = 'laura-codec-tts-inference'
  566. speech_super_resolution_inference = 'speech-super-resolution-inference'
  567. voice_conversion = 'voice-conversion'
  568. # multi-modal tasks
  569. image_captioning = 'image-captioning'
  570. multi_modal_embedding = 'multi-modal-embedding'
  571. generative_multi_modal_embedding = 'generative-multi-modal-embedding'
  572. visual_question_answering = 'visual-question-answering'
  573. visual_grounding = 'visual-grounding'
  574. visual_entailment = 'visual-entailment'
  575. multi_modal_similarity = 'multi-modal-similarity'
  576. text_to_image_synthesis = 'text-to-image-synthesis'
  577. video_multi_modal_embedding = 'video-multi-modal-embedding'
  578. prost_text_video_retrieval = 'prost-text-video-retrieval'
  579. videocomposer = 'videocomposer'
  580. image_text_retrieval = 'image-text-retrieval'
  581. ofa_ocr_recognition = 'ofa-ocr-recognition'
  582. ofa_asr = 'ofa-asr'
  583. ofa_sudoku = 'ofa-sudoku'
  584. ofa_text2sql = 'ofa-text2sql'
  585. video_captioning = 'video-captioning'
  586. video_question_answering = 'video-question-answering'
  587. diffusers_stable_diffusion = 'diffusers-stable-diffusion'
  588. disco_guided_diffusion = 'disco_guided_diffusion'
  589. document_vl_embedding = 'document-vl-embedding'
  590. chinese_stable_diffusion = 'chinese-stable-diffusion'
  591. cones2_inference = 'cones2-inference'
  592. text_to_video_synthesis = 'latent-text-to-video-synthesis' # latent-text-to-video-synthesis
  593. gridvlp_multi_modal_classification = 'gridvlp-multi-modal-classification'
  594. gridvlp_multi_modal_embedding = 'gridvlp-multi-modal-embedding'
  595. soonet_video_temporal_grounding = 'soonet-video-temporal-grounding'
  596. efficient_diffusion_tuning = 'efficient-diffusion-tuning'
  597. multimodal_dialogue = 'multimodal-dialogue'
  598. llama2_text_generation_pipeline = 'llama2-text-generation-pipeline'
  599. llama2_text_generation_chat_pipeline = 'llama2-text-generation-chat-pipeline'
  600. image_to_video_task_pipeline = 'image-to-video-task-pipeline'
  601. video_to_video_pipeline = 'video-to-video-pipeline'
  602. # science tasks
  603. protein_structure = 'unifold-protein-structure'
  604. # funasr task
  605. funasr_pipeline = 'funasr-pipeline'
  606. DEFAULT_MODEL_FOR_PIPELINE = {
  607. # TaskName: (pipeline_module_name, model_repo)
  608. Tasks.sentence_embedding:
  609. (Pipelines.sentence_embedding,
  610. 'damo/nlp_corom_sentence-embedding_english-base'),
  611. Tasks.text_ranking: (Pipelines.mgeo_ranking,
  612. 'damo/mgeo_address_ranking_chinese_base'),
  613. Tasks.text_ranking: (Pipelines.text_ranking,
  614. 'damo/nlp_corom_passage-ranking_english-base'),
  615. Tasks.word_segmentation:
  616. (Pipelines.word_segmentation,
  617. 'damo/nlp_structbert_word-segmentation_chinese-base'),
  618. Tasks.part_of_speech: (Pipelines.part_of_speech,
  619. 'damo/nlp_structbert_part-of-speech_chinese-base'),
  620. Tasks.token_classification:
  621. (Pipelines.part_of_speech,
  622. 'damo/nlp_structbert_part-of-speech_chinese-base'),
  623. Tasks.named_entity_recognition:
  624. (Pipelines.named_entity_recognition,
  625. 'damo/nlp_raner_named-entity-recognition_chinese-base-news'),
  626. Tasks.relation_extraction:
  627. (Pipelines.relation_extraction,
  628. 'damo/nlp_bert_relation-extraction_chinese-base'),
  629. Tasks.information_extraction:
  630. (Pipelines.relation_extraction,
  631. 'damo/nlp_bert_relation-extraction_chinese-base'),
  632. Tasks.sentence_similarity:
  633. (Pipelines.sentence_similarity,
  634. 'damo/nlp_structbert_sentence-similarity_chinese-base'),
  635. Tasks.competency_aware_translation:
  636. (Pipelines.canmt_translation, 'damo/nlp_canmt_translation_zh2en_large'),
  637. Tasks.translation: (Pipelines.csanmt_translation,
  638. 'damo/nlp_csanmt_translation_zh2en'),
  639. Tasks.nli: (Pipelines.nli, 'damo/nlp_structbert_nli_chinese-base'),
  640. Tasks.sentiment_classification:
  641. (Pipelines.sentiment_classification,
  642. 'damo/nlp_structbert_sentiment-classification_chinese-base'
  643. ), # TODO: revise back after passing the pr
  644. Tasks.portrait_matting: (Pipelines.portrait_matting,
  645. 'damo/cv_unet_image-matting'),
  646. Tasks.universal_matting: (Pipelines.universal_matting,
  647. 'damo/cv_unet_universal-matting'),
  648. Tasks.human_detection: (Pipelines.human_detection,
  649. 'damo/cv_resnet18_human-detection'),
  650. Tasks.image_object_detection: (Pipelines.object_detection,
  651. 'damo/cv_vit_object-detection_coco'),
  652. Tasks.image_denoising: (Pipelines.image_denoise,
  653. 'damo/cv_nafnet_image-denoise_sidd'),
  654. Tasks.image_deblurring: (Pipelines.image_deblur,
  655. 'damo/cv_nafnet_image-deblur_gopro'),
  656. Tasks.image_editing: (Pipelines.image_editing,
  657. 'damo/cv_masactrl_image-editing'),
  658. Tasks.video_stabilization: (Pipelines.video_stabilization,
  659. 'damo/cv_dut-raft_video-stabilization_base'),
  660. Tasks.video_super_resolution:
  661. (Pipelines.video_super_resolution,
  662. 'damo/cv_realbasicvsr_video-super-resolution_videolq'),
  663. Tasks.text_classification:
  664. (Pipelines.sentiment_classification,
  665. 'damo/nlp_structbert_sentiment-classification_chinese-base'),
  666. Tasks.text_generation: (Pipelines.text_generation,
  667. 'damo/nlp_palm2.0_text-generation_chinese-base'),
  668. Tasks.zero_shot_classification:
  669. (Pipelines.zero_shot_classification,
  670. 'damo/nlp_structbert_zero-shot-classification_chinese-base'),
  671. Tasks.task_oriented_conversation: (Pipelines.dialog_modeling,
  672. 'damo/nlp_space_dialog-modeling'),
  673. Tasks.dialog_state_tracking: (Pipelines.dialog_state_tracking,
  674. 'damo/nlp_space_dialog-state-tracking'),
  675. Tasks.table_question_answering:
  676. (Pipelines.table_question_answering_pipeline,
  677. 'damo/nlp-convai-text2sql-pretrain-cn'),
  678. Tasks.document_grounded_dialog_generate:
  679. (Pipelines.document_grounded_dialog_generate,
  680. 'DAMO_ConvAI/nlp_convai_generation_pretrain'),
  681. Tasks.document_grounded_dialog_rerank:
  682. (Pipelines.document_grounded_dialog_rerank,
  683. 'damo/nlp_convai_rerank_pretrain'),
  684. Tasks.document_grounded_dialog_retrieval:
  685. (Pipelines.document_grounded_dialog_retrieval,
  686. 'DAMO_ConvAI/nlp_convai_retrieval_pretrain'),
  687. Tasks.text_error_correction:
  688. (Pipelines.text_error_correction,
  689. 'damo/nlp_bart_text-error-correction_chinese'),
  690. Tasks.image_captioning: (Pipelines.image_captioning,
  691. 'damo/ofa_image-caption_coco_large_en'),
  692. Tasks.video_captioning:
  693. (Pipelines.video_captioning,
  694. 'damo/multi-modal_hitea_video-captioning_base_en'),
  695. Tasks.image_portrait_stylization:
  696. (Pipelines.person_image_cartoon,
  697. 'damo/cv_unet_person-image-cartoon_compound-models'),
  698. Tasks.ocr_detection: (Pipelines.ocr_detection,
  699. 'damo/cv_resnet18_ocr-detection-line-level_damo'),
  700. Tasks.table_recognition:
  701. (Pipelines.table_recognition,
  702. 'damo/cv_dla34_table-structure-recognition_cycle-centernet'),
  703. Tasks.lineless_table_recognition:
  704. (Pipelines.lineless_table_recognition,
  705. 'damo/cv_resnet-transformer_table-structure-recognition_lore'),
  706. Tasks.document_vl_embedding:
  707. (Pipelines.document_vl_embedding,
  708. 'damo/multi-modal_convnext-roberta-base_vldoc-embedding'),
  709. Tasks.license_plate_detection:
  710. (Pipelines.license_plate_detection,
  711. 'damo/cv_resnet18_license-plate-detection_damo'),
  712. Tasks.card_detection_correction: (Pipelines.card_detection_correction,
  713. 'damo/cv_resnet18_card_correction'),
  714. Tasks.fill_mask: (Pipelines.fill_mask, 'damo/nlp_veco_fill-mask-large'),
  715. Tasks.feature_extraction: (Pipelines.feature_extraction,
  716. 'damo/pert_feature-extraction_base-test'),
  717. Tasks.action_recognition: (Pipelines.action_recognition,
  718. 'damo/cv_TAdaConv_action-recognition'),
  719. Tasks.action_detection: (Pipelines.action_detection,
  720. 'damo/cv_ResNetC3D_action-detection_detection2d'),
  721. Tasks.live_category: (Pipelines.live_category,
  722. 'damo/cv_resnet50_live-category'),
  723. Tasks.video_category: (Pipelines.video_category,
  724. 'damo/cv_resnet50_video-category'),
  725. Tasks.multi_modal_embedding: (Pipelines.multi_modal_embedding,
  726. 'damo/multi-modal_clip-vit-base-patch16_zh'),
  727. Tasks.generative_multi_modal_embedding:
  728. (Pipelines.generative_multi_modal_embedding,
  729. 'damo/multi-modal_gemm-vit-large-patch14_generative-multi-modal-embedding'
  730. ),
  731. Tasks.multi_modal_similarity:
  732. (Pipelines.multi_modal_similarity,
  733. 'damo/multi-modal_team-vit-large-patch14_multi-modal-similarity'),
  734. Tasks.visual_question_answering:
  735. (Pipelines.visual_question_answering,
  736. 'damo/mplug_visual-question-answering_coco_large_en'),
  737. Tasks.video_question_answering:
  738. (Pipelines.video_question_answering,
  739. 'damo/multi-modal_hitea_video-question-answering_base_en'),
  740. Tasks.video_embedding: (Pipelines.cmdssl_video_embedding,
  741. 'damo/cv_r2p1d_video_embedding'),
  742. Tasks.text_to_image_synthesis:
  743. (Pipelines.text_to_image_synthesis,
  744. 'damo/cv_diffusion_text-to-image-synthesis_tiny'),
  745. Tasks.text_to_video_synthesis: (Pipelines.text_to_video_synthesis,
  746. 'damo/text-to-video-synthesis'),
  747. Tasks.body_2d_keypoints: (Pipelines.body_2d_keypoints,
  748. 'damo/cv_hrnetv2w32_body-2d-keypoints_image'),
  749. Tasks.body_3d_keypoints: (Pipelines.body_3d_keypoints,
  750. 'damo/cv_canonical_body-3d-keypoints_video'),
  751. Tasks.hand_2d_keypoints:
  752. (Pipelines.hand_2d_keypoints,
  753. 'damo/cv_hrnetw18_hand-pose-keypoints_coco-wholebody'),
  754. Tasks.card_detection: (Pipelines.card_detection,
  755. 'damo/cv_resnet_carddetection_scrfd34gkps'),
  756. Tasks.content_check: (Pipelines.content_check,
  757. 'damo/cv_resnet50_content-check_cc'),
  758. Tasks.face_detection:
  759. (Pipelines.mog_face_detection,
  760. 'damo/cv_resnet101_face-detection_cvpr22papermogface'),
  761. Tasks.face_liveness: (Pipelines.face_liveness_ir,
  762. 'damo/cv_manual_face-liveness_flir'),
  763. Tasks.face_recognition: (Pipelines.face_recognition,
  764. 'damo/cv_ir101_facerecognition_cfglint'),
  765. Tasks.facial_expression_recognition:
  766. (Pipelines.facial_expression_recognition,
  767. 'damo/cv_vgg19_facial-expression-recognition_fer'),
  768. Tasks.face_attribute_recognition:
  769. (Pipelines.face_attribute_recognition,
  770. 'damo/cv_resnet34_face-attribute-recognition_fairface'),
  771. Tasks.face_2d_keypoints: (Pipelines.face_2d_keypoints,
  772. 'damo/cv_mobilenet_face-2d-keypoints_alignment'),
  773. Tasks.face_quality_assessment:
  774. (Pipelines.face_quality_assessment,
  775. 'damo/cv_manual_face-quality-assessment_fqa'),
  776. Tasks.video_multi_modal_embedding:
  777. (Pipelines.video_multi_modal_embedding,
  778. 'damo/multi_modal_clip_vtretrival_msrvtt_53'),
  779. Tasks.text_video_retrieval: (Pipelines.prost_text_video_retrieval,
  780. 'damo/multi_modal_clip_vtretrieval_prost'),
  781. Tasks.image_color_enhancement:
  782. (Pipelines.image_color_enhance,
  783. 'damo/cv_csrnet_image-color-enhance-models'),
  784. Tasks.virtual_try_on: (Pipelines.virtual_try_on,
  785. 'damo/cv_daflow_virtual-try-on_base'),
  786. Tasks.image_colorization: (Pipelines.ddcolor_image_colorization,
  787. 'damo/cv_ddcolor_image-colorization'),
  788. Tasks.video_colorization: (Pipelines.video_colorization,
  789. 'damo/cv_unet_video-colorization'),
  790. Tasks.image_segmentation:
  791. (Pipelines.image_instance_segmentation,
  792. 'damo/cv_swin-b_image-instance-segmentation_coco'),
  793. Tasks.image_driving_perception:
  794. (Pipelines.yolopv2_image_driving_percetion_bdd100k,
  795. 'damo/cv_yolopv2_image-driving-perception_bdd100k'),
  796. Tasks.image_depth_estimation:
  797. (Pipelines.image_depth_estimation,
  798. 'damo/cv_newcrfs_image-depth-estimation_indoor'),
  799. Tasks.image_normal_estimation:
  800. (Pipelines.image_normal_estimation,
  801. 'Damo_XR_Lab/cv_omnidata_image-normal-estimation_normal'),
  802. Tasks.human_normal_estimation:
  803. (Pipelines.human_normal_estimation,
  804. 'Damo_XR_Lab/cv_human_monocular-normal-estimation'),
  805. Tasks.indoor_layout_estimation:
  806. (Pipelines.indoor_layout_estimation,
  807. 'damo/cv_panovit_indoor-layout-estimation'),
  808. Tasks.video_depth_estimation:
  809. (Pipelines.video_depth_estimation,
  810. 'damo/cv_dro-resnet18_video-depth-estimation_indoor'),
  811. Tasks.panorama_depth_estimation:
  812. (Pipelines.panorama_depth_estimation,
  813. 'damo/cv_unifuse_panorama-depth-estimation'),
  814. Tasks.dense_optical_flow_estimation:
  815. (Pipelines.dense_optical_flow_estimation,
  816. 'Damo_XR_Lab/cv_raft_dense-optical-flow_things'),
  817. Tasks.image_local_feature_matching:
  818. (Pipelines.image_local_feature_matching,
  819. 'Damo_XR_Lab/cv_resnet-transformer_local-feature-matching_outdoor-data'),
  820. Tasks.image_style_transfer: (Pipelines.image_style_transfer,
  821. 'damo/cv_aams_style-transfer_damo'),
  822. Tasks.face_image_generation: (Pipelines.face_image_generation,
  823. 'damo/cv_gan_face-image-generation'),
  824. Tasks.image_super_resolution: (Pipelines.image_super_resolution,
  825. 'damo/cv_rrdb_image-super-resolution'),
  826. Tasks.image_debanding: (Pipelines.image_debanding,
  827. 'damo/cv_rrdb_image-debanding'),
  828. Tasks.image_portrait_enhancement:
  829. (Pipelines.image_portrait_enhancement,
  830. 'damo/cv_gpen_image-portrait-enhancement'),
  831. Tasks.product_retrieval_embedding:
  832. (Pipelines.product_retrieval_embedding,
  833. 'damo/cv_resnet50_product-bag-embedding-models'),
  834. Tasks.image_to_image_generation:
  835. (Pipelines.image_to_image_generation,
  836. 'damo/cv_latent_diffusion_image2image_generate'),
  837. Tasks.image_classification: (
  838. Pipelines.daily_image_classification,
  839. 'damo/cv_vit-base_image-classification_Dailylife-labels'),
  840. Tasks.image_object_detection: (
  841. Pipelines.image_object_detection_auto,
  842. 'damo/cv_yolox_image-object-detection-auto'),
  843. Tasks.ocr_recognition: (
  844. Pipelines.ocr_recognition,
  845. 'damo/cv_convnextTiny_ocr-recognition-general_damo'),
  846. Tasks.skin_retouching: (Pipelines.skin_retouching,
  847. 'damo/cv_unet_skin-retouching'),
  848. Tasks.faq_question_answering: (
  849. Pipelines.faq_question_answering,
  850. 'damo/nlp_structbert_faq-question-answering_chinese-base'),
  851. Tasks.crowd_counting: (Pipelines.crowd_counting,
  852. 'damo/cv_hrnet_crowd-counting_dcanet'),
  853. Tasks.video_single_object_tracking: (
  854. Pipelines.video_single_object_tracking,
  855. 'damo/cv_vitb_video-single-object-tracking_ostrack'),
  856. Tasks.image_reid_person: (Pipelines.image_reid_person,
  857. 'damo/cv_passvitb_image-reid-person_market'),
  858. Tasks.text_driven_segmentation: (
  859. Pipelines.text_driven_segmentation,
  860. 'damo/cv_vitl16_segmentation_text-driven-seg'),
  861. Tasks.movie_scene_segmentation: (
  862. Pipelines.movie_scene_segmentation,
  863. 'damo/cv_resnet50-bert_video-scene-segmentation_movienet'),
  864. Tasks.shop_segmentation: (Pipelines.shop_segmentation,
  865. 'damo/cv_vitb16_segmentation_shop-seg'),
  866. Tasks.image_inpainting: (Pipelines.image_inpainting,
  867. 'damo/cv_fft_inpainting_lama'),
  868. Tasks.image_paintbyexample: (Pipelines.image_paintbyexample,
  869. 'damo/cv_stable-diffusion_paint-by-example'),
  870. Tasks.controllable_image_generation:
  871. (Pipelines.controllable_image_generation,
  872. 'dienstag/cv_controlnet_controllable-image-generation_nine-annotators'),
  873. Tasks.video_inpainting: (Pipelines.video_inpainting,
  874. 'damo/cv_video-inpainting'),
  875. Tasks.video_human_matting: (Pipelines.video_human_matting,
  876. 'damo/cv_effnetv2_video-human-matting'),
  877. Tasks.human_reconstruction: (Pipelines.human_reconstruction,
  878. 'damo/cv_hrnet_image-human-reconstruction'),
  879. Tasks.text_texture_generation: (
  880. Pipelines.text_texture_generation,
  881. 'damo/cv_diffuser_text-texture-generation'),
  882. Tasks.video_frame_interpolation: (
  883. Pipelines.video_frame_interpolation,
  884. 'damo/cv_raft_video-frame-interpolation'),
  885. Tasks.video_deinterlace: (Pipelines.video_deinterlace,
  886. 'damo/cv_unet_video-deinterlace'),
  887. Tasks.human_wholebody_keypoint: (
  888. Pipelines.human_wholebody_keypoint,
  889. 'damo/cv_hrnetw48_human-wholebody-keypoint_image'),
  890. Tasks.hand_static: (Pipelines.hand_static,
  891. 'damo/cv_mobileface_hand-static'),
  892. Tasks.face_human_hand_detection: (
  893. Pipelines.face_human_hand_detection,
  894. 'damo/cv_nanodet_face-human-hand-detection'),
  895. Tasks.face_emotion: (Pipelines.face_emotion, 'damo/cv_face-emotion'),
  896. Tasks.product_segmentation: (Pipelines.product_segmentation,
  897. 'damo/cv_F3Net_product-segmentation'),
  898. Tasks.referring_video_object_segmentation: (
  899. Pipelines.referring_video_object_segmentation,
  900. 'damo/cv_swin-t_referring_video-object-segmentation'),
  901. Tasks.video_summarization: (Pipelines.video_summarization,
  902. 'damo/cv_googlenet_pgl-video-summarization'),
  903. Tasks.image_skychange: (Pipelines.image_skychange,
  904. 'damo/cv_hrnetocr_skychange'),
  905. Tasks.translation_evaluation: (
  906. Pipelines.translation_evaluation,
  907. 'damo/nlp_unite_mup_translation_evaluation_multilingual_large'),
  908. Tasks.video_object_segmentation: (
  909. Pipelines.video_object_segmentation,
  910. 'damo/cv_rdevos_video-object-segmentation'),
  911. Tasks.video_multi_object_tracking: (
  912. Pipelines.video_multi_object_tracking,
  913. 'damo/cv_yolov5_video-multi-object-tracking_fairmot'),
  914. Tasks.image_multi_view_depth_estimation: (
  915. Pipelines.image_multi_view_depth_estimation,
  916. 'damo/cv_casmvs_multi-view-depth-estimation_general'),
  917. Tasks.image_fewshot_detection: (
  918. Pipelines.image_fewshot_detection,
  919. 'damo/cv_resnet101_detection_fewshot-defrcn'),
  920. Tasks.image_body_reshaping: (Pipelines.image_body_reshaping,
  921. 'damo/cv_flow-based-body-reshaping_damo'),
  922. Tasks.image_face_fusion: (Pipelines.image_face_fusion,
  923. 'damo/cv_unet-image-face-fusion_damo'),
  924. Tasks.image_matching: (
  925. Pipelines.image_matching,
  926. 'damo/cv_quadtree_attention_image-matching_outdoor'),
  927. Tasks.image_quality_assessment_mos: (
  928. Pipelines.image_quality_assessment_mos,
  929. 'damo/cv_resnet_image-quality-assessment-mos_youtubeUGC'),
  930. Tasks.image_quality_assessment_degradation: (
  931. Pipelines.image_quality_assessment_degradation,
  932. 'damo/cv_resnet50_image-quality-assessment_degradation'),
  933. Tasks.vision_efficient_tuning: (
  934. Pipelines.vision_efficient_tuning,
  935. 'damo/cv_vitb16_classification_vision-efficient-tuning-adapter'),
  936. Tasks.object_detection_3d: (Pipelines.object_detection_3d_depe,
  937. 'damo/cv_object-detection-3d_depe'),
  938. Tasks.bad_image_detecting: (Pipelines.bad_image_detecting,
  939. 'damo/cv_mobilenet-v2_bad-image-detecting'),
  940. Tasks.nerf_recon_acc: (Pipelines.nerf_recon_acc,
  941. 'damo/cv_nerf-3d-reconstruction-accelerate_damo'),
  942. Tasks.nerf_recon_4k: (Pipelines.nerf_recon_4k,
  943. 'damo/cv_nerf-3d-reconstruction-4k-nerf_damo'),
  944. Tasks.nerf_recon_vq_compression: (
  945. Pipelines.nerf_recon_vq_compression,
  946. 'damo/cv_nerf-3d-reconstruction-vq-compression_damo'),
  947. Tasks.surface_recon_common: (Pipelines.surface_recon_common,
  948. 'damo/cv_surface-reconstruction-common'),
  949. Tasks.siamese_uie: (Pipelines.siamese_uie,
  950. 'damo/nlp_structbert_siamese-uie_chinese-base'),
  951. Tasks.pedestrian_attribute_recognition: (
  952. Pipelines.pedestrian_attribute_recognition,
  953. 'damo/cv_resnet50_pedestrian-attribute-recognition_image'),
  954. Tasks.text_to_360panorama_image: (
  955. Pipelines.text_to_360panorama_image,
  956. 'damo/cv_diffusion_text-to-360panorama-image_generation'),
  957. Tasks.image_try_on: (Pipelines.image_try_on,
  958. 'damo/cv_SAL-VTON_virtual-try-on'),
  959. Tasks.human_image_generation: (Pipelines.human_image_generation,
  960. 'damo/cv_FreqHPT_human-image-generation'),
  961. Tasks.human3d_render: (Pipelines.human3d_render,
  962. 'damo/cv_3d-human-synthesis-library'),
  963. Tasks.human3d_animation: (Pipelines.human3d_animation,
  964. 'damo/cv_3d-human-animation'),
  965. Tasks.image_view_transform: (Pipelines.image_view_transform,
  966. 'damo/cv_image-view-transform'),
  967. Tasks.image_control_3d_portrait: (
  968. Pipelines.image_control_3d_portrait,
  969. 'damo/cv_vit_image-control-3d-portrait-synthesis'),
  970. Tasks.self_supervised_depth_completion: (
  971. Pipelines.self_supervised_depth_completion,
  972. 'damo/self-supervised-depth-completion')
  973. }
  974. class CVTrainers(object):
  975. # cv trainers
  976. image_instance_segmentation = 'image-instance-segmentation'
  977. image_portrait_enhancement = 'image-portrait-enhancement'
  978. video_summarization = 'video-summarization'
  979. movie_scene_segmentation = 'movie-scene-segmentation'
  980. face_detection_scrfd = 'face-detection-scrfd'
  981. card_detection_scrfd = 'card-detection-scrfd'
  982. image_inpainting = 'image-inpainting'
  983. referring_video_object_segmentation = 'referring-video-object-segmentation'
  984. image_classification_team = 'image-classification-team'
  985. image_classification = 'image-classification'
  986. image_fewshot_detection = 'image-fewshot-detection'
  987. ocr_recognition = 'ocr-recognition'
  988. ocr_detection_db = 'ocr-detection-db'
  989. nerf_recon_acc = 'nerf-recon-acc'
  990. nerf_recon_4k = 'nerf-recon-4k'
  991. action_detection = 'action-detection'
  992. vision_efficient_tuning = 'vision-efficient-tuning'
  993. self_supervised_depth_completion = 'self-supervised-depth-completion'
  994. class NLPTrainers(object):
  995. # nlp trainers
  996. bert_sentiment_analysis = 'bert-sentiment-analysis'
  997. dialog_modeling_trainer = 'dialog-modeling-trainer'
  998. dialog_intent_trainer = 'dialog-intent-trainer'
  999. nlp_base_trainer = 'nlp-base-trainer'
  1000. nlp_veco_trainer = 'nlp-veco-trainer'
  1001. nlp_text_ranking_trainer = 'nlp-text-ranking-trainer'
  1002. nlp_sentence_embedding_trainer = 'nlp-sentence-embedding-trainer'
  1003. text_generation_trainer = 'text-generation-trainer'
  1004. nlp_plug_trainer = 'nlp-plug-trainer'
  1005. gpt3_trainer = 'nlp-gpt3-trainer'
  1006. faq_question_answering_trainer = 'faq-question-answering-trainer'
  1007. gpt_moe_trainer = 'nlp-gpt-moe-trainer'
  1008. table_question_answering_trainer = 'table-question-answering-trainer'
  1009. document_grounded_dialog_generate_trainer = 'document-grounded-dialog-generate-trainer'
  1010. document_grounded_dialog_rerank_trainer = 'document-grounded-dialog-rerank-trainer'
  1011. document_grounded_dialog_retrieval_trainer = 'document-grounded-dialog-retrieval-trainer'
  1012. siamese_uie_trainer = 'siamese-uie-trainer'
  1013. translation_evaluation_trainer = 'translation-evaluation-trainer'
  1014. class MultiModalTrainers(object):
  1015. clip_multi_modal_embedding = 'clip-multi-modal-embedding'
  1016. ofa = 'ofa'
  1017. mplug = 'mplug'
  1018. mgeo_ranking_trainer = 'mgeo-ranking-trainer'
  1019. efficient_diffusion_tuning = 'efficient-diffusion-tuning'
  1020. stable_diffusion = 'stable-diffusion'
  1021. lora_diffusion = 'lora-diffusion'
  1022. lora_diffusion_xl = 'lora-diffusion-xl'
  1023. dreambooth_diffusion = 'dreambooth-diffusion'
  1024. custom_diffusion = 'custom-diffusion'
  1025. cones2_inference = 'cones2-inference'
  1026. class AudioTrainers(object):
  1027. speech_frcrn_ans_cirm_16k = 'speech_frcrn_ans_cirm_16k'
  1028. speech_dfsmn_kws_char_farfield = 'speech_dfsmn_kws_char_farfield'
  1029. speech_kws_fsmn_char_ctc_nearfield = 'speech_kws_fsmn_char_ctc_nearfield'
  1030. speech_kantts_trainer = 'speech-kantts-trainer'
  1031. speech_asr_trainer = 'speech-asr-trainer'
  1032. speech_separation = 'speech-separation'
  1033. class Trainers(CVTrainers, NLPTrainers, MultiModalTrainers, AudioTrainers):
  1034. """ Names for different trainer.
  1035. Holds the standard trainer name to use for identifying different trainer.
  1036. This should be used to register trainers.
  1037. For a general Trainer, you can use EpochBasedTrainer.
  1038. For a model specific Trainer, you can use ${ModelName}-${Task}-trainer.
  1039. """
  1040. default = 'trainer'
  1041. tinynas_damoyolo = 'tinynas-damoyolo'
  1042. @staticmethod
  1043. def get_trainer_domain(attribute_or_value):
  1044. if attribute_or_value in vars(
  1045. CVTrainers) or attribute_or_value in vars(CVTrainers).values():
  1046. return Fields.cv
  1047. elif attribute_or_value in vars(
  1048. NLPTrainers) or attribute_or_value in vars(
  1049. NLPTrainers).values():
  1050. return Fields.nlp
  1051. elif attribute_or_value in vars(
  1052. AudioTrainers) or attribute_or_value in vars(
  1053. AudioTrainers).values():
  1054. return Fields.audio
  1055. elif attribute_or_value in vars(
  1056. MultiModalTrainers) or attribute_or_value in vars(
  1057. MultiModalTrainers).values():
  1058. return Fields.multi_modal
  1059. elif attribute_or_value == Trainers.default:
  1060. return Trainers.default
  1061. else:
  1062. return 'unknown'
  1063. class Preprocessors(object):
  1064. """ Names for different preprocessor.
  1065. Holds the standard preprocessor name to use for identifying different preprocessor.
  1066. This should be used to register preprocessors.
  1067. For a general preprocessor, just use the function name as preprocessor name such as
  1068. resize-image, random-crop
  1069. For a model-specific preprocessor, use ${modelname}-${function}
  1070. """
  1071. # cv preprocessor
  1072. load_image = 'load-image'
  1073. image_denoise_preprocessor = 'image-denoise-preprocessor'
  1074. image_deblur_preprocessor = 'image-deblur-preprocessor'
  1075. object_detection_tinynas_preprocessor = 'object-detection-tinynas-preprocessor'
  1076. image_classification_mmcv_preprocessor = 'image-classification-mmcv-preprocessor'
  1077. image_color_enhance_preprocessor = 'image-color-enhance-preprocessor'
  1078. image_instance_segmentation_preprocessor = 'image-instance-segmentation-preprocessor'
  1079. image_driving_perception_preprocessor = 'image-driving-perception-preprocessor'
  1080. image_portrait_enhancement_preprocessor = 'image-portrait-enhancement-preprocessor'
  1081. image_quality_assessment_man_preprocessor = 'image-quality_assessment-man-preprocessor'
  1082. image_quality_assessment_mos_preprocessor = 'image-quality_assessment-mos-preprocessor'
  1083. video_summarization_preprocessor = 'video-summarization-preprocessor'
  1084. movie_scene_segmentation_preprocessor = 'movie-scene-segmentation-preprocessor'
  1085. image_classification_bypass_preprocessor = 'image-classification-bypass-preprocessor'
  1086. object_detection_scrfd = 'object-detection-scrfd'
  1087. image_sky_change_preprocessor = 'image-sky-change-preprocessor'
  1088. image_demoire_preprocessor = 'image-demoire-preprocessor'
  1089. ocr_recognition = 'ocr-recognition'
  1090. ocr_detection = 'ocr-detection'
  1091. bad_image_detecting_preprocessor = 'bad-image-detecting-preprocessor'
  1092. nerf_recon_acc_preprocessor = 'nerf-recon-acc-preprocessor'
  1093. nerf_recon_4k_preprocessor = 'nerf-recon-4k-preprocessor'
  1094. nerf_recon_vq_compression_preprocessor = 'nerf-recon-vq-compression-preprocessor'
  1095. controllable_image_generation_preprocessor = 'controllable-image-generation-preprocessor'
  1096. image_classification_preprocessor = 'image-classification-preprocessor'
  1097. # nlp preprocessor
  1098. sen_sim_tokenizer = 'sen-sim-tokenizer'
  1099. cross_encoder_tokenizer = 'cross-encoder-tokenizer'
  1100. bert_seq_cls_tokenizer = 'bert-seq-cls-tokenizer'
  1101. text_gen_tokenizer = 'text-gen-tokenizer'
  1102. text2text_gen_preprocessor = 'text2text-gen-preprocessor'
  1103. text_gen_jieba_tokenizer = 'text-gen-jieba-tokenizer'
  1104. text2text_translate_preprocessor = 'text2text-translate-preprocessor'
  1105. token_cls_tokenizer = 'token-cls-tokenizer'
  1106. ner_tokenizer = 'ner-tokenizer'
  1107. thai_ner_tokenizer = 'thai-ner-tokenizer'
  1108. viet_ner_tokenizer = 'viet-ner-tokenizer'
  1109. nli_tokenizer = 'nli-tokenizer'
  1110. sen_cls_tokenizer = 'sen-cls-tokenizer'
  1111. dialog_intent_preprocessor = 'dialog-intent-preprocessor'
  1112. dialog_modeling_preprocessor = 'dialog-modeling-preprocessor'
  1113. dialog_state_tracking_preprocessor = 'dialog-state-tracking-preprocessor'
  1114. sbert_token_cls_tokenizer = 'sbert-token-cls-tokenizer'
  1115. zero_shot_cls_tokenizer = 'zero-shot-cls-tokenizer'
  1116. text_error_correction = 'text-error-correction'
  1117. word_alignment = 'word-alignment'
  1118. sentence_embedding = 'sentence-embedding'
  1119. text_ranking = 'text-ranking'
  1120. sequence_labeling_tokenizer = 'sequence-labeling-tokenizer'
  1121. word_segment_text_to_label_preprocessor = 'word-segment-text-to-label-preprocessor'
  1122. thai_wseg_tokenizer = 'thai-wseg-tokenizer'
  1123. fill_mask = 'fill-mask'
  1124. fill_mask_ponet = 'fill-mask-ponet'
  1125. faq_question_answering_preprocessor = 'faq-question-answering-preprocessor'
  1126. conversational_text_to_sql = 'conversational-text-to-sql'
  1127. table_question_answering_preprocessor = 'table-question-answering-preprocessor'
  1128. re_tokenizer = 're-tokenizer'
  1129. document_segmentation = 'document-segmentation'
  1130. feature_extraction = 'feature-extraction'
  1131. mglm_summarization = 'mglm-summarization'
  1132. sentence_piece = 'sentence-piece'
  1133. translation_evaluation = 'translation-evaluation-preprocessor'
  1134. canmt_translation = 'canmt-translation'
  1135. dialog_use_preprocessor = 'dialog-use-preprocessor'
  1136. siamese_uie_preprocessor = 'siamese-uie-preprocessor'
  1137. document_grounded_dialog_retrieval = 'document-grounded-dialog-retrieval'
  1138. document_grounded_dialog_rerank = 'document-grounded-dialog-rerank'
  1139. document_grounded_dialog_generate = 'document-grounded-dialog-generate'
  1140. machine_reading_comprehension_for_ner = 'machine-reading-comprehension-for-ner'
  1141. # audio preprocessor
  1142. linear_aec_fbank = 'linear-aec-fbank'
  1143. text_to_tacotron_symbols = 'text-to-tacotron-symbols'
  1144. wav_to_lists = 'wav-to-lists'
  1145. wav_to_scp = 'wav-to-scp'
  1146. kantts_data_preprocessor = 'kantts-data-preprocessor'
  1147. # multi-modal preprocessor
  1148. ofa_tasks_preprocessor = 'ofa-tasks-preprocessor'
  1149. clip_preprocessor = 'clip-preprocessor'
  1150. mplug_tasks_preprocessor = 'mplug-tasks-preprocessor'
  1151. mgeo_ranking = 'mgeo-ranking'
  1152. vldoc_preprocessor = 'vldoc-preprocessor'
  1153. hitea_tasks_preprocessor = 'hitea-tasks-preprocessor'
  1154. diffusion_image_generation_preprocessor = 'diffusion-image-generation-preprocessor'
  1155. mplug_owl_preprocessor = 'mplug-owl-preprocessor'
  1156. image_captioning_clip_interrogator_preprocessor = 'image-captioning-clip-interrogator-preprocessor'
  1157. # science preprocessor
  1158. unifold_preprocessor = 'unifold-preprocessor'
  1159. class Metrics(object):
  1160. """ Names for different metrics.
  1161. """
  1162. # accuracy
  1163. accuracy = 'accuracy'
  1164. multi_average_precision = 'mAP'
  1165. audio_noise_metric = 'audio-noise-metric'
  1166. PPL = 'ppl'
  1167. # text gen
  1168. BLEU = 'bleu'
  1169. # metrics for image denoise task
  1170. image_denoise_metric = 'image-denoise-metric'
  1171. # metrics for video frame-interpolation task
  1172. video_frame_interpolation_metric = 'video-frame-interpolation-metric'
  1173. # metrics for real-world video super-resolution task
  1174. video_super_resolution_metric = 'video-super-resolution-metric'
  1175. # metric for image instance segmentation task
  1176. image_ins_seg_coco_metric = 'image-ins-seg-coco-metric'
  1177. # metrics for sequence classification task
  1178. seq_cls_metric = 'seq-cls-metric'
  1179. # loss metric
  1180. loss_metric = 'loss-metric'
  1181. # metrics for token-classification task
  1182. token_cls_metric = 'token-cls-metric'
  1183. # metrics for text-generation task
  1184. text_gen_metric = 'text-gen-metric'
  1185. # file saving wrapper
  1186. prediction_saving_wrapper = 'prediction-saving-wrapper'
  1187. # metrics for image-color-enhance task
  1188. image_color_enhance_metric = 'image-color-enhance-metric'
  1189. # metrics for image-portrait-enhancement task
  1190. image_portrait_enhancement_metric = 'image-portrait-enhancement-metric'
  1191. video_summarization_metric = 'video-summarization-metric'
  1192. # metric for movie-scene-segmentation task
  1193. movie_scene_segmentation_metric = 'movie-scene-segmentation-metric'
  1194. # metric for inpainting task
  1195. image_inpainting_metric = 'image-inpainting-metric'
  1196. # metric for ocr
  1197. NED = 'ned'
  1198. # metric for cross-modal retrieval
  1199. inbatch_recall = 'inbatch_recall'
  1200. # metric for referring-video-object-segmentation task
  1201. referring_video_object_segmentation_metric = 'referring-video-object-segmentation-metric'
  1202. # metric for video stabilization task
  1203. video_stabilization_metric = 'video-stabilization-metric'
  1204. # metirc for image-quality-assessment-mos task
  1205. image_quality_assessment_mos_metric = 'image-quality-assessment-mos-metric'
  1206. # metirc for image-quality-assessment-degradation task
  1207. image_quality_assessment_degradation_metric = 'image-quality-assessment-degradation-metric'
  1208. # metric for text-ranking task
  1209. text_ranking_metric = 'text-ranking-metric'
  1210. # metric for image-colorization task
  1211. image_colorization_metric = 'image-colorization-metric'
  1212. ocr_recognition_metric = 'ocr-recognition-metric'
  1213. # metric for translation evaluation
  1214. translation_evaluation_metric = 'translation-evaluation-metric'
  1215. class Optimizers(object):
  1216. """ Names for different OPTIMIZER.
  1217. Holds the standard optimizer name to use for identifying different optimizer.
  1218. This should be used to register optimizer.
  1219. """
  1220. default = 'optimizer'
  1221. SGD = 'SGD'
  1222. class Hooks(object):
  1223. """ Names for different hooks.
  1224. All kinds of hooks are defined here
  1225. """
  1226. # lr
  1227. LrSchedulerHook = 'LrSchedulerHook'
  1228. PlateauLrSchedulerHook = 'PlateauLrSchedulerHook'
  1229. NoneLrSchedulerHook = 'NoneLrSchedulerHook'
  1230. # optimizer
  1231. OptimizerHook = 'OptimizerHook'
  1232. TorchAMPOptimizerHook = 'TorchAMPOptimizerHook'
  1233. ApexAMPOptimizerHook = 'ApexAMPOptimizerHook'
  1234. NoneOptimizerHook = 'NoneOptimizerHook'
  1235. # checkpoint
  1236. CheckpointHook = 'CheckpointHook'
  1237. BestCkptSaverHook = 'BestCkptSaverHook'
  1238. LoadCheckpointHook = 'LoadCheckpointHook'
  1239. # logger
  1240. TextLoggerHook = 'TextLoggerHook'
  1241. TensorboardHook = 'TensorboardHook'
  1242. IterTimerHook = 'IterTimerHook'
  1243. EvaluationHook = 'EvaluationHook'
  1244. # Compression
  1245. SparsityHook = 'SparsityHook'
  1246. # CLIP logit_scale clamp
  1247. ClipClampLogitScaleHook = 'ClipClampLogitScaleHook'
  1248. # train
  1249. EarlyStopHook = 'EarlyStopHook'
  1250. DeepspeedHook = 'DeepspeedHook'
  1251. MegatronHook = 'MegatronHook'
  1252. DDPHook = 'DDPHook'
  1253. SwiftHook = 'SwiftHook'
  1254. class LR_Schedulers(object):
  1255. """learning rate scheduler is defined here
  1256. """
  1257. LinearWarmup = 'LinearWarmup'
  1258. ConstantWarmup = 'ConstantWarmup'
  1259. ExponentialWarmup = 'ExponentialWarmup'
  1260. class CustomDatasets(object):
  1261. """ Names for different datasets.
  1262. """
  1263. PairedDataset = 'PairedDataset'
  1264. SiddDataset = 'SiddDataset'
  1265. GoproDataset = 'GoproDataset'
  1266. RedsDataset = 'RedsDataset'