snapshot_download.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590
  1. # Copyright (c) Alibaba, Inc. and its affiliates.
  2. import fnmatch
  3. import os
  4. import re
  5. import uuid
  6. from contextlib import nullcontext
  7. from http.cookiejar import CookieJar
  8. from pathlib import Path
  9. from typing import Dict, List, Optional, Type, Union
  10. from modelscope.utils.constant import (DEFAULT_DATASET_REVISION,
  11. DEFAULT_MODEL_REVISION,
  12. INTRA_CLOUD_ACCELERATION,
  13. REPO_TYPE_DATASET, REPO_TYPE_MODEL,
  14. REPO_TYPE_SUPPORT)
  15. from modelscope.utils.file_utils import get_modelscope_cache_dir
  16. from modelscope.utils.logger import get_logger
  17. from modelscope.utils.thread_utils import thread_executor
  18. from .api import HubApi, ModelScopeConfig
  19. from .callback import ProgressCallback
  20. from .constants import DEFAULT_MAX_WORKERS
  21. from .errors import InvalidParameter
  22. from .file_download import (create_temporary_directory_and_cache,
  23. download_file, get_file_download_url)
  24. from .utils.caching import ModelFileSystemCache
  25. from .utils.utils import (get_model_masked_directory,
  26. model_id_to_group_owner_name, strtobool,
  27. weak_file_lock)
  28. logger = get_logger()
  def snapshot_download(
      model_id: str = None,
      revision: Optional[str] = None,
      cache_dir: Union[str, Path, None] = None,
      user_agent: Optional[Union[Dict, str]] = None,
      local_files_only: Optional[bool] = False,
      cookies: Optional[CookieJar] = None,
      ignore_file_pattern: Optional[Union[str, List[str]]] = None,
      allow_file_pattern: Optional[Union[str, List[str]]] = None,
      local_dir: Optional[str] = None,
      allow_patterns: Optional[Union[List[str], str]] = None,
      ignore_patterns: Optional[Union[List[str], str]] = None,
      max_workers: Optional[int] = None,
      repo_id: str = None,
      repo_type: Optional[str] = REPO_TYPE_MODEL,
      enable_file_lock: Optional[bool] = None,
      progress_callbacks: List[Type[ProgressCallback]] = None,
  ) -> str:
      """Download all files of a repo.

      Downloads a whole snapshot of a repo's files at the specified revision.
      This is useful when you want all files from a repo, because you don't
      know which ones you will need a priori. An alternative would be to clone
      the repo, but that requires git and git-lfs to be installed and
      configured.

      Args:
          model_id (str): A user or an organization name and a model name
              separated by a `/`. Ignored when `repo_id` is provided.
          revision (str, optional): Branch name or tag to download. When
              `None`, the default revision for `repo_type` is used
              (`DEFAULT_DATASET_REVISION` for datasets, otherwise
              `DEFAULT_MODEL_REVISION`).
          cache_dir (str, Path, optional): Folder where cached files are
              stored. When `None`, the ModelScope cache directory is used.
          user_agent (str, dict, optional): The user-agent info in the form of
              a dictionary or a string.
          local_files_only (bool, optional): If `True`, do not download
              anything and return the path of the local cache if it holds
              files.
          cookies (CookieJar, optional): The cookies of the request.
          ignore_file_pattern (`str` or `List`, *optional*):
              File patterns to skip, like exact file names or file extensions.
          allow_file_pattern (`str` or `List`, *optional*):
              File patterns to download, like exact file names or file
              extensions.
          local_dir (str, optional): Specific local directory path to which
              the files will be downloaded.
          allow_patterns (`str` or `List`, *optional*):
              If provided, only files matching at least one pattern are
              downloaded. Applied together with `allow_file_pattern`.
              For hugging-face compatibility.
          ignore_patterns (`str` or `List`, *optional*):
              If provided, files matching any of the patterns are not
              downloaded. Applied together with `ignore_file_pattern`.
              For hugging-face compatibility.
          max_workers (`int`, *optional*): The maximum number of workers used
              to download files. Falls back to `DEFAULT_MAX_WORKERS` when
              `None` or `0`.
          repo_id (str): A user or an organization name and a repo name
              separated by a `/`. Takes precedence over `model_id`.
          repo_type (str, optional): The type of the repo; must be one of
              `REPO_TYPE_SUPPORT`. `None` selects `REPO_TYPE_MODEL`.
          enable_file_lock (`bool`, *optional*): Whether to serialise the
              download behind a per-repo lock file, which is useful for
              multi-process downloading. When `None`, the value is read from
              the `MODELSCOPE_HUB_FILE_LOCK` environment variable, whose
              default is `'true'`.
          progress_callbacks (`List[Type[ProgressCallback]]`, *optional*):
              Progress callbacks used to track the download progress.

      Raises:
          ValueError: If neither `repo_id` nor `model_id` is given, or if
              `repo_type` is not one of `REPO_TYPE_SUPPORT`.

      Returns:
          str: Local folder path (string) of the repo snapshot.
      """
      repo_id = repo_id or model_id
      if not repo_id:
          raise ValueError('Please provide a valid model_id or repo_id')

      # The parameter is annotated Optional and `_snapshot_download` already
      # treats a missing repo type as "model"; accept an explicit None here
      # too instead of rejecting it as an invalid type.
      if repo_type is None:
          repo_type = REPO_TYPE_MODEL
      if repo_type not in REPO_TYPE_SUPPORT:
          raise ValueError(
              f'Invalid repo type: {repo_type}, only support: {REPO_TYPE_SUPPORT}'
          )

      max_workers = max_workers or DEFAULT_MAX_WORKERS
      if revision is None:
          revision = (
              DEFAULT_DATASET_REVISION
              if repo_type == REPO_TYPE_DATASET else DEFAULT_MODEL_REVISION)

      if enable_file_lock is None:
          enable_file_lock = strtobool(
              os.environ.get('MODELSCOPE_HUB_FILE_LOCK', 'true'))

      if enable_file_lock:
          system_cache = (
              cache_dir
              if cache_dir is not None else get_modelscope_cache_dir())
          os.makedirs(os.path.join(system_cache, '.lock'), exist_ok=True)
          # One lock file per repo; '/' is replaced so the repo id becomes a
          # single file name inside the `.lock` folder.
          lock_file = os.path.join(system_cache, '.lock',
                                   repo_id.replace('/', '___'))
          context = weak_file_lock(lock_file)
      else:
          context = nullcontext()

      with context:
          return _snapshot_download(
              repo_id,
              repo_type=repo_type,
              revision=revision,
              cache_dir=cache_dir,
              user_agent=user_agent,
              local_files_only=local_files_only,
              cookies=cookies,
              ignore_file_pattern=ignore_file_pattern,
              allow_file_pattern=allow_file_pattern,
              local_dir=local_dir,
              ignore_patterns=ignore_patterns,
              allow_patterns=allow_patterns,
              max_workers=max_workers,
              progress_callbacks=progress_callbacks)
  def dataset_snapshot_download(
      dataset_id: str,
      revision: Optional[str] = DEFAULT_DATASET_REVISION,
      cache_dir: Union[str, Path, None] = None,
      local_dir: Optional[str] = None,
      user_agent: Optional[Union[Dict, str]] = None,
      local_files_only: Optional[bool] = False,
      cookies: Optional[CookieJar] = None,
      ignore_file_pattern: Optional[Union[str, List[str]]] = None,
      allow_file_pattern: Optional[Union[str, List[str]]] = None,
      allow_patterns: Optional[Union[List[str], str]] = None,
      ignore_patterns: Optional[Union[List[str], str]] = None,
      enable_file_lock: Optional[bool] = None,
      max_workers: int = 8,
  ) -> str:
      """Download the raw files of a dataset.

      Fetches every file of the dataset at the given revision, which is handy
      when it is not known in advance which files will be needed. Cloning the
      dataset would be an alternative, but that requires git and git-lfs to be
      installed and configured.

      Args:
          dataset_id (str): A user or an organization name and a dataset name
              separated by a `/`.
          revision (str, optional): Branch name or tag to download.
          cache_dir (str, Path, optional): Folder where cached files are
              stored. When `None`, the ModelScope cache directory is used.
          local_dir (str, optional): Specific local directory path to which
              the files will be downloaded.
          user_agent (str, dict, optional): The user-agent info in the form of
              a dictionary or a string.
          local_files_only (bool, optional): If `True`, do not download
              anything and return the path of the local cache if it holds
              files.
          cookies (CookieJar, optional): The cookies of the request.
          ignore_file_pattern (`str` or `List`, *optional*):
              File patterns to skip, like exact file names or file extensions.
              Using regex here is deprecated.
          allow_file_pattern (`str` or `List`, *optional*):
              File patterns to download, like exact file names or file
              extensions.
          allow_patterns (`str` or `List`, *optional*):
              If provided, only files matching at least one pattern are
              downloaded. For hugging-face compatibility.
          ignore_patterns (`str` or `List`, *optional*):
              If provided, files matching any of the patterns are not
              downloaded. For hugging-face compatibility.
          enable_file_lock (`bool`, *optional*): Whether to serialise the
              download behind a per-dataset lock file, which is useful for
              multi-process downloading. When `None`, the value is read from
              the `MODELSCOPE_HUB_FILE_LOCK` environment variable, whose
              default is `'true'`.
          max_workers (`int`): The maximum number of workers used to download
              files, default 8.

      Returns:
          str: Local folder path (string) of the dataset snapshot.
      """
      if enable_file_lock is None:
          lock_setting = os.environ.get('MODELSCOPE_HUB_FILE_LOCK', 'true')
          enable_file_lock = strtobool(lock_setting)

      if not enable_file_lock:
          download_guard = nullcontext()
      else:
          cache_root = (
              get_modelscope_cache_dir() if cache_dir is None else cache_dir)
          lock_folder = os.path.join(cache_root, '.lock')
          os.makedirs(lock_folder, exist_ok=True)
          # One lock file per dataset; '/' is replaced so the id becomes a
          # single file name inside the `.lock` folder.
          lock_name = dataset_id.replace('/', '___')
          download_guard = weak_file_lock(os.path.join(lock_folder, lock_name))

      with download_guard:
          return _snapshot_download(
              dataset_id,
              repo_type=REPO_TYPE_DATASET,
              revision=revision,
              cache_dir=cache_dir,
              user_agent=user_agent,
              local_files_only=local_files_only,
              cookies=cookies,
              ignore_file_pattern=ignore_file_pattern,
              allow_file_pattern=allow_file_pattern,
              local_dir=local_dir,
              ignore_patterns=ignore_patterns,
              allow_patterns=allow_patterns,
              max_workers=max_workers)
  def _snapshot_download(
      repo_id: str,
      *,
      repo_type: Optional[str] = None,
      revision: Optional[str] = DEFAULT_MODEL_REVISION,
      cache_dir: Union[str, Path, None] = None,
      user_agent: Optional[Union[Dict, str]] = None,
      local_files_only: Optional[bool] = False,
      cookies: Optional[CookieJar] = None,
      ignore_file_pattern: Optional[Union[str, List[str]]] = None,
      allow_file_pattern: Optional[Union[str, List[str]]] = None,
      local_dir: Optional[str] = None,
      allow_patterns: Optional[Union[List[str], str]] = None,
      ignore_patterns: Optional[Union[List[str], str]] = None,
      max_workers: int = 8,
      progress_callbacks: List[Type[ProgressCallback]] = None,
  ):
      """Download a model or dataset snapshot (no file locking here).

      Args:
          repo_id (str): Repo identifier in the form `owner/name`.
          repo_type (str, optional): One of `REPO_TYPE_SUPPORT`; a falsy value
              selects `REPO_TYPE_MODEL`.
          revision (str, optional): Revision to download. For datasets a falsy
              value selects `DEFAULT_DATASET_REVISION`.
          cache_dir (str, Path, optional): Cache folder; the ModelScope cache
              directory is used when `None`.
          user_agent (str, dict, optional): User-agent info for the request
              headers.
          local_files_only (bool, optional): If `True`, return the local cache
              location without contacting the hub.
          cookies (CookieJar, optional): Request cookies; loaded through
              `ModelScopeConfig.get_cookies()` when `None`.
          ignore_file_pattern, allow_file_pattern, allow_patterns,
          ignore_patterns: File filters forwarded to `_download_file_lists`.
          local_dir (str, optional): Explicit download directory.
          max_workers (int): Number of download workers.
          progress_callbacks: Callbacks forwarded to the file downloader.

      Returns:
          The root location of the cache, or `None` when the dataset file
          list is `None`.

      Raises:
          InvalidParameter: If `repo_type` is not supported.
          ValueError: If `local_files_only` is set and the cache is empty.
      """
      if not repo_type:
          repo_type = REPO_TYPE_MODEL
      if repo_type not in REPO_TYPE_SUPPORT:
          raise InvalidParameter('Invalid repo type: %s, only support: %s' %
                                 (repo_type, REPO_TYPE_SUPPORT))

      temporary_cache_dir, cache = create_temporary_directory_and_cache(
          repo_id, local_dir=local_dir, cache_dir=cache_dir, repo_type=repo_type)
      system_cache = (
          cache_dir if cache_dir is not None else get_modelscope_cache_dir())

      if local_files_only:
          if len(cache.cached_files) == 0:
              raise ValueError(
                  'Cannot find the requested files in the cached path and outgoing'
                  ' traffic has been disabled. To enable look-ups and downloads'
                  " online, set 'local_files_only' to False.")
          logger.warning('We can not confirm the cached file is for revision: %s'
                         % revision)
          # We can not confirm the cached file is for snapshot 'revision'.
          return cache.get_root_location()

      # Headers shared by the file-list request and every file download.
      headers = {
          'user-agent':
          ModelScopeConfig.get_user_agent(user_agent=user_agent, ),
          'snapshot-identifier': str(uuid.uuid4()),
      }

      if INTRA_CLOUD_ACCELERATION == 'true':
          region_id: Optional[str] = (
              os.getenv('INTRA_CLOUD_ACCELERATION_REGION')
              or HubApi()._get_internal_acceleration_domain())
          if region_id:
              logger.info(
                  f'Intra-cloud acceleration enabled for downloading from {repo_id}'
              )
              headers['x-aliyun-region-id'] = region_id

      _api = HubApi()
      endpoint = _api.get_endpoint_for_read(
          repo_id=repo_id, repo_type=repo_type)
      if cookies is None:
          cookies = ModelScopeConfig.get_cookies()

      if repo_type == REPO_TYPE_MODEL:
          directory = _resolve_snapshot_directory(
              repo_id, 'models', local_dir, cache_dir, system_cache)
          print(
              f'Downloading Model from {endpoint} to directory: {directory}')

          revision_detail = _api.get_valid_revision_detail(
              repo_id, revision=revision, cookies=cookies, endpoint=endpoint)
          revision = revision_detail['Revision']

          # Add snapshot-ci-test for counting the ci test download
          if 'CI_TEST' in os.environ:
              snapshot_header = {**headers, 'snapshot-ci-test': 'True'}
          else:
              snapshot_header = {**headers, 'Snapshot': 'True'}
          if cache.cached_model_revision is not None:
              snapshot_header[
                  'cached_model_revision'] = cache.cached_model_revision

          repo_files = _api.get_model_files(
              model_id=repo_id,
              revision=revision,
              recursive=True,
              use_cookies=False if cookies is None else cookies,
              headers=snapshot_header,
              endpoint=endpoint)
          _download_file_lists(
              repo_files,
              cache,
              temporary_cache_dir,
              repo_id,
              _api,
              None,
              None,
              headers,
              repo_type=repo_type,
              revision=revision,
              cookies=cookies,
              ignore_file_pattern=ignore_file_pattern,
              allow_file_pattern=allow_file_pattern,
              ignore_patterns=ignore_patterns,
              allow_patterns=allow_patterns,
              max_workers=max_workers,
              endpoint=endpoint,
              progress_callbacks=progress_callbacks,
          )

          # For repo ids containing '.', link the requested directory to the
          # masked directory unless the requested path already exists.
          if '.' in repo_id:
              masked_directory = get_model_masked_directory(
                  directory, repo_id)
              if os.path.exists(directory):
                  logger.info(
                      'Target directory already exists, skipping creation.')
              else:
                  logger.info(f'Creating symbolic link [{directory}].')
                  try:
                      os.symlink(
                          os.path.abspath(masked_directory),
                          directory,
                          target_is_directory=True)
                  except OSError:
                      # Best effort: the download itself already succeeded.
                      logger.warning(
                          f'Failed to create symbolic link {directory} for {os.path.abspath(masked_directory)}.'
                      )

      elif repo_type == REPO_TYPE_DATASET:
          directory = _resolve_snapshot_directory(
              repo_id, 'datasets', local_dir, cache_dir, system_cache)
          print(f'Downloading Dataset to directory: {directory}')

          group_or_owner, name = model_id_to_group_owner_name(repo_id)
          # Resolve the revision once so the file listing and the file
          # downloads target the same revision even when the caller passed
          # `revision=None`.
          revision = revision or DEFAULT_DATASET_REVISION
          revision_detail = revision

          logger.info('Fetching dataset repo file list...')
          repo_files = fetch_repo_files(_api, repo_id, revision_detail,
                                        endpoint)
          if repo_files is None:
              logger.error(
                  f'Failed to retrieve file list for dataset: {repo_id}')
              return None

          _download_file_lists(
              repo_files,
              cache,
              temporary_cache_dir,
              repo_id,
              _api,
              name,
              group_or_owner,
              headers,
              repo_type=repo_type,
              revision=revision,
              cookies=cookies,
              ignore_file_pattern=ignore_file_pattern,
              allow_file_pattern=allow_file_pattern,
              ignore_patterns=ignore_patterns,
              allow_patterns=allow_patterns,
              max_workers=max_workers,
              endpoint=endpoint,
              progress_callbacks=progress_callbacks,
          )

      cache.save_model_version(revision_info=revision_detail)
      return cache.get_root_location()


  def _resolve_snapshot_directory(repo_id, default_subdir, local_dir,
                                  cache_dir, system_cache):
      """Return the directory reported (and symlinked) as the download target.

      `local_dir` wins when given; otherwise the repo id is appended to the
      cache root, with `default_subdir` inserted only when no explicit
      `cache_dir` was supplied.
      """
      if local_dir:
          return os.path.abspath(local_dir)
      repo_parts = repo_id.split('/')
      if cache_dir:
          return os.path.join(system_cache, *repo_parts)
      return os.path.join(system_cache, default_subdir, *repo_parts)
  def fetch_repo_files(_api, repo_id, revision, endpoint, page_size: int = 150):
      """Collect the file entries of a dataset repo, page by page.

      Pages are requested through `_api.get_dataset_files` until a page comes
      back with fewer than `page_size` entries.

      Args:
          _api: Object exposing `get_dataset_files(...)`.
          repo_id: Dataset identifier passed through to the API.
          revision: Revision passed through to the API.
          endpoint: Endpoint passed through to the API.
          page_size (int): Number of entries requested per page, default 150.

      Returns:
          list: All entries gathered so far. This is best effort: if a page
          request raises, the error is logged and the entries collected up to
          that point are returned, so the list may be incomplete or empty.

      Raises:
          ValueError: If `page_size` is not positive; a non-positive size
              could never satisfy the "short page" stop condition.
      """
      if page_size <= 0:
          raise ValueError(
              f'page_size must be a positive integer, got {page_size}')

      repo_files = []
      page_number = 1
      while True:
          try:
              dataset_files = _api.get_dataset_files(
                  repo_id=repo_id,
                  revision=revision,
                  root_path='/',
                  recursive=True,
                  page_number=page_number,
                  page_size=page_size,
                  endpoint=endpoint)
          except Exception as e:
              logger.error(f'Error fetching dataset files: {e}')
              break

          repo_files.extend(dataset_files)

          # A short page means there is nothing left to fetch.
          if len(dataset_files) < page_size:
              break
          page_number += 1

      return repo_files
  408. def _is_valid_regex(pattern: str):
  409. try:
  410. re.compile(pattern)
  411. return True
  412. except BaseException:
  413. return False
  414. def _normalize_patterns(patterns: Union[str, List[str]]):
  415. if isinstance(patterns, str):
  416. patterns = [patterns]
  417. if patterns is not None:
  418. patterns = [
  419. item if not item.endswith('/') else item + '*' for item in patterns
  420. ]
  421. return patterns
  422. def _get_valid_regex_pattern(patterns: List[str]):
  423. if patterns is not None:
  424. regex_patterns = []
  425. for item in patterns:
  426. if _is_valid_regex(item):
  427. regex_patterns.append(item)
  428. return regex_patterns
  429. else:
  430. return None
  def _download_file_lists(
      repo_files: List[str],
      cache: ModelFileSystemCache,
      temporary_cache_dir: str,
      repo_id: str,
      api: HubApi,
      name: str,
      group_or_owner: str,
      headers,
      repo_type: Optional[str] = None,
      revision: Optional[str] = DEFAULT_MODEL_REVISION,
      cookies: Optional[CookieJar] = None,
      ignore_file_pattern: Optional[Union[str, List[str]]] = None,
      allow_file_pattern: Optional[Union[str, List[str]]] = None,
      allow_patterns: Optional[Union[List[str], str]] = None,
      ignore_patterns: Optional[Union[List[str], str]] = None,
      max_workers: int = 8,
      endpoint: Optional[str] = None,
      progress_callbacks: List[Type[ProgressCallback]] = None,
  ):
      """Filter the repo file entries and download the ones still needed.

      Entries are indexed by their 'Type', 'Path' and 'Name' keys. An entry
      is skipped when it is a 'tree', matches an ignore filter, fails an
      allow filter, or is already present in `cache`. The remaining entries
      are downloaded into `temporary_cache_dir` through the
      `thread_executor`-decorated worker.

      `name` and `group_or_owner` are only used to build dataset file URLs.
      """
      ignore_patterns = _normalize_patterns(ignore_patterns)
      allow_patterns = _normalize_patterns(allow_patterns)
      ignore_file_pattern = _normalize_patterns(ignore_file_pattern)
      allow_file_pattern = _normalize_patterns(allow_file_pattern)
      # For compatibility with regex usage: entries of `ignore_file_pattern`
      # that compile as regular expressions are also searched in the file name.
      ignore_regex_pattern = _get_valid_regex_pattern(ignore_file_pattern)

      def _should_skip(entry):
          """Return True when `entry` must not be downloaded."""
          if ignore_patterns and any([
                  fnmatch.fnmatch(entry['Path'], glob)
                  for glob in ignore_patterns
          ]):
              return True
          if ignore_file_pattern and any([
                  fnmatch.fnmatch(entry['Path'], glob)
                  for glob in ignore_file_pattern
          ]):
              return True
          if ignore_regex_pattern and any([
                  re.search(regex, entry['Name']) is not None
                  for regex in ignore_regex_pattern
          ]):
              return True
          if allow_patterns and not any(
                  fnmatch.fnmatch(entry['Path'], glob)
                  for glob in allow_patterns):
              return True
          if allow_file_pattern and not any(
                  fnmatch.fnmatch(entry['Path'], glob)
                  for glob in allow_file_pattern):
              return True
          # Skip the download when the cache already holds this file.
          if cache.exists(entry):
              file_name = os.path.basename(entry['Name'])
              logger.debug(
                  f'File {file_name} already in cache with identical hash, skip downloading!'
              )
              return True
          return False

      pending_files = []
      for entry in repo_files:
          if entry['Type'] == 'tree':
              continue
          try:
              skip_entry = _should_skip(entry)
          except Exception as e:
              # An entry whose filtering failed is left out of the download.
              logger.warning('The file pattern is invalid : %s' % e)
              continue
          if not skip_entry:
              pending_files.append(entry)

      @thread_executor(max_workers=max_workers, disable_tqdm=False)
      def _download_single_file(repo_file):
          if repo_type == REPO_TYPE_MODEL:
              url = get_file_download_url(
                  model_id=repo_id,
                  file_path=repo_file['Path'],
                  revision=revision,
                  endpoint=endpoint)
          elif repo_type == REPO_TYPE_DATASET:
              url = api.get_dataset_file_url(
                  file_name=repo_file['Path'],
                  dataset_name=name,
                  namespace=group_or_owner,
                  revision=revision,
                  endpoint=endpoint)
          else:
              raise InvalidParameter(
                  f'Invalid repo type: {repo_type}, supported types: {REPO_TYPE_SUPPORT}'
              )

          download_file(
              url,
              repo_file,
              temporary_cache_dir,
              cache,
              headers,
              cookies,
              disable_tqdm=False,
              progress_callbacks=progress_callbacks,
          )

      if pending_files:
          logger.info(
              f'Got {len(pending_files)} files, start to download ...')
          _download_single_file(pending_files)
          logger.info(f"Download {repo_type} '{repo_id}' successfully.")