dataset_context_config.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. # Copyright (c) Alibaba, Inc. and its affiliates.
  2. from typing import Mapping, Sequence, Union
  3. from modelscope.msdatasets.auth.auth_config import BaseAuthConfig
  4. from modelscope.msdatasets.download.download_config import DataDownloadConfig
  5. from modelscope.msdatasets.meta.data_meta_config import DataMetaConfig
  6. from modelscope.utils.constant import DownloadMode, Hubs
  7. class DatasetContextConfig:
  8. """Context configuration of dataset."""
  9. def __init__(self, dataset_name: Union[str, list], namespace: str,
  10. version: str, subset_name: str, split: Union[str, list],
  11. target: str, hub: Hubs, data_dir: str,
  12. data_files: Union[str, Sequence[str],
  13. Mapping[str, Union[str, Sequence[str]]]],
  14. download_mode: DownloadMode, cache_root_dir: str,
  15. use_streaming: bool, stream_batch_size: int,
  16. trust_remote_code: bool, **kwargs):
  17. self._download_config = None
  18. self._data_meta_config = None
  19. self._config_kwargs = kwargs
  20. self._dataset_version_cache_root_dir = None
  21. self._auth_config = None
  22. # The lock file path for meta-files and data-files
  23. self._global_meta_lock_file_path = None
  24. self._global_data_lock_file_path = None
  25. # General arguments for dataset
  26. self.hub = hub
  27. self.download_mode = download_mode
  28. self.dataset_name = dataset_name
  29. self.namespace = namespace
  30. self.version = version
  31. self.subset_name = subset_name
  32. self.split = split
  33. self.target = target
  34. self.data_dir = data_dir
  35. self.data_files = data_files
  36. self.cache_root_dir = cache_root_dir
  37. self.use_streaming = use_streaming
  38. self.stream_batch_size = stream_batch_size
  39. self.download_virgo_files: bool = False
  40. self.trust_remote_code: bool = trust_remote_code
  41. @property
  42. def config_kwargs(self) -> dict:
  43. return self._config_kwargs
  44. @config_kwargs.setter
  45. def config_kwargs(self, val: dict):
  46. self._config_kwargs = val
  47. @property
  48. def download_config(self) -> DataDownloadConfig:
  49. return self._download_config
  50. @download_config.setter
  51. def download_config(self, val: DataDownloadConfig):
  52. self._download_config = val
  53. @property
  54. def data_meta_config(self) -> DataMetaConfig:
  55. return self._data_meta_config
  56. @data_meta_config.setter
  57. def data_meta_config(self, val: DataMetaConfig):
  58. self._data_meta_config = val
  59. @property
  60. def dataset_version_cache_root_dir(self) -> str:
  61. return self._dataset_version_cache_root_dir
  62. @dataset_version_cache_root_dir.setter
  63. def dataset_version_cache_root_dir(self, val: str):
  64. self._dataset_version_cache_root_dir = val
  65. @property
  66. def global_meta_lock_file_path(self) -> str:
  67. return self._global_meta_lock_file_path
  68. @global_meta_lock_file_path.setter
  69. def global_meta_lock_file_path(self, val: str):
  70. self._global_meta_lock_file_path = val
  71. @property
  72. def global_data_lock_file_path(self) -> str:
  73. return self._global_data_lock_file_path
  74. @global_data_lock_file_path.setter
  75. def global_data_lock_file_path(self, val: str):
  76. self._global_data_lock_file_path = val
  77. @property
  78. def auth_config(self) -> BaseAuthConfig:
  79. return self._auth_config
  80. @auth_config.setter
  81. def auth_config(self, val: BaseAuthConfig):
  82. self._auth_config = val