info.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  1. # Copyright (c) Alibaba, Inc. and its affiliates.
  2. # Copyright 2022-present, the HuggingFace Inc. team.
  3. # yapf: disable
  4. import datetime
  5. from dataclasses import dataclass
  6. from typing import Any, Dict, List, Literal, Optional
  7. from modelscope.hub.utils.utils import convert_timestamp
  8. def _parse_siblings(siblings_data: Optional[List[Dict[str, Any]]]) -> List['RepoSibling']:
  9. """
  10. Parse siblings data into RepoSibling objects.
  11. Args:
  12. siblings_data: Raw siblings data from API response, supporting both
  13. uppercase (Path, Size, etc.) and lowercase (path, size, etc.) field names.
  14. Returns:
  15. List of RepoSibling objects.
  16. """
  17. if not siblings_data:
  18. return []
  19. return [
  20. RepoSibling(
  21. rfilename=sibling.get('Path') or sibling.get('path'),
  22. size=sibling.get('Size') or sibling.get('size'),
  23. blob_id=sibling.get('Sha256') or sibling.get('sha256'),
  24. type=sibling.get('Type') or sibling.get('type'),
  25. sha=sibling.get('Revision') or sibling.get('revision'),
  26. last_modified=convert_timestamp(sibling.get('CommittedDate') or sibling.get('committedDate')),
  27. lfs=BlobLfsInfo(
  28. size=sibling.get('Size') or sibling.get('size'),
  29. sha256=sibling.get('Sha256') or sibling.get('sha256'),
  30. )
  31. ) for sibling in siblings_data
  32. ]
  33. @dataclass
  34. class OrganizationInfo:
  35. """Organization information for a repository."""
  36. id: Optional[int]
  37. name: Optional[str]
  38. full_name: Optional[str]
  39. description: Optional[str]
  40. avatar: Optional[str]
  41. github_address: Optional[str]
  42. type: Optional[int]
  43. email: Optional[str]
  44. created_time: Optional[datetime.datetime]
  45. modified_time: Optional[datetime.datetime]
  46. def __init__(self, **kwargs):
  47. self.id = kwargs.pop('Id', None)
  48. self.name = kwargs.pop('Name', '')
  49. self.full_name = kwargs.pop('FullName', '')
  50. self.description = kwargs.pop('Description', '')
  51. self.avatar = kwargs.pop('Avatar', '')
  52. self.github_address = kwargs.pop('GithubAddress', '')
  53. self.type = kwargs.pop('Type', kwargs.pop('type', None))
  54. self.email = kwargs.pop('Email', kwargs.pop('email', ''))
  55. created_time = kwargs.pop('GmtCreated', kwargs.pop('created_time', None))
  56. self.created_time = convert_timestamp(created_time) if created_time else None
  57. modified_time = kwargs.pop('GmtModified', kwargs.pop('modified_time', None))
  58. self.modified_time = convert_timestamp(modified_time) if modified_time else None
  59. @dataclass
  60. class BlobLfsInfo:
  61. size: Optional[int] = None
  62. sha256: Optional[str] = None
  63. @dataclass
  64. class RepoSibling:
  65. """
  66. Contains basic information about a repo file inside a repo on the Hub.
  67. Attributes:
  68. rfilename (str): file name, relative to the repo root.
  69. size (`int`, *optional*): The file's size, in bytes.
  70. blob_id (`str`, *optional*): The file's git OID.
  71. lfs (`BlobLfsInfo`, *optional*): The file's LFS metadata.
  72. type (`str`, *optional*): The file's type.
  73. sha (`str`, *optional*): The file's latest commit SHA.
  74. last_modified (`datetime`, *optional*): The file's last modified time.
  75. """
  76. rfilename: str
  77. size: Optional[int] = None
  78. blob_id: Optional[str] = None
  79. type: Optional[str] = None
  80. sha: Optional[str] = None
  81. last_modified: Optional[datetime.datetime] = None
  82. lfs: Optional[BlobLfsInfo] = None
  83. @dataclass
  84. class ModelInfo:
  85. """
  86. Contains detailed information about a model on ModelScope Hub. This object is returned by [`model_info`].
  87. Attributes:
  88. id (`int`, *optional*): Model ID.
  89. name (`str`, *optional*): Model name.
  90. author (`str`, *optional*): Model author.
  91. chinese_name (`str`, *optional*): Chinese display name.
  92. visibility (`int`, *optional*): Visibility level (1=private, 5=public).
  93. is_published (`int`, *optional*): Whether the model is published.
  94. is_online (`int`, *optional*): Whether the model is online.
  95. already_star (`bool`, *optional*): Whether current user has starred this model.
  96. description (`str`, *optional*): Model description.
  97. license (`str`, *optional*): Model license.
  98. downloads (`int`, *optional*): Number of downloads.
  99. likes (`int`, *optional*): Number of likes.
  100. created_at (`datetime`, *optional*): Date of creation of the repo on the Hub.
  101. last_updated_time (`datetime`, *optional*): Last update timestamp.
  102. architectures (`List[str]`, *optional*): Model architectures.
  103. model_type (`List[str]`, *optional*): Model types.
  104. tasks (`List[Dict[str, Any]]`, *optional*): Supported tasks.
  105. readme_content (`str`, *optional*): README content.
  106. organization (`OrganizationInfo`, *optional*): Organization information.
  107. created_by (`str`, *optional*): Creator username.
  108. is_certification (`int`, *optional*): Certification status.
  109. approval_mode (`int`, *optional*): Approval mode.
  110. card_ready (`int`, *optional*): Whether model card is ready.
  111. backend_support (`str`, *optional*): Backend support information.
  112. model_infos (`Dict[str, Any]`, *optional*): Detailed model configuration information.
  113. tags (`List[str]`, *optional*): Model Tags.
  114. is_accessible (`int`, *optional*): Whether accessible.
  115. revision (`str`, *optional*): Revision/branch.
  116. related_arxiv_id (`List[str]`, *optional*): Related arXiv paper IDs.
  117. related_paper (`List[int]`, *optional*): Related papers.
  118. sha (`str`, *optional*): Latest commit SHA.
  119. last_modified (`datetime`, *optional*): Latest commit date.
  120. last_commit (`Dict[str, Any]`, *optional*): Latest commit information.
  121. siblings (List[RepoSibling], optional): Basic information about files that constitute the model.
  122. """
  123. id: Optional[int]
  124. name: Optional[str]
  125. author: Optional[str]
  126. chinese_name: Optional[str]
  127. visibility: Optional[int]
  128. is_published: Optional[int]
  129. is_online: Optional[int]
  130. already_star: Optional[bool]
  131. description: Optional[str]
  132. license: Optional[str]
  133. downloads: Optional[int]
  134. likes: Optional[int]
  135. created_at: Optional[datetime.datetime]
  136. last_updated_time: Optional[datetime.datetime]
  137. architectures: Optional[List[str]]
  138. model_type: Optional[List[str]]
  139. tasks: Optional[List[Dict[str, Any]]]
  140. readme_content: Optional[str]
  141. organization: Optional[OrganizationInfo]
  142. created_by: Optional[str]
  143. # Certification and approval
  144. is_certification: Optional[int]
  145. approval_mode: Optional[int]
  146. card_ready: Optional[int]
  147. # Model specific
  148. backend_support: Optional[str]
  149. model_infos: Optional[Dict[str, Any]]
  150. siblings: Optional[List[RepoSibling]]
  151. # Content and settings
  152. tags: Optional[List[str]]
  153. # Additional flags
  154. is_accessible: Optional[int]
  155. # Revision and version info
  156. revision: Optional[str]
  157. # External references
  158. related_arxiv_id: Optional[List[str]]
  159. related_paper: Optional[List[int]]
  160. # latest commit infomation
  161. last_commit: Optional[Dict[str, Any]]
  162. sha: Optional[str]
  163. last_modified: Optional[datetime.datetime]
  164. def __init__(self, **kwargs):
  165. self.id = kwargs.pop('Id', None)
  166. self.name = kwargs.pop('Name', '')
  167. self.chinese_name = kwargs.pop('ChineseName', '')
  168. self.visibility = kwargs.pop('Visibility', None)
  169. self.is_published = kwargs.pop('IsPublished', None)
  170. self.is_online = kwargs.pop('IsOnline', None)
  171. self.already_star = kwargs.pop('AlreadyStar', None)
  172. self.description = kwargs.pop('Description', '')
  173. self.license = kwargs.pop('License', '')
  174. self.downloads = kwargs.pop('Downloads', None)
  175. self.likes = kwargs.pop('Stars', None) or kwargs.pop('Likes', None)
  176. created_time = kwargs.pop('CreatedTime', None)
  177. self.created_at = convert_timestamp(created_time) if created_time else None
  178. last_updated_time = kwargs.pop('LastUpdatedTime', None)
  179. self.last_updated_time = convert_timestamp(last_updated_time) if last_updated_time else None
  180. self.architectures = kwargs.pop('Architectures', [])
  181. self.model_type = kwargs.pop('ModelType', [])
  182. self.tasks = kwargs.pop('Tasks', [])
  183. self.readme_content = kwargs.pop('ReadMeContent', '')
  184. org_data = kwargs.pop('Organization', None)
  185. self.organization = OrganizationInfo(**org_data) if org_data else None
  186. self.created_by = kwargs.pop('CreatedBy', None)
  187. self.is_certification = kwargs.pop('IsCertification', None)
  188. self.approval_mode = kwargs.pop('ApprovalMode', None)
  189. self.card_ready = kwargs.pop('CardReady', None)
  190. self.backend_support = kwargs.pop('BackendSupport', '{}')
  191. self.model_infos = kwargs.pop('ModelInfos', {})
  192. self.tags = kwargs.pop('Tags', [])
  193. self.is_accessible = kwargs.pop('IsAccessible', None)
  194. self.revision = kwargs.pop('Revision', '')
  195. self.related_arxiv_id = kwargs.pop('RelatedArxivId', [])
  196. self.related_paper = kwargs.pop('RelatedPaper', [])
  197. commits = kwargs.pop('commits', None) or kwargs.pop('Commits', None)
  198. if commits and hasattr(commits, 'commits') and commits.commits:
  199. last_commit = commits.commits[0]
  200. self.last_commit = last_commit.to_dict() if hasattr(last_commit, 'to_dict') else None
  201. self.sha = self.last_commit.get('id') if self.last_commit else None
  202. self.last_modified = convert_timestamp(self.last_commit.get('committed_date')) if self.last_commit else None
  203. else:
  204. self.last_commit = None
  205. self.sha = None
  206. self.last_modified = None
  207. self.author = kwargs.pop('author', '')
  208. siblings = kwargs.pop('siblings', None) or kwargs.pop('Siblings', None)
  209. self.siblings = _parse_siblings(siblings)
  210. # backward compatibility
  211. self.__dict__.update(kwargs)
  212. @dataclass
  213. class DatasetInfo:
  214. """
  215. Contains detailed information about a dataset on ModelScope Hub. This object is returned by [`dataset_info`].
  216. Attributes:
  217. id (`int`, *optional*)): Dataset ID.
  218. name (`str`, *optional*)): Dataset name.
  219. author (`str`, *optional*): Dataset owner (user or organization).
  220. chinese_name (`str`, *optional*): Chinese display name.
  221. visibility (`int`, *optional*)): Visibility level (1=private, 3=interal, 5=public).
  222. 'internal' means visible to logged-in users only.
  223. already_star (`bool`, *optional*)): Whether current user has starred this dataset.
  224. description (`str`, *optional*): Dataset description.
  225. license (`str`, *optional*)): Dataset license.
  226. downloads (`int`, *optional*)): Number of downloads.
  227. likes (`int`, *optional*)): Number of likes.
  228. created_at (`int`, *optional*): Creation timestamp.
  229. last_updated_time (`int`, *optional*): Last update timestamp.
  230. readme_content (`str`, *optional*): README content.
  231. organization (`OrganizationInfo`, *optional*): Organization information.
  232. created_by (`str`, *optional*): Creator username.
  233. tags (`List[Dict[str, Any]]`): Dataset tags.
  234. last_commit (`Dict[str, Any]`, *optional*): Latest commit information.
  235. sha (`str`, *optional*): Latest commit SHA.
  236. last_modified (`datetime`, *optional*): Latest commit date.
  237. siblings (`List[RepoSibling]`, *optional*): Basic information about files in the dataset.
  238. """
  239. id: Optional[int]
  240. name: Optional[str]
  241. author: Optional[str]
  242. chinese_name: Optional[str]
  243. visibility: Optional[Literal[1, 3, 5]]
  244. already_star: Optional[bool]
  245. description: Optional[str]
  246. license: Optional[str]
  247. downloads: Optional[int]
  248. likes: Optional[int]
  249. created_at: Optional[datetime.datetime]
  250. last_updated_time: Optional[datetime.datetime]
  251. readme_content: Optional[str]
  252. organization: Optional[OrganizationInfo]
  253. created_by: Optional[str]
  254. tags: Optional[List[Dict[str, Any]]]
  255. last_commit: Optional[Dict[str, Any]]
  256. sha: Optional[str]
  257. last_modified: Optional[datetime.datetime]
  258. siblings: Optional[List[RepoSibling]]
  259. def __init__(self, **kwargs):
  260. self.id = kwargs.pop('Id', None)
  261. self.name = kwargs.pop('Name', '')
  262. self.author = kwargs.pop('author', kwargs.pop('Owner', None) or kwargs.pop('Namespace', None))
  263. self.chinese_name = kwargs.pop('ChineseName', '')
  264. self.visibility = kwargs.pop('Visibility', None)
  265. self.already_star = kwargs.pop('AlreadyStar', None)
  266. self.description = kwargs.pop('Description', '')
  267. self.likes = kwargs.pop('Likes', None) or kwargs.pop('Stars', None)
  268. self.license = kwargs.pop('License', '')
  269. self.downloads = kwargs.pop('Downloads', None)
  270. created_time = kwargs.pop('GmtCreate', None)
  271. self.created_at = convert_timestamp(created_time) if created_time else None
  272. last_updated_time = kwargs.pop('LastUpdatedTime', None)
  273. self.last_updated_time = convert_timestamp(last_updated_time) if last_updated_time else None
  274. self.readme_content = kwargs.pop('ReadMeContent', '')
  275. org_data = kwargs.pop('Organization', None)
  276. self.organization = OrganizationInfo(**org_data) if org_data else None
  277. self.created_by = kwargs.pop('CreatedBy', None)
  278. self.tags = kwargs.pop('Tags', [])
  279. commits = kwargs.pop('commits', None) or kwargs.pop('Commits', None)
  280. if commits and hasattr(commits, 'commits') and commits.commits:
  281. last_commit = commits.commits[0]
  282. self.last_commit = last_commit.to_dict() if hasattr(last_commit, 'to_dict') else None
  283. self.sha = self.last_commit.get('id') if self.last_commit else None
  284. self.last_modified = convert_timestamp(self.last_commit.get('committed_date')) if self.last_commit else None
  285. else:
  286. self.last_commit = None
  287. self.sha = None
  288. self.last_modified = None
  289. siblings = kwargs.pop('siblings', None) or kwargs.pop('Siblings', None)
  290. self.siblings = _parse_siblings(siblings)
  291. # backward compatibility
  292. self.__dict__.update(kwargs)