repocard.py 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827
  1. import os
  2. import re
  3. from pathlib import Path
  4. from typing import Any, Dict, Literal, Optional, Type, Union
  5. import requests
  6. import yaml
  7. from huggingface_hub.file_download import hf_hub_download
  8. from huggingface_hub.hf_api import upload_file
  9. from huggingface_hub.repocard_data import (
  10. CardData,
  11. DatasetCardData,
  12. EvalResult,
  13. ModelCardData,
  14. SpaceCardData,
  15. eval_results_to_model_index,
  16. model_index_to_eval_results,
  17. )
  18. from huggingface_hub.utils import get_session, is_jinja_available, yaml_dump
  19. from . import constants
  20. from .errors import EntryNotFoundError
  21. from .utils import SoftTemporaryDirectory, logging, validate_hf_hub_args
  22. logger = logging.get_logger(__name__)
  23. TEMPLATE_MODELCARD_PATH = Path(__file__).parent / "templates" / "modelcard_template.md"
  24. TEMPLATE_DATASETCARD_PATH = Path(__file__).parent / "templates" / "datasetcard_template.md"
  25. # exact same regex as in the Hub server. Please keep in sync.
  26. # See https://github.com/huggingface/moon-landing/blob/main/server/lib/ViewMarkdown.ts#L18
  27. REGEX_YAML_BLOCK = re.compile(r"^(\s*---[\r\n]+)([\S\s]*?)([\r\n]+---(\r\n|\n|$))")
  28. class RepoCard:
  29. card_data_class = CardData
  30. default_template_path = TEMPLATE_MODELCARD_PATH
  31. repo_type = "model"
  32. def __init__(self, content: str, ignore_metadata_errors: bool = False):
  33. """Initialize a RepoCard from string content. The content should be a
  34. Markdown file with a YAML block at the beginning and a Markdown body.
  35. Args:
  36. content (`str`): The content of the Markdown file.
  37. Example:
  38. ```python
  39. >>> from huggingface_hub.repocard import RepoCard
  40. >>> text = '''
  41. ... ---
  42. ... language: en
  43. ... license: mit
  44. ... ---
  45. ...
  46. ... # My repo
  47. ... '''
  48. >>> card = RepoCard(text)
  49. >>> card.data.to_dict()
  50. {'language': 'en', 'license': 'mit'}
  51. >>> card.text
  52. '\\n# My repo\\n'
  53. ```
  54. > [!TIP]
  55. > Raises the following error:
  56. >
  57. > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
  58. > when the content of the repo card metadata is not a dictionary.
  59. """
  60. # Set the content of the RepoCard, as well as underlying .data and .text attributes.
  61. # See the `content` property setter for more details.
  62. self.ignore_metadata_errors = ignore_metadata_errors
  63. self.content = content
  64. @property
  65. def content(self):
  66. """The content of the RepoCard, including the YAML block and the Markdown body."""
  67. line_break = _detect_line_ending(self._content) or "\n"
  68. return f"---{line_break}{self.data.to_yaml(line_break=line_break, original_order=self._original_order)}{line_break}---{line_break}{self.text}"
  69. @content.setter
  70. def content(self, content: str):
  71. """Set the content of the RepoCard."""
  72. self._content = content
  73. match = REGEX_YAML_BLOCK.search(content)
  74. if match:
  75. # Metadata found in the YAML block
  76. yaml_block = match.group(2)
  77. self.text = content[match.end() :]
  78. data_dict = yaml.safe_load(yaml_block)
  79. if data_dict is None:
  80. data_dict = {}
  81. # The YAML block's data should be a dictionary
  82. if not isinstance(data_dict, dict):
  83. raise ValueError("repo card metadata block should be a dict")
  84. else:
  85. # Model card without metadata... create empty metadata
  86. logger.warning("Repo card metadata block was not found. Setting CardData to empty.")
  87. data_dict = {}
  88. self.text = content
  89. self.data = self.card_data_class(**data_dict, ignore_metadata_errors=self.ignore_metadata_errors)
  90. self._original_order = list(data_dict.keys())
  91. def __str__(self):
  92. return self.content
  93. def save(self, filepath: Union[Path, str]):
  94. r"""Save a RepoCard to a file.
  95. Args:
  96. filepath (`Union[Path, str]`): Filepath to the markdown file to save.
  97. Example:
  98. ```python
  99. >>> from huggingface_hub.repocard import RepoCard
  100. >>> card = RepoCard("---\nlanguage: en\n---\n# This is a test repo card")
  101. >>> card.save("/tmp/test.md")
  102. ```
  103. """
  104. filepath = Path(filepath)
  105. filepath.parent.mkdir(parents=True, exist_ok=True)
  106. # Preserve newlines as in the existing file.
  107. with open(filepath, mode="w", newline="", encoding="utf-8") as f:
  108. f.write(str(self))
  109. @classmethod
  110. def load(
  111. cls,
  112. repo_id_or_path: Union[str, Path],
  113. repo_type: Optional[str] = None,
  114. token: Optional[str] = None,
  115. ignore_metadata_errors: bool = False,
  116. ):
  117. """Initialize a RepoCard from a Hugging Face Hub repo's README.md or a local filepath.
  118. Args:
  119. repo_id_or_path (`Union[str, Path]`):
  120. The repo ID associated with a Hugging Face Hub repo or a local filepath.
  121. repo_type (`str`, *optional*):
  122. The type of Hugging Face repo to push to. Defaults to None, which will use use "model". Other options
  123. are "dataset" and "space". Not used when loading from a local filepath. If this is called from a child
  124. class, the default value will be the child class's `repo_type`.
  125. token (`str`, *optional*):
  126. Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to the stored token.
  127. ignore_metadata_errors (`str`):
  128. If True, errors while parsing the metadata section will be ignored. Some information might be lost during
  129. the process. Use it at your own risk.
  130. Returns:
  131. [`huggingface_hub.repocard.RepoCard`]: The RepoCard (or subclass) initialized from the repo's
  132. README.md file or filepath.
  133. Example:
  134. ```python
  135. >>> from huggingface_hub.repocard import RepoCard
  136. >>> card = RepoCard.load("nateraw/food")
  137. >>> assert card.data.tags == ["generated_from_trainer", "image-classification", "pytorch"]
  138. ```
  139. """
  140. if Path(repo_id_or_path).is_file():
  141. card_path = Path(repo_id_or_path)
  142. elif isinstance(repo_id_or_path, str):
  143. card_path = Path(
  144. hf_hub_download(
  145. repo_id_or_path,
  146. constants.REPOCARD_NAME,
  147. repo_type=repo_type or cls.repo_type,
  148. token=token,
  149. )
  150. )
  151. else:
  152. raise ValueError(f"Cannot load RepoCard: path not found on disk ({repo_id_or_path}).")
  153. # Preserve newlines in the existing file.
  154. with card_path.open(mode="r", newline="", encoding="utf-8") as f:
  155. return cls(f.read(), ignore_metadata_errors=ignore_metadata_errors)
  156. def validate(self, repo_type: Optional[str] = None):
  157. """Validates card against Hugging Face Hub's card validation logic.
  158. Using this function requires access to the internet, so it is only called
  159. internally by [`huggingface_hub.repocard.RepoCard.push_to_hub`].
  160. Args:
  161. repo_type (`str`, *optional*, defaults to "model"):
  162. The type of Hugging Face repo to push to. Options are "model", "dataset", and "space".
  163. If this function is called from a child class, the default will be the child class's `repo_type`.
  164. > [!TIP]
  165. > Raises the following errors:
  166. >
  167. > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
  168. > if the card fails validation checks.
  169. > - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
  170. > if the request to the Hub API fails for any other reason.
  171. """
  172. # If repo type is provided, otherwise, use the repo type of the card.
  173. repo_type = repo_type or self.repo_type
  174. body = {
  175. "repoType": repo_type,
  176. "content": str(self),
  177. }
  178. headers = {"Accept": "text/plain"}
  179. try:
  180. r = get_session().post("https://huggingface.co/api/validate-yaml", body, headers=headers)
  181. r.raise_for_status()
  182. except requests.exceptions.HTTPError as exc:
  183. if r.status_code == 400:
  184. raise ValueError(r.text)
  185. else:
  186. raise exc
  187. def push_to_hub(
  188. self,
  189. repo_id: str,
  190. token: Optional[str] = None,
  191. repo_type: Optional[str] = None,
  192. commit_message: Optional[str] = None,
  193. commit_description: Optional[str] = None,
  194. revision: Optional[str] = None,
  195. create_pr: Optional[bool] = None,
  196. parent_commit: Optional[str] = None,
  197. ):
  198. """Push a RepoCard to a Hugging Face Hub repo.
  199. Args:
  200. repo_id (`str`):
  201. The repo ID of the Hugging Face Hub repo to push to. Example: "nateraw/food".
  202. token (`str`, *optional*):
  203. Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to
  204. the stored token.
  205. repo_type (`str`, *optional*, defaults to "model"):
  206. The type of Hugging Face repo to push to. Options are "model", "dataset", and "space". If this
  207. function is called by a child class, it will default to the child class's `repo_type`.
  208. commit_message (`str`, *optional*):
  209. The summary / title / first line of the generated commit.
  210. commit_description (`str`, *optional*)
  211. The description of the generated commit.
  212. revision (`str`, *optional*):
  213. The git revision to commit from. Defaults to the head of the `"main"` branch.
  214. create_pr (`bool`, *optional*):
  215. Whether or not to create a Pull Request with this commit. Defaults to `False`.
  216. parent_commit (`str`, *optional*):
  217. The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
  218. If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
  219. If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
  220. Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
  221. especially useful if the repo is updated / committed to concurrently.
  222. Returns:
  223. `str`: URL of the commit which updated the card metadata.
  224. """
  225. # If repo type is provided, otherwise, use the repo type of the card.
  226. repo_type = repo_type or self.repo_type
  227. # Validate card before pushing to hub
  228. self.validate(repo_type=repo_type)
  229. with SoftTemporaryDirectory() as tmpdir:
  230. tmp_path = Path(tmpdir) / constants.REPOCARD_NAME
  231. tmp_path.write_text(str(self), encoding="utf-8")
  232. url = upload_file(
  233. path_or_fileobj=str(tmp_path),
  234. path_in_repo=constants.REPOCARD_NAME,
  235. repo_id=repo_id,
  236. token=token,
  237. repo_type=repo_type,
  238. commit_message=commit_message,
  239. commit_description=commit_description,
  240. create_pr=create_pr,
  241. revision=revision,
  242. parent_commit=parent_commit,
  243. )
  244. return url
  245. @classmethod
  246. def from_template(
  247. cls,
  248. card_data: CardData,
  249. template_path: Optional[str] = None,
  250. template_str: Optional[str] = None,
  251. **template_kwargs,
  252. ):
  253. """Initialize a RepoCard from a template. By default, it uses the default template.
  254. Templates are Jinja2 templates that can be customized by passing keyword arguments.
  255. Args:
  256. card_data (`huggingface_hub.CardData`):
  257. A huggingface_hub.CardData instance containing the metadata you want to include in the YAML
  258. header of the repo card on the Hugging Face Hub.
  259. template_path (`str`, *optional*):
  260. A path to a markdown file with optional Jinja template variables that can be filled
  261. in with `template_kwargs`. Defaults to the default template.
  262. Returns:
  263. [`huggingface_hub.repocard.RepoCard`]: A RepoCard instance with the specified card data and content from the
  264. template.
  265. """
  266. if is_jinja_available():
  267. import jinja2
  268. else:
  269. raise ImportError(
  270. "Using RepoCard.from_template requires Jinja2 to be installed. Please"
  271. " install it with `pip install Jinja2`."
  272. )
  273. kwargs = card_data.to_dict().copy()
  274. kwargs.update(template_kwargs) # Template_kwargs have priority
  275. if template_path is not None:
  276. template_str = Path(template_path).read_text()
  277. if template_str is None:
  278. template_str = Path(cls.default_template_path).read_text()
  279. template = jinja2.Template(template_str)
  280. content = template.render(card_data=card_data.to_yaml(), **kwargs)
  281. return cls(content)
  282. class ModelCard(RepoCard):
  283. card_data_class = ModelCardData
  284. default_template_path = TEMPLATE_MODELCARD_PATH
  285. repo_type = "model"
  286. @classmethod
  287. def from_template( # type: ignore # violates Liskov property but easier to use
  288. cls,
  289. card_data: ModelCardData,
  290. template_path: Optional[str] = None,
  291. template_str: Optional[str] = None,
  292. **template_kwargs,
  293. ):
  294. """Initialize a ModelCard from a template. By default, it uses the default template, which can be found here:
  295. https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/modelcard_template.md
  296. Templates are Jinja2 templates that can be customized by passing keyword arguments.
  297. Args:
  298. card_data (`huggingface_hub.ModelCardData`):
  299. A huggingface_hub.ModelCardData instance containing the metadata you want to include in the YAML
  300. header of the model card on the Hugging Face Hub.
  301. template_path (`str`, *optional*):
  302. A path to a markdown file with optional Jinja template variables that can be filled
  303. in with `template_kwargs`. Defaults to the default template.
  304. Returns:
  305. [`huggingface_hub.ModelCard`]: A ModelCard instance with the specified card data and content from the
  306. template.
  307. Example:
  308. ```python
  309. >>> from huggingface_hub import ModelCard, ModelCardData, EvalResult
  310. >>> # Using the Default Template
  311. >>> card_data = ModelCardData(
  312. ... language='en',
  313. ... license='mit',
  314. ... library_name='timm',
  315. ... tags=['image-classification', 'resnet'],
  316. ... datasets=['beans'],
  317. ... metrics=['accuracy'],
  318. ... )
  319. >>> card = ModelCard.from_template(
  320. ... card_data,
  321. ... model_description='This model does x + y...'
  322. ... )
  323. >>> # Including Evaluation Results
  324. >>> card_data = ModelCardData(
  325. ... language='en',
  326. ... tags=['image-classification', 'resnet'],
  327. ... eval_results=[
  328. ... EvalResult(
  329. ... task_type='image-classification',
  330. ... dataset_type='beans',
  331. ... dataset_name='Beans',
  332. ... metric_type='accuracy',
  333. ... metric_value=0.9,
  334. ... ),
  335. ... ],
  336. ... model_name='my-cool-model',
  337. ... )
  338. >>> card = ModelCard.from_template(card_data)
  339. >>> # Using a Custom Template
  340. >>> card_data = ModelCardData(
  341. ... language='en',
  342. ... tags=['image-classification', 'resnet']
  343. ... )
  344. >>> card = ModelCard.from_template(
  345. ... card_data=card_data,
  346. ... template_path='./src/huggingface_hub/templates/modelcard_template.md',
  347. ... custom_template_var='custom value', # will be replaced in template if it exists
  348. ... )
  349. ```
  350. """
  351. return super().from_template(card_data, template_path, template_str, **template_kwargs)
  352. class DatasetCard(RepoCard):
  353. card_data_class = DatasetCardData
  354. default_template_path = TEMPLATE_DATASETCARD_PATH
  355. repo_type = "dataset"
  356. @classmethod
  357. def from_template( # type: ignore # violates Liskov property but easier to use
  358. cls,
  359. card_data: DatasetCardData,
  360. template_path: Optional[str] = None,
  361. template_str: Optional[str] = None,
  362. **template_kwargs,
  363. ):
  364. """Initialize a DatasetCard from a template. By default, it uses the default template, which can be found here:
  365. https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/datasetcard_template.md
  366. Templates are Jinja2 templates that can be customized by passing keyword arguments.
  367. Args:
  368. card_data (`huggingface_hub.DatasetCardData`):
  369. A huggingface_hub.DatasetCardData instance containing the metadata you want to include in the YAML
  370. header of the dataset card on the Hugging Face Hub.
  371. template_path (`str`, *optional*):
  372. A path to a markdown file with optional Jinja template variables that can be filled
  373. in with `template_kwargs`. Defaults to the default template.
  374. Returns:
  375. [`huggingface_hub.DatasetCard`]: A DatasetCard instance with the specified card data and content from the
  376. template.
  377. Example:
  378. ```python
  379. >>> from huggingface_hub import DatasetCard, DatasetCardData
  380. >>> # Using the Default Template
  381. >>> card_data = DatasetCardData(
  382. ... language='en',
  383. ... license='mit',
  384. ... annotations_creators='crowdsourced',
  385. ... task_categories=['text-classification'],
  386. ... task_ids=['sentiment-classification', 'text-scoring'],
  387. ... multilinguality='monolingual',
  388. ... pretty_name='My Text Classification Dataset',
  389. ... )
  390. >>> card = DatasetCard.from_template(
  391. ... card_data,
  392. ... pretty_name=card_data.pretty_name,
  393. ... )
  394. >>> # Using a Custom Template
  395. >>> card_data = DatasetCardData(
  396. ... language='en',
  397. ... license='mit',
  398. ... )
  399. >>> card = DatasetCard.from_template(
  400. ... card_data=card_data,
  401. ... template_path='./src/huggingface_hub/templates/datasetcard_template.md',
  402. ... custom_template_var='custom value', # will be replaced in template if it exists
  403. ... )
  404. ```
  405. """
  406. return super().from_template(card_data, template_path, template_str, **template_kwargs)
class SpaceCard(RepoCard):
    """Repo card for Space repositories; its metadata is held in a `SpaceCardData`."""

    card_data_class = SpaceCardData
    # No Space-specific template is referenced here: the model card template is reused.
    default_template_path = TEMPLATE_MODELCARD_PATH
    repo_type = "space"
  411. def _detect_line_ending(content: str) -> Literal["\r", "\n", "\r\n", None]: # noqa: F722
  412. """Detect the line ending of a string. Used by RepoCard to avoid making huge diff on newlines.
  413. Uses same implementation as in Hub server, keep it in sync.
  414. Returns:
  415. str: The detected line ending of the string.
  416. """
  417. cr = content.count("\r")
  418. lf = content.count("\n")
  419. crlf = content.count("\r\n")
  420. if cr + lf == 0:
  421. return None
  422. if crlf == cr and crlf == lf:
  423. return "\r\n"
  424. if cr > lf:
  425. return "\r"
  426. else:
  427. return "\n"
  428. def metadata_load(local_path: Union[str, Path]) -> Optional[Dict]:
  429. content = Path(local_path).read_text()
  430. match = REGEX_YAML_BLOCK.search(content)
  431. if match:
  432. yaml_block = match.group(2)
  433. data = yaml.safe_load(yaml_block)
  434. if data is None or isinstance(data, dict):
  435. return data
  436. raise ValueError("repo card metadata block should be a dict")
  437. else:
  438. return None
  439. def metadata_save(local_path: Union[str, Path], data: Dict) -> None:
  440. """
  441. Save the metadata dict in the upper YAML part Trying to preserve newlines as
  442. in the existing file. Docs about open() with newline="" parameter:
  443. https://docs.python.org/3/library/functions.html?highlight=open#open Does
  444. not work with "^M" linebreaks, which are replaced by \n
  445. """
  446. line_break = "\n"
  447. content = ""
  448. # try to detect existing newline character
  449. if os.path.exists(local_path):
  450. with open(local_path, "r", newline="", encoding="utf8") as readme:
  451. content = readme.read()
  452. if isinstance(readme.newlines, tuple):
  453. line_break = readme.newlines[0]
  454. elif isinstance(readme.newlines, str):
  455. line_break = readme.newlines
  456. # creates a new file if it not
  457. with open(local_path, "w", newline="", encoding="utf8") as readme:
  458. data_yaml = yaml_dump(data, sort_keys=False, line_break=line_break)
  459. # sort_keys: keep dict order
  460. match = REGEX_YAML_BLOCK.search(content)
  461. if match:
  462. output = content[: match.start()] + f"---{line_break}{data_yaml}---{line_break}" + content[match.end() :]
  463. else:
  464. output = f"---{line_break}{data_yaml}---{line_break}{content}"
  465. readme.write(output)
  466. readme.close()
  467. def metadata_eval_result(
  468. *,
  469. model_pretty_name: str,
  470. task_pretty_name: str,
  471. task_id: str,
  472. metrics_pretty_name: str,
  473. metrics_id: str,
  474. metrics_value: Any,
  475. dataset_pretty_name: str,
  476. dataset_id: str,
  477. metrics_config: Optional[str] = None,
  478. metrics_verified: bool = False,
  479. dataset_config: Optional[str] = None,
  480. dataset_split: Optional[str] = None,
  481. dataset_revision: Optional[str] = None,
  482. metrics_verification_token: Optional[str] = None,
  483. ) -> Dict:
  484. """
  485. Creates a metadata dict with the result from a model evaluated on a dataset.
  486. Args:
  487. model_pretty_name (`str`):
  488. The name of the model in natural language.
  489. task_pretty_name (`str`):
  490. The name of a task in natural language.
  491. task_id (`str`):
  492. Example: automatic-speech-recognition. A task id.
  493. metrics_pretty_name (`str`):
  494. A name for the metric in natural language. Example: Test WER.
  495. metrics_id (`str`):
  496. Example: wer. A metric id from https://hf.co/metrics.
  497. metrics_value (`Any`):
  498. The value from the metric. Example: 20.0 or "20.0 ± 1.2".
  499. dataset_pretty_name (`str`):
  500. The name of the dataset in natural language.
  501. dataset_id (`str`):
  502. Example: common_voice. A dataset id from https://hf.co/datasets.
  503. metrics_config (`str`, *optional*):
  504. The name of the metric configuration used in `load_metric()`.
  505. Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`.
  506. metrics_verified (`bool`, *optional*, defaults to `False`):
  507. Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set.
  508. dataset_config (`str`, *optional*):
  509. Example: fr. The name of the dataset configuration used in `load_dataset()`.
  510. dataset_split (`str`, *optional*):
  511. Example: test. The name of the dataset split used in `load_dataset()`.
  512. dataset_revision (`str`, *optional*):
  513. Example: 5503434ddd753f426f4b38109466949a1217c2bb. The name of the dataset dataset revision
  514. used in `load_dataset()`.
  515. metrics_verification_token (`bool`, *optional*):
  516. A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not.
  517. Returns:
  518. `dict`: a metadata dict with the result from a model evaluated on a dataset.
  519. Example:
  520. ```python
  521. >>> from huggingface_hub import metadata_eval_result
  522. >>> results = metadata_eval_result(
  523. ... model_pretty_name="RoBERTa fine-tuned on ReactionGIF",
  524. ... task_pretty_name="Text Classification",
  525. ... task_id="text-classification",
  526. ... metrics_pretty_name="Accuracy",
  527. ... metrics_id="accuracy",
  528. ... metrics_value=0.2662102282047272,
  529. ... dataset_pretty_name="ReactionJPEG",
  530. ... dataset_id="julien-c/reactionjpeg",
  531. ... dataset_config="default",
  532. ... dataset_split="test",
  533. ... )
  534. >>> results == {
  535. ... 'model-index': [
  536. ... {
  537. ... 'name': 'RoBERTa fine-tuned on ReactionGIF',
  538. ... 'results': [
  539. ... {
  540. ... 'task': {
  541. ... 'type': 'text-classification',
  542. ... 'name': 'Text Classification'
  543. ... },
  544. ... 'dataset': {
  545. ... 'name': 'ReactionJPEG',
  546. ... 'type': 'julien-c/reactionjpeg',
  547. ... 'config': 'default',
  548. ... 'split': 'test'
  549. ... },
  550. ... 'metrics': [
  551. ... {
  552. ... 'type': 'accuracy',
  553. ... 'value': 0.2662102282047272,
  554. ... 'name': 'Accuracy',
  555. ... 'verified': False
  556. ... }
  557. ... ]
  558. ... }
  559. ... ]
  560. ... }
  561. ... ]
  562. ... }
  563. True
  564. ```
  565. """
  566. return {
  567. "model-index": eval_results_to_model_index(
  568. model_name=model_pretty_name,
  569. eval_results=[
  570. EvalResult(
  571. task_name=task_pretty_name,
  572. task_type=task_id,
  573. metric_name=metrics_pretty_name,
  574. metric_type=metrics_id,
  575. metric_value=metrics_value,
  576. dataset_name=dataset_pretty_name,
  577. dataset_type=dataset_id,
  578. metric_config=metrics_config,
  579. verified=metrics_verified,
  580. verify_token=metrics_verification_token,
  581. dataset_config=dataset_config,
  582. dataset_split=dataset_split,
  583. dataset_revision=dataset_revision,
  584. )
  585. ],
  586. )
  587. }
  588. @validate_hf_hub_args
  589. def metadata_update(
  590. repo_id: str,
  591. metadata: Dict,
  592. *,
  593. repo_type: Optional[str] = None,
  594. overwrite: bool = False,
  595. token: Optional[str] = None,
  596. commit_message: Optional[str] = None,
  597. commit_description: Optional[str] = None,
  598. revision: Optional[str] = None,
  599. create_pr: bool = False,
  600. parent_commit: Optional[str] = None,
  601. ) -> str:
  602. """
  603. Updates the metadata in the README.md of a repository on the Hugging Face Hub.
  604. If the README.md file doesn't exist yet, a new one is created with metadata and an
  605. the default ModelCard or DatasetCard template. For `space` repo, an error is thrown
  606. as a Space cannot exist without a `README.md` file.
  607. Args:
  608. repo_id (`str`):
  609. The name of the repository.
  610. metadata (`dict`):
  611. A dictionary containing the metadata to be updated.
  612. repo_type (`str`, *optional*):
  613. Set to `"dataset"` or `"space"` if updating to a dataset or space,
  614. `None` or `"model"` if updating to a model. Default is `None`.
  615. overwrite (`bool`, *optional*, defaults to `False`):
  616. If set to `True` an existing field can be overwritten, otherwise
  617. attempting to overwrite an existing field will cause an error.
  618. token (`str`, *optional*):
  619. The Hugging Face authentication token.
  620. commit_message (`str`, *optional*):
  621. The summary / title / first line of the generated commit. Defaults to
  622. `f"Update metadata with huggingface_hub"`
  623. commit_description (`str` *optional*)
  624. The description of the generated commit
  625. revision (`str`, *optional*):
  626. The git revision to commit from. Defaults to the head of the
  627. `"main"` branch.
  628. create_pr (`boolean`, *optional*):
  629. Whether or not to create a Pull Request from `revision` with that commit.
  630. Defaults to `False`.
  631. parent_commit (`str`, *optional*):
  632. The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
  633. If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
  634. If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
  635. Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
  636. especially useful if the repo is updated / committed to concurrently.
  637. Returns:
  638. `str`: URL of the commit which updated the card metadata.
  639. Example:
  640. ```python
  641. >>> from huggingface_hub import metadata_update
  642. >>> metadata = {'model-index': [{'name': 'RoBERTa fine-tuned on ReactionGIF',
  643. ... 'results': [{'dataset': {'name': 'ReactionGIF',
  644. ... 'type': 'julien-c/reactiongif'},
  645. ... 'metrics': [{'name': 'Recall',
  646. ... 'type': 'recall',
  647. ... 'value': 0.7762102282047272}],
  648. ... 'task': {'name': 'Text Classification',
  649. ... 'type': 'text-classification'}}]}]}
  650. >>> url = metadata_update("hf-internal-testing/reactiongif-roberta-card", metadata)
  651. ```
  652. """
  653. commit_message = commit_message if commit_message is not None else "Update metadata with huggingface_hub"
  654. # Card class given repo_type
  655. card_class: Type[RepoCard]
  656. if repo_type is None or repo_type == "model":
  657. card_class = ModelCard
  658. elif repo_type == "dataset":
  659. card_class = DatasetCard
  660. elif repo_type == "space":
  661. card_class = RepoCard
  662. else:
  663. raise ValueError(f"Unknown repo_type: {repo_type}")
  664. # Either load repo_card from the Hub or create an empty one.
  665. # NOTE: Will not create the repo if it doesn't exist.
  666. try:
  667. card = card_class.load(repo_id, token=token, repo_type=repo_type)
  668. except EntryNotFoundError:
  669. if repo_type == "space":
  670. raise ValueError("Cannot update metadata on a Space that doesn't contain a `README.md` file.")
  671. # Initialize a ModelCard or DatasetCard from default template and no data.
  672. # Cast to the concrete expected card type to satisfy type checkers.
  673. card = card_class.from_template(CardData()) # type: ignore[return-value]
  674. for key, value in metadata.items():
  675. if key == "model-index":
  676. # if the new metadata doesn't include a name, either use existing one or repo name
  677. if "name" not in value[0]:
  678. value[0]["name"] = getattr(card, "model_name", repo_id)
  679. model_name, new_results = model_index_to_eval_results(value)
  680. if card.data.eval_results is None:
  681. card.data.eval_results = new_results
  682. card.data.model_name = model_name
  683. else:
  684. existing_results = card.data.eval_results
  685. # Iterate over new results
  686. # Iterate over existing results
  687. # If both results describe the same metric but value is different:
  688. # If overwrite=True: overwrite the metric value
  689. # Else: raise ValueError
  690. # Else: append new result to existing ones.
  691. for new_result in new_results:
  692. result_found = False
  693. for existing_result in existing_results:
  694. if new_result.is_equal_except_value(existing_result):
  695. if new_result != existing_result and not overwrite:
  696. raise ValueError(
  697. "You passed a new value for the existing metric"
  698. f" 'name: {new_result.metric_name}, type: "
  699. f"{new_result.metric_type}'. Set `overwrite=True`"
  700. " to overwrite existing metrics."
  701. )
  702. result_found = True
  703. existing_result.metric_value = new_result.metric_value
  704. if existing_result.verified is True:
  705. existing_result.verify_token = new_result.verify_token
  706. if not result_found:
  707. card.data.eval_results.append(new_result)
  708. else:
  709. # Any metadata that is not a result metric
  710. if card.data.get(key) is not None and not overwrite and card.data.get(key) != value:
  711. raise ValueError(
  712. f"You passed a new value for the existing meta data field '{key}'."
  713. " Set `overwrite=True` to overwrite existing metadata."
  714. )
  715. else:
  716. card.data[key] = value
  717. return card.push_to_hub(
  718. repo_id,
  719. token=token,
  720. repo_type=repo_type,
  721. commit_message=commit_message,
  722. commit_description=commit_description,
  723. create_pr=create_pr,
  724. revision=revision,
  725. parent_commit=parent_commit,
  726. )