|
import os |
|
import re |
|
import warnings |
|
from pathlib import Path |
|
from typing import Any, Dict, Literal, Optional, Type, Union |
|
|
|
import requests |
|
import yaml |
|
|
|
from huggingface_hub.file_download import hf_hub_download |
|
from huggingface_hub.hf_api import upload_file |
|
from huggingface_hub.repocard_data import ( |
|
CardData, |
|
DatasetCardData, |
|
EvalResult, |
|
ModelCardData, |
|
SpaceCardData, |
|
eval_results_to_model_index, |
|
model_index_to_eval_results, |
|
) |
|
from huggingface_hub.utils import get_session, is_jinja_available, yaml_dump |
|
|
|
from .constants import REPOCARD_NAME |
|
from .utils import EntryNotFoundError, SoftTemporaryDirectory, validate_hf_hub_args |
|
|
|
|
|
# Default Jinja/Markdown templates, located in the `templates` folder next to this module.
TEMPLATE_MODELCARD_PATH = Path(__file__).parent.joinpath("templates", "modelcard_template.md")
TEMPLATE_DATASETCARD_PATH = Path(__file__).parent.joinpath("templates", "datasetcard_template.md")

# Matches the leading YAML metadata block of a card:
#   group(1): the opening `---` line (optionally preceded by whitespace)
#   group(2): the YAML payload (lazy match)
#   group(3): the closing `---` line, terminated by a line break or end of string
REGEX_YAML_BLOCK = re.compile(r"^(\s*---[\r\n]+)([\S\s]*?)([\r\n]+---(\r\n|\n|$))")
|
|
|
|
|
class RepoCard:
    """Repo card (README.md) made of a YAML metadata header and a Markdown body.

    Subclasses customize `card_data_class`, `default_template_path` and `repo_type`.
    """

    # Class instantiated with the parsed YAML metadata (see the `content` setter).
    card_data_class = CardData
    # Template rendered by `from_template` when no `template_path` is given.
    default_template_path = TEMPLATE_MODELCARD_PATH
    # Repo type used by `load`, `validate` and `push_to_hub` when none is passed.
    repo_type = "model"

    def __init__(self, content: str, ignore_metadata_errors: bool = False):
        """Initialize a RepoCard from string content. The content should be a
        Markdown file with a YAML block at the beginning and a Markdown body.

        Args:
            content (`str`): The content of the Markdown file.
            ignore_metadata_errors (`bool`, *optional*, defaults to `False`):
                Forwarded to `card_data_class` when the metadata block is parsed.

        Example:
            ```python
            >>> from huggingface_hub.repocard import RepoCard
            >>> text = '''
            ... ---
            ... language: en
            ... license: mit
            ... ---
            ...
            ... # My repo
            ... '''
            >>> card = RepoCard(text)
            >>> card.data.to_dict()
            {'language': 'en', 'license': 'mit'}
            >>> card.text
            '\\n# My repo\\n'

            ```
        <Tip>
        Raises the following error:

            - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
              when the content of the repo card metadata is not a dictionary.

        </Tip>
        """
        # `ignore_metadata_errors` must be set first: the `content` setter reads it
        # while parsing the metadata block.
        self.ignore_metadata_errors = ignore_metadata_errors
        self.content = content

    @property
    def content(self):
        """The content of the RepoCard, including the YAML block and the Markdown body."""
        # Reuse the line ending detected in the originally provided content,
        # falling back to "\n" when none was found.
        line_break = _detect_line_ending(self._content) or "\n"
        return f"---{line_break}{self.data.to_yaml(line_break=line_break)}{line_break}---{line_break}{self.text}"

    @content.setter
    def content(self, content: str):
        """Set the content of the RepoCard.

        Splits `content` into the YAML metadata (stored in `self.data`) and the
        Markdown body (stored in `self.text`).

        Raises:
            `ValueError`: if the YAML metadata block does not parse to a dictionary.
        """
        self._content = content

        match = REGEX_YAML_BLOCK.search(content)
        if match:
            # Metadata found in the YAML block
            yaml_block = match.group(2)
            self.text = content[match.end() :]
            data_dict = yaml.safe_load(yaml_block)

            # An empty YAML block parses to `None`: treat it as empty metadata.
            if data_dict is None:
                data_dict = {}

            # The YAML block's data should be a dictionary
            if not isinstance(data_dict, dict):
                raise ValueError("repo card metadata block should be a dict")
        else:
            # Model card without metadata... create empty metadata
            warnings.warn("Repo card metadata block was not found. Setting CardData to empty.")
            data_dict = {}
            self.text = content

        self.data = self.card_data_class(**data_dict, ignore_metadata_errors=self.ignore_metadata_errors)

    def __str__(self):
        """Return the full card content (YAML block followed by the Markdown body)."""
        return self.content

    def save(self, filepath: Union[Path, str]):
        r"""Save a RepoCard to a file.

        Parent directories are created if they do not exist.

        Args:
            filepath (`Union[Path, str]`): Filepath to the markdown file to save.

        Example:
            ```python
            >>> from huggingface_hub.repocard import RepoCard
            >>> card = RepoCard("---\nlanguage: en\n---\n# This is a test repo card")
            >>> card.save("/tmp/test.md")

            ```
        """
        filepath = Path(filepath)
        filepath.parent.mkdir(parents=True, exist_ok=True)
        # newline="" disables newline translation so the card's line endings are written as-is.
        with open(filepath, mode="w", newline="", encoding="utf-8") as f:
            f.write(str(self))

    @classmethod
    def load(
        cls,
        repo_id_or_path: Union[str, Path],
        repo_type: Optional[str] = None,
        token: Optional[str] = None,
        ignore_metadata_errors: bool = False,
    ):
        """Initialize a RepoCard from a Hugging Face Hub repo's README.md or a local filepath.

        Args:
            repo_id_or_path (`Union[str, Path]`):
                The repo ID associated with a Hugging Face Hub repo or a local filepath.
            repo_type (`str`, *optional*):
                The type of the Hugging Face repo to load the card from. Defaults to None, which will use
                "model". Other options are "dataset" and "space". Not used when loading from a local filepath.
                If this is called from a child class, the default value will be the child class's `repo_type`.
            token (`str`, *optional*):
                Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to the stored token.
            ignore_metadata_errors (`bool`, *optional*, defaults to `False`):
                If True, errors while parsing the metadata section will be ignored. Some information might be lost during
                the process. Use it at your own risk.

        Returns:
            [`huggingface_hub.repocard.RepoCard`]: The RepoCard (or subclass) initialized from the repo's
                README.md file or filepath.

        Raises:
            `ValueError`: if `repo_id_or_path` is not an existing local path and is not a `str`.

        Example:
            ```python
            >>> from huggingface_hub.repocard import RepoCard
            >>> card = RepoCard.load("nateraw/food")
            >>> assert card.data.tags == ["generated_from_trainer", "image-classification", "pytorch"]

            ```
        """
        # An existing local path takes precedence over interpreting the value as a repo id.
        if Path(repo_id_or_path).exists():
            card_path = Path(repo_id_or_path)
        elif isinstance(repo_id_or_path, str):
            card_path = Path(
                hf_hub_download(
                    repo_id_or_path,
                    REPOCARD_NAME,
                    repo_type=repo_type or cls.repo_type,
                    token=token,
                )
            )
        else:
            raise ValueError(f"Cannot load RepoCard: path not found on disk ({repo_id_or_path}).")

        # Preserve newlines in the existing file.
        with card_path.open(mode="r", newline="", encoding="utf-8") as f:
            return cls(f.read(), ignore_metadata_errors=ignore_metadata_errors)

    def validate(self, repo_type: Optional[str] = None):
        """Validates card against Hugging Face Hub's card validation logic.
        Using this function requires access to the internet, so it is only called
        internally by [`huggingface_hub.repocard.RepoCard.push_to_hub`].

        Args:
            repo_type (`str`, *optional*, defaults to "model"):
                The type of Hugging Face repo to push to. Options are "model", "dataset", and "space".
                If this function is called from a child class, the default will be the child class's `repo_type`.

        <Tip>
        Raises the following errors:

            - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
              if the card fails validation checks.
            - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
              if the request to the Hub API fails for any other reason.

        </Tip>
        """
        # If repo type is provided, otherwise, use the repo type of the card.
        repo_type = repo_type or self.repo_type

        body = {
            "repoType": repo_type,
            "content": str(self),
        }
        headers = {"Accept": "text/plain"}

        try:
            r = get_session().post("https://huggingface.co/api/validate-yaml", body, headers=headers)
            r.raise_for_status()
        except requests.exceptions.HTTPError as exc:
            if r.status_code == 400:
                # A 400 response carries the validation message in its body;
                # chain the HTTP error so the original cause stays visible.
                raise ValueError(r.text) from exc
            raise

    def push_to_hub(
        self,
        repo_id: str,
        token: Optional[str] = None,
        repo_type: Optional[str] = None,
        commit_message: Optional[str] = None,
        commit_description: Optional[str] = None,
        revision: Optional[str] = None,
        create_pr: Optional[bool] = None,
        parent_commit: Optional[str] = None,
    ):
        """Push a RepoCard to a Hugging Face Hub repo.

        The card is validated with `validate` before being uploaded.

        Args:
            repo_id (`str`):
                The repo ID of the Hugging Face Hub repo to push to. Example: "nateraw/food".
            token (`str`, *optional*):
                Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to
                the stored token.
            repo_type (`str`, *optional*, defaults to "model"):
                The type of Hugging Face repo to push to. Options are "model", "dataset", and "space". If this
                function is called by a child class, it will default to the child class's `repo_type`.
            commit_message (`str`, *optional*):
                The summary / title / first line of the generated commit.
            commit_description (`str`, *optional*)
                The description of the generated commit.
            revision (`str`, *optional*):
                The git revision to commit from. Defaults to the head of the `"main"` branch.
            create_pr (`bool`, *optional*):
                Whether or not to create a Pull Request with this commit. Defaults to `False`.
            parent_commit (`str`, *optional*):
                The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
                If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
                If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
                Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
                especially useful if the repo is updated / committed to concurrently.
        Returns:
            `str`: URL of the commit which updated the card metadata.
        """
        # If repo type is provided, otherwise, use the repo type of the card.
        repo_type = repo_type or self.repo_type

        # Validate card before pushing to hub
        self.validate(repo_type=repo_type)

        with SoftTemporaryDirectory() as tmpdir:
            tmp_path = Path(tmpdir) / REPOCARD_NAME
            # Write raw UTF-8 bytes so the uploaded file does not depend on the
            # platform's default encoding or newline translation (consistent with
            # `save`, which uses encoding="utf-8" and newline="").
            tmp_path.write_bytes(str(self).encode("utf-8"))
            url = upload_file(
                path_or_fileobj=str(tmp_path),
                path_in_repo=REPOCARD_NAME,
                repo_id=repo_id,
                token=token,
                repo_type=repo_type,
                commit_message=commit_message,
                commit_description=commit_description,
                create_pr=create_pr,
                revision=revision,
                parent_commit=parent_commit,
            )
        return url

    @classmethod
    def from_template(
        cls,
        card_data: CardData,
        template_path: Optional[str] = None,
        **template_kwargs,
    ):
        """Initialize a RepoCard from a template. By default, it uses the default template.

        Templates are Jinja2 templates that can be customized by passing keyword arguments.

        Args:
            card_data (`huggingface_hub.CardData`):
                A huggingface_hub.CardData instance containing the metadata you want to include in the YAML
                header of the repo card on the Hugging Face Hub.
            template_path (`str`, *optional*):
                A path to a markdown file with optional Jinja template variables that can be filled
                in with `template_kwargs`. Defaults to the default template.

        Returns:
            [`huggingface_hub.repocard.RepoCard`]: A RepoCard instance with the specified card data and content from the
            template.

        Raises:
            `ImportError`: if Jinja2 is not installed.
        """
        if is_jinja_available():
            import jinja2
        else:
            raise ImportError(
                "Using RepoCard.from_template requires Jinja2 to be installed. Please"
                " install it with `pip install Jinja2`."
            )

        # Card metadata fields are exposed as template variables; explicit
        # `template_kwargs` take precedence over them.
        kwargs = card_data.to_dict().copy()
        kwargs.update(template_kwargs)
        # Read the template as UTF-8 rather than with the platform's default encoding.
        template_text = Path(template_path or cls.default_template_path).read_text(encoding="utf-8")
        template = jinja2.Template(template_text)
        content = template.render(card_data=card_data.to_yaml(), **kwargs)
        return cls(content)
|
|
|
|
|
class ModelCard(RepoCard):
    """Repo card specialization for model repositories."""

    repo_type = "model"
    card_data_class = ModelCardData
    default_template_path = TEMPLATE_MODELCARD_PATH

    @classmethod
    def from_template(
        cls,
        card_data: ModelCardData,
        template_path: Optional[str] = None,
        **template_kwargs,
    ):
        """Build a ModelCard by rendering a template.

        When `template_path` is not given, the default model card template is used:
        https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/modelcard_template.md

        Templates are Jinja2 templates; extra keyword arguments are made available
        to the template as variables.

        Args:
            card_data (`huggingface_hub.ModelCardData`):
                Metadata to include in the YAML header of the model card on the Hugging Face Hub.
            template_path (`str`, *optional*):
                Path to a markdown file with optional Jinja template variables that can be
                filled in with `template_kwargs`. Defaults to the default template.

        Returns:
            [`huggingface_hub.ModelCard`]: A ModelCard instance with the specified card data and
            content from the template.

        Example:
            ```python
            >>> from huggingface_hub import ModelCard, ModelCardData, EvalResult

            >>> # Using the Default Template
            >>> card_data = ModelCardData(
            ...     language='en',
            ...     license='mit',
            ...     library_name='timm',
            ...     tags=['image-classification', 'resnet'],
            ...     datasets=['beans'],
            ...     metrics=['accuracy'],
            ... )
            >>> card = ModelCard.from_template(
            ...     card_data,
            ...     model_description='This model does x + y...'
            ... )

            >>> # Including Evaluation Results
            >>> card_data = ModelCardData(
            ...     language='en',
            ...     tags=['image-classification', 'resnet'],
            ...     eval_results=[
            ...         EvalResult(
            ...             task_type='image-classification',
            ...             dataset_type='beans',
            ...             dataset_name='Beans',
            ...             metric_type='accuracy',
            ...             metric_value=0.9,
            ...         ),
            ...     ],
            ...     model_name='my-cool-model',
            ... )
            >>> card = ModelCard.from_template(card_data)

            >>> # Using a Custom Template
            >>> card_data = ModelCardData(
            ...     language='en',
            ...     tags=['image-classification', 'resnet']
            ... )
            >>> card = ModelCard.from_template(
            ...     card_data=card_data,
            ...     template_path='./src/huggingface_hub/templates/modelcard_template.md',
            ...     custom_template_var='custom value',  # will be replaced in template if it exists
            ... )

            ```
        """
        # Pure delegation: this override only exists to provide model-specific documentation and typing.
        rendered_card = super().from_template(card_data, template_path, **template_kwargs)
        return rendered_card
|
|
|
|
|
class DatasetCard(RepoCard):
    """Repo card specialization for dataset repositories."""

    repo_type = "dataset"
    card_data_class = DatasetCardData
    default_template_path = TEMPLATE_DATASETCARD_PATH

    @classmethod
    def from_template(
        cls,
        card_data: DatasetCardData,
        template_path: Optional[str] = None,
        **template_kwargs,
    ):
        """Build a DatasetCard by rendering a template.

        When `template_path` is not given, the default dataset card template is used:
        https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/datasetcard_template.md

        Templates are Jinja2 templates; extra keyword arguments are made available
        to the template as variables.

        Args:
            card_data (`huggingface_hub.DatasetCardData`):
                Metadata to include in the YAML header of the dataset card on the Hugging Face Hub.
            template_path (`str`, *optional*):
                Path to a markdown file with optional Jinja template variables that can be
                filled in with `template_kwargs`. Defaults to the default template.

        Returns:
            [`huggingface_hub.DatasetCard`]: A DatasetCard instance with the specified card data and
            content from the template.

        Example:
            ```python
            >>> from huggingface_hub import DatasetCard, DatasetCardData

            >>> # Using the Default Template
            >>> card_data = DatasetCardData(
            ...     language='en',
            ...     license='mit',
            ...     annotations_creators='crowdsourced',
            ...     task_categories=['text-classification'],
            ...     task_ids=['sentiment-classification', 'text-scoring'],
            ...     multilinguality='monolingual',
            ...     pretty_name='My Text Classification Dataset',
            ... )
            >>> card = DatasetCard.from_template(
            ...     card_data,
            ...     pretty_name=card_data.pretty_name,
            ... )

            >>> # Using a Custom Template
            >>> card_data = DatasetCardData(
            ...     language='en',
            ...     license='mit',
            ... )
            >>> card = DatasetCard.from_template(
            ...     card_data=card_data,
            ...     template_path='./src/huggingface_hub/templates/datasetcard_template.md',
            ...     custom_template_var='custom value',  # will be replaced in template if it exists
            ... )

            ```
        """
        # Pure delegation: this override only exists to provide dataset-specific documentation and typing.
        rendered_card = super().from_template(card_data, template_path, **template_kwargs)
        return rendered_card
|
|
|
|
|
class SpaceCard(RepoCard):
    """Repo card specialization for Space repositories.

    Uses `SpaceCardData` for the metadata and the model card template as its
    default template.
    """

    repo_type = "space"
    card_data_class = SpaceCardData
    default_template_path = TEMPLATE_MODELCARD_PATH
|
|
|
|
|
def _detect_line_ending(content: str) -> Literal["\r", "\n", "\r\n", None]:
    """Guess which line ending a string uses, so RepoCard can avoid noisy newline diffs.

    The heuristic mirrors the Hub server implementation and must stay in sync with it.

    Args:
        content (`str`): The text to inspect.

    Returns:
        `"\\r\\n"` when every CR and every LF belongs to a CRLF pair, `"\\r"` when
        CR characters outnumber LF characters, `"\\n"` otherwise, and `None` when
        the text contains no CR or LF at all.
    """
    # Note: a CRLF pair is counted in all three tallies.
    n_cr, n_lf, n_crlf = (content.count(sep) for sep in ("\r", "\n", "\r\n"))

    if not (n_cr or n_lf):
        return None
    if n_cr == n_crlf == n_lf:
        return "\r\n"
    return "\r" if n_cr > n_lf else "\n"
|
|
|
|
|
def metadata_load(local_path: Union[str, Path]) -> Optional[Dict]:
    """Load the YAML metadata block found at the top of a local Markdown file.

    The file is read as UTF-8, matching how `metadata_save` writes it.

    Args:
        local_path (`Union[str, Path]`): Path to the Markdown file to read.

    Returns:
        `Optional[Dict]`: The parsed metadata dictionary. `None` is returned when
        the file has no YAML block or when the block is empty.

    Raises:
        `ValueError`: if the YAML block parses to something other than a dictionary.
    """
    # Explicit encoding: relying on the platform default breaks on non-UTF-8 locales.
    content = Path(local_path).read_text(encoding="utf-8")
    match = REGEX_YAML_BLOCK.search(content)
    if match is None:
        return None

    data = yaml.safe_load(match.group(2))
    if data is None or isinstance(data, dict):
        return data
    raise ValueError("repo card metadata block should be a dict")
|
|
|
|
|
def metadata_save(local_path: Union[str, Path], data: Dict) -> None:
    """Save the metadata dict in the upper YAML part of a Markdown file.

    Tries to preserve the newlines used by the existing file. Docs about `open()`
    with the `newline=""` parameter:
    https://docs.python.org/3/library/functions.html?highlight=open#open
    Does not work with "^M" linebreaks, which are replaced by "\\n".

    If the file already contains a YAML block, that block is replaced; otherwise
    the new block is prepended to the existing content. The file is created if it
    does not exist.

    Args:
        local_path (`Union[str, Path]`): Path to the Markdown file to update.
        data (`Dict`): Metadata to serialize into the YAML block.
    """
    line_break = "\n"
    content = ""

    # Try to detect the newline sequence used by the existing file.
    if os.path.exists(local_path):
        with open(local_path, "r", newline="", encoding="utf8") as readme:
            content = readme.read()
            if isinstance(readme.newlines, tuple):
                # Several newline styles were seen: use the first one reported.
                line_break = readme.newlines[0]
            elif isinstance(readme.newlines, str):
                line_break = readme.newlines

    # Build the complete output BEFORE opening the file for writing: opening in
    # "w" mode truncates the file, so a serialization error raised afterwards
    # would otherwise wipe the existing content.
    data_yaml = yaml_dump(data, sort_keys=False, line_break=line_break)
    new_block = f"---{line_break}{data_yaml}---{line_break}"

    match = REGEX_YAML_BLOCK.search(content)
    if match:
        # Swap the existing YAML block, keeping whatever surrounds it.
        output = content[: match.start()] + new_block + content[match.end() :]
    else:
        output = f"{new_block}{content}"

    # newline="" writes the chosen line breaks untouched; the context manager closes the file.
    with open(local_path, "w", newline="", encoding="utf8") as readme:
        readme.write(output)
|
|
|
|
|
def metadata_eval_result(
    *,
    model_pretty_name: str,
    task_pretty_name: str,
    task_id: str,
    metrics_pretty_name: str,
    metrics_id: str,
    metrics_value: Any,
    dataset_pretty_name: str,
    dataset_id: str,
    metrics_config: Optional[str] = None,
    metrics_verified: bool = False,
    dataset_config: Optional[str] = None,
    dataset_split: Optional[str] = None,
    dataset_revision: Optional[str] = None,
    metrics_verification_token: Optional[str] = None,
) -> Dict:
    """
    Build a metadata dict describing one evaluation result of a model on a dataset.

    The arguments are wrapped in a single `EvalResult` and converted to the
    `model-index` format with `eval_results_to_model_index`.

    Args:
        model_pretty_name (`str`):
            The name of the model in natural language.
        task_pretty_name (`str`):
            The name of a task in natural language.
        task_id (`str`):
            Example: automatic-speech-recognition. A task id.
        metrics_pretty_name (`str`):
            A name for the metric in natural language. Example: Test WER.
        metrics_id (`str`):
            Example: wer. A metric id from https://hf.co/metrics.
        metrics_value (`Any`):
            The value from the metric. Example: 20.0 or "20.0 ± 1.2".
        dataset_pretty_name (`str`):
            The name of the dataset in natural language.
        dataset_id (`str`):
            Example: common_voice. A dataset id from https://hf.co/datasets.
        metrics_config (`str`, *optional*):
            The name of the metric configuration used in `load_metric()`.
            Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`.
        metrics_verified (`bool`, *optional*, defaults to `False`):
            Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set.
        dataset_config (`str`, *optional*):
            Example: fr. The name of the dataset configuration used in `load_dataset()`.
        dataset_split (`str`, *optional*):
            Example: test. The name of the dataset split used in `load_dataset()`.
        dataset_revision (`str`, *optional*):
            Example: 5503434ddd753f426f4b38109466949a1217c2bb. The name of the dataset revision
            used in `load_dataset()`.
        metrics_verification_token (`str`, *optional*):
            A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not.

    Returns:
        `dict`: a metadata dict with the result from a model evaluated on a dataset.

    Example:
        ```python
        >>> from huggingface_hub import metadata_eval_result
        >>> results = metadata_eval_result(
        ...         model_pretty_name="RoBERTa fine-tuned on ReactionGIF",
        ...         task_pretty_name="Text Classification",
        ...         task_id="text-classification",
        ...         metrics_pretty_name="Accuracy",
        ...         metrics_id="accuracy",
        ...         metrics_value=0.2662102282047272,
        ...         dataset_pretty_name="ReactionJPEG",
        ...         dataset_id="julien-c/reactionjpeg",
        ...         dataset_config="default",
        ...         dataset_split="test",
        ... )
        >>> results == {
        ...     'model-index': [
        ...         {
        ...             'name': 'RoBERTa fine-tuned on ReactionGIF',
        ...             'results': [
        ...                 {
        ...                     'task': {
        ...                         'type': 'text-classification',
        ...                         'name': 'Text Classification'
        ...                     },
        ...                     'dataset': {
        ...                         'name': 'ReactionJPEG',
        ...                         'type': 'julien-c/reactionjpeg',
        ...                         'config': 'default',
        ...                         'split': 'test'
        ...                     },
        ...                     'metrics': [
        ...                         {
        ...                             'type': 'accuracy',
        ...                             'value': 0.2662102282047272,
        ...                             'name': 'Accuracy',
        ...                             'verified': False
        ...                         }
        ...                     ]
        ...                 }
        ...             ]
        ...         }
        ...     ]
        ... }
        True

        ```
    """
    # Task, dataset and metric information all live on a single EvalResult.
    evaluation = EvalResult(
        # task
        task_type=task_id,
        task_name=task_pretty_name,
        # dataset
        dataset_type=dataset_id,
        dataset_name=dataset_pretty_name,
        dataset_config=dataset_config,
        dataset_split=dataset_split,
        dataset_revision=dataset_revision,
        # metric
        metric_type=metrics_id,
        metric_name=metrics_pretty_name,
        metric_value=metrics_value,
        metric_config=metrics_config,
        verified=metrics_verified,
        verify_token=metrics_verification_token,
    )
    model_index = eval_results_to_model_index(
        model_name=model_pretty_name,
        eval_results=[evaluation],
    )
    return {"model-index": model_index}
|
|
|
|
|
@validate_hf_hub_args
def metadata_update(
    repo_id: str,
    metadata: Dict,
    *,
    repo_type: Optional[str] = None,
    overwrite: bool = False,
    token: Optional[str] = None,
    commit_message: Optional[str] = None,
    commit_description: Optional[str] = None,
    revision: Optional[str] = None,
    create_pr: bool = False,
    parent_commit: Optional[str] = None,
) -> str:
    """
    Updates the metadata in the README.md of a repository on the Hugging Face Hub.
    If the README.md file doesn't exist yet, a new one is created with the metadata and
    the default ModelCard or DatasetCard template. For `space` repo, an error is thrown
    as a Space cannot exist without a `README.md` file.

    The `metadata` argument is not modified by this function.

    Args:
        repo_id (`str`):
            The name of the repository.
        metadata (`dict`):
            A dictionary containing the metadata to be updated.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if updating to a dataset or space,
            `None` or `"model"` if updating to a model. Default is `None`.
        overwrite (`bool`, *optional*, defaults to `False`):
            If set to `True` an existing field can be overwritten, otherwise
            attempting to overwrite an existing field will cause an error.
        token (`str`, *optional*):
            The Hugging Face authentication token.
        commit_message (`str`, *optional*):
            The summary / title / first line of the generated commit. Defaults to
            `f"Update metadata with huggingface_hub"`
        commit_description (`str` *optional*)
            The description of the generated commit
        revision (`str`, *optional*):
            The git revision to commit from. Defaults to the head of the
            `"main"` branch.
        create_pr (`boolean`, *optional*):
            Whether or not to create a Pull Request from `revision` with that commit.
            Defaults to `False`.
        parent_commit (`str`, *optional*):
            The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
            If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
            If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
            Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
            especially useful if the repo is updated / committed to concurrently.
    Returns:
        `str`: URL of the commit which updated the card metadata.

    Raises:
        `ValueError`: if `repo_type` is unknown, if the repo is a Space without a
            `README.md` file, or if an existing field / metric would be changed
            while `overwrite` is `False`.

    Example:
        ```python
        >>> from huggingface_hub import metadata_update
        >>> metadata = {'model-index': [{'name': 'RoBERTa fine-tuned on ReactionGIF',
        ...             'results': [{'dataset': {'name': 'ReactionGIF',
        ...                                      'type': 'julien-c/reactiongif'},
        ...                           'metrics': [{'name': 'Recall',
        ...                                        'type': 'recall',
        ...                                        'value': 0.7762102282047272}],
        ...                          'task': {'name': 'Text Classification',
        ...                                   'type': 'text-classification'}}]}]}
        >>> url = metadata_update("hf-internal-testing/reactiongif-roberta-card", metadata)

        ```
    """
    commit_message = commit_message if commit_message is not None else "Update metadata with huggingface_hub"

    # Card class given repo_type
    card_class: Type[RepoCard]
    if repo_type is None or repo_type == "model":
        card_class = ModelCard
    elif repo_type == "dataset":
        card_class = DatasetCard
    elif repo_type == "space":
        card_class = RepoCard
    else:
        raise ValueError(f"Unknown repo_type: {repo_type}")

    # Either load the repo card from the Hub or create an empty one.
    try:
        card = card_class.load(repo_id, token=token, repo_type=repo_type)
    except EntryNotFoundError:
        if repo_type == "space":
            raise ValueError("Cannot update metadata on a Space that doesn't contain a `README.md` file.")

        # Initialize a ModelCard or DatasetCard from the default template with no data.
        card = card_class.from_template(CardData())

    for key, value in metadata.items():
        if key == "model-index":
            # Model name is required: fill in a default when it is missing. Work on a
            # copy of the first entry so the caller's `metadata` is left untouched.
            if "name" not in value[0]:
                # NOTE(review): `RepoCard` as defined in this file sets no `model_name`
                # attribute on the card itself, so this presumably always falls back to
                # `repo_id` — confirm whether `card.data.model_name` was intended.
                default_name = getattr(card, "model_name", repo_id)
                value = [{**value[0], "name": default_name}, *value[1:]]
            model_name, new_results = model_index_to_eval_results(value)
            if card.data.eval_results is None:
                card.data.eval_results = new_results
                card.data.model_name = model_name
            else:
                existing_results = card.data.eval_results

                # Compare every new result with every existing one:
                # - same result (ignoring the value) with a different value: overwrite it,
                #   or raise when `overwrite` is False;
                # - no matching existing result: append the new one.
                for new_result in new_results:
                    result_found = False
                    for existing_result in existing_results:
                        if new_result.is_equal_except_value(existing_result):
                            if new_result != existing_result and not overwrite:
                                raise ValueError(
                                    "You passed a new value for the existing metric"
                                    f" 'name: {new_result.metric_name}, type: "
                                    f"{new_result.metric_type}'. Set `overwrite=True`"
                                    " to overwrite existing metrics."
                                )
                            result_found = True
                            existing_result.metric_value = new_result.metric_value
                            # Only refresh the token of results already marked as verified.
                            if existing_result.verified is True:
                                existing_result.verify_token = new_result.verify_token
                    if not result_found:
                        card.data.eval_results.append(new_result)
        else:
            # Any other metadata field: refuse to silently change an existing value.
            if card.data.get(key) is not None and not overwrite and card.data.get(key) != value:
                raise ValueError(
                    f"You passed a new value for the existing meta data field '{key}'."
                    " Set `overwrite=True` to overwrite existing metadata."
                )
            else:
                card.data[key] = value

    return card.push_to_hub(
        repo_id,
        token=token,
        repo_type=repo_type,
        commit_message=commit_message,
        commit_description=commit_description,
        create_pr=create_pr,
        revision=revision,
        parent_commit=parent_commit,
    )
|
|