"""Contains a logger to push training logs to the Hub, using Tensorboard.""" |
|
from pathlib import Path
from typing import TYPE_CHECKING, List, Optional, Union

from huggingface_hub._commit_scheduler import CommitScheduler

from .utils import experimental, is_tensorboard_available


# `tensorboardX` is an optional dependency: fall back to a placeholder base class when it is
# not installed so that this module can still be imported.
if is_tensorboard_available():
    from tensorboardX import SummaryWriter
else:
    SummaryWriter = object  # Dummy class so that `HFSummaryWriter` can still be defined

if TYPE_CHECKING:
    from tensorboardX import SummaryWriter
|
|
class HFSummaryWriter(SummaryWriter):
    """
    Wrapper around tensorboard's `SummaryWriter` to push training logs to the Hub.

    Data is logged locally and then pushed to the Hub asynchronously. Pushing data to the Hub is done in a separate
    thread to avoid blocking the training script. In particular, if the upload fails for any reason (e.g. a connection
    issue), the main script will not be interrupted. Data is automatically pushed to the Hub every `commit_every`
    minutes (defaults to every 5 minutes).

    <Tip warning={true}>

    `HFSummaryWriter` is experimental. Its API is subject to change in the future without prior notice.

    </Tip>

    Args:
        repo_id (`str`):
            The id of the repo to which the logs will be pushed.
        logdir (`str`, *optional*):
            The directory where the logs will be written. If not specified, a local directory will be created by the
            underlying `SummaryWriter` object.
        commit_every (`int` or `float`, *optional*):
            The frequency (in minutes) at which the logs will be pushed to the Hub. Defaults to 5 minutes.
        squash_history (`bool`, *optional*):
            Whether to squash the history of the repo after each commit. Defaults to `False`. Squashing the history is
            useful to avoid degraded performance on the repo when it grows too large.
        repo_type (`str`, *optional*):
            The type of the repo to which the logs will be pushed. Defaults to "model".
        repo_revision (`str`, *optional*):
            The revision of the repo to which the logs will be pushed. Defaults to "main".
        repo_private (`bool`, *optional*):
            Whether to create a private repo or not. Defaults to `False`. This argument is ignored if the repo already
            exists.
        path_in_repo (`str`, *optional*):
            The path to the folder in the repo where the logs will be pushed. Defaults to "tensorboard/".
        repo_allow_patterns (`List[str]` or `str`, *optional*):
            A list of patterns to include in the upload. Defaults to `"*.tfevents.*"`. Check out the
            [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder) for more details.
        repo_ignore_patterns (`List[str]` or `str`, *optional*):
            A list of patterns to exclude from the upload. Check out the
            [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder) for more details.
        token (`str`, *optional*):
            Authentication token. Will default to the stored token. See https://huggingface.co/settings/token for more
            details.
        kwargs:
            Additional keyword arguments passed to `SummaryWriter`.

    Examples:
    ```py
    >>> from huggingface_hub import HFSummaryWriter

    # Logs are automatically pushed every 15 minutes
    >>> logger = HFSummaryWriter(repo_id="test_hf_logger", commit_every=15)
    >>> logger.add_scalar("a", 1)
    >>> logger.add_scalar("b", 2)
    ...

    # You can also trigger a push manually
    >>> logger.scheduler.trigger()
    ```

    ```py
    >>> from huggingface_hub import HFSummaryWriter

    # Logs are automatically pushed every 5 minutes (default) + when exiting the context manager
    >>> with HFSummaryWriter(repo_id="test_hf_logger") as logger:
    ...     logger.add_scalar("a", 1)
    ...     logger.add_scalar("b", 2)
    ```
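
    A minimal sketch (the repo id and folder names below are illustrative placeholders) pushing logs to a
    dataset repo under a custom folder, with history squashing enabled to keep the repo small:

    ```py
    >>> from huggingface_hub import HFSummaryWriter

    # "username/training-logs" and "runs/exp1" are hypothetical values
    >>> logger = HFSummaryWriter(
    ...     repo_id="username/training-logs",
    ...     repo_type="dataset",
    ...     path_in_repo="runs/exp1",
    ...     squash_history=True,
    ... )
    >>> logger.add_scalar("loss", 0.1)
    ```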
    """
|
    @experimental
    def __new__(cls, *args, **kwargs) -> "HFSummaryWriter":
        # Fail early with an explicit error message if `tensorboardX` is not installed.
        if not is_tensorboard_available():
            raise ImportError(
                "You must have `tensorboard` installed to use `HFSummaryWriter`. Please run `pip install --upgrade"
                " tensorboardX` first."
            )
        return super().__new__(cls)
|
    def __init__(
        self,
        repo_id: str,
        *,
        logdir: Optional[str] = None,
        commit_every: Union[int, float] = 5,
        squash_history: bool = False,
        repo_type: Optional[str] = None,
        repo_revision: Optional[str] = None,
        repo_private: bool = False,
        path_in_repo: Optional[str] = "tensorboard",
        repo_allow_patterns: Optional[Union[List[str], str]] = "*.tfevents.*",
        repo_ignore_patterns: Optional[Union[List[str], str]] = None,
        token: Optional[str] = None,
        **kwargs,
    ):
        # Initialize the underlying SummaryWriter
        super().__init__(logdir=logdir, **kwargs)

        # Check that `logdir` has been correctly initialized by the underlying `SummaryWriter`
        if not isinstance(self.logdir, str):
            raise ValueError(f"`self.logdir` must be a string. Got '{self.logdir}' of type {type(self.logdir)}.")

        # Append the local run folder name to `path_in_repo` so that each run is uploaded to its own subfolder
        if path_in_repo is None or path_in_repo == "":
            path_in_repo = Path(self.logdir).name
        else:
            path_in_repo = path_in_repo.strip("/") + "/" + Path(self.logdir).name

        # Initialize the scheduler that uploads the local logdir to the Hub in a background thread
        self.scheduler = CommitScheduler(
            folder_path=self.logdir,
            path_in_repo=path_in_repo,
            repo_id=repo_id,
            repo_type=repo_type,
            revision=repo_revision,
            private=repo_private,
            token=token,
            allow_patterns=repo_allow_patterns,
            ignore_patterns=repo_ignore_patterns,
            every=commit_every,
            squash_history=squash_history,
        )

        # Expose repo info at root level, as resolved by the scheduler when creating the repo
        self.repo_id = self.scheduler.repo_id
        self.repo_type = self.scheduler.repo_type
        self.repo_revision = self.scheduler.revision

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Push the remaining logs to the Hub and wait for the upload to complete when exiting the logger's context manager."""
        super().__exit__(exc_type, exc_val, exc_tb)
        future = self.scheduler.trigger()
        future.result()