yangheng committed
Commit fca804e · 1 Parent(s): c68f812
This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. .gitattributes +5 -3
  2. .gitignore +1 -0
  3. .gradio/certificate.pem +31 -0
  4. SuperResolutionAnimeDiffusion.zip → 1boy.png +2 -2
  5. 1girl.png +3 -0
  6. README.md +1 -12
  7. Waifu2x/model_check_points/CRAN_V2/CARN_adam_checkpoint.pt +3 -0
  8. Waifu2x/model_check_points/CRAN_V2/CARN_scheduler_last_iter.pt +3 -0
  9. Waifu2x/model_check_points/CRAN_V2/CRAN_V2_02_28_2019.pt +3 -0
  10. Waifu2x/model_check_points/CRAN_V2/ReadME.md +34 -27
  11. Waifu2x/model_check_points/CRAN_V2/test_loss.pt +3 -0
  12. Waifu2x/model_check_points/CRAN_V2/test_psnr.pt +3 -0
  13. Waifu2x/model_check_points/CRAN_V2/test_ssim.pt +3 -0
  14. Waifu2x/model_check_points/CRAN_V2/train_loss.pt +3 -0
  15. Waifu2x/model_check_points/CRAN_V2/train_psnr.pt +3 -0
  16. Waifu2x/model_check_points/CRAN_V2/train_ssim.pt +3 -0
  17. Waifu2x/model_check_points/DCSCN/DCSCN_model_387epos_L12_noise_1.pt +3 -0
  18. Waifu2x/model_check_points/DCSCN/DCSCN_weights_387epos_L12_noise_1.pt +3 -0
  19. Waifu2x/model_check_points/DCSCN/DCSCN_weights_45epos_L8_noise_1.pt +3 -0
  20. Waifu2x/model_check_points/DCSCN/ReadME.md +13 -0
  21. Waifu2x/model_check_points/ESPCN/ESPCN_7_weights_14epos.pk +3 -0
  22. Waifu2x/model_check_points/Upconv_7/anime.7z +3 -0
  23. Waifu2x/model_check_points/Upconv_7/photo.7z +3 -0
  24. Waifu2x/model_check_points/vgg_7/art.7z +3 -0
  25. Waifu2x/model_check_points/vgg_7/art_y.7z +3 -0
  26. Waifu2x/model_check_points/vgg_7/photo.7z +3 -0
  27. Waifu2x/model_check_points/vgg_7/ukbench.7z +3 -0
  28. app.py +641 -435
  29. gfpgan/weights/detection_Resnet50_Final.pth +3 -0
  30. gfpgan/weights/parsing_parsenet.pth +3 -0
  31. huggingface_hub/README.md +358 -0
  32. huggingface_hub/__init__.py +968 -0
  33. huggingface_hub/_commit_api.py +729 -0
  34. huggingface_hub/_commit_scheduler.py +327 -0
  35. huggingface_hub/_inference_endpoints.py +396 -0
  36. huggingface_hub/_local_folder.py +425 -0
  37. huggingface_hub/_login.py +397 -0
  38. huggingface_hub/_multi_commits.py +306 -0
  39. huggingface_hub/_snapshot_download.py +304 -0
  40. huggingface_hub/_space_api.py +160 -0
  41. huggingface_hub/_tensorboard_logger.py +195 -0
  42. huggingface_hub/_upload_large_folder.py +621 -0
  43. huggingface_hub/_webhooks_payload.py +137 -0
  44. huggingface_hub/_webhooks_server.py +386 -0
  45. huggingface_hub/commands/__init__.py +27 -0
  46. huggingface_hub/commands/_cli_utils.py +69 -0
  47. huggingface_hub/commands/delete_cache.py +428 -0
  48. huggingface_hub/commands/download.py +200 -0
  49. huggingface_hub/commands/env.py +36 -0
  50. huggingface_hub/commands/huggingface_cli.py +61 -0
.gitattributes CHANGED
@@ -29,8 +29,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.tgz filter=lfs diff=lfs merge=lfs -text
  *.wasm filter=lfs diff=lfs merge=lfs -text
  *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
- *zip filter=lfs diff=lfs merge=lfs -text
- SuperResolutionAnimeDiffusion.zip filter=lfs diff=lfs merge=lfs -text
- random_examples.zip filter=lfs diff=lfs merge=lfs -text
+ scenery.png filter=lfs diff=lfs merge=lfs -text
+ 1boy.png filter=lfs diff=lfs merge=lfs -text
+ 1girl.png filter=lfs diff=lfs merge=lfs -text
+ *.pk filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -10,6 +10,7 @@ integrated_datasets/
  *.state_dict
  *.config
  *.args
+ *.zip
  *.gz
  *.bin
  *.result.txt
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
+ -----BEGIN CERTIFICATE-----
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+ -----END CERTIFICATE-----
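For context: the base64 above decodes to the widely distributed ISRG Root X1 (Let's Encrypt) root certificate, which Gradio appears to drop into `.gradio/`. A quick inspection sketch — the `cryptography` package is an assumption, not a dependency of this repo:

```python
# Hedged inspection snippet; `cryptography` is not part of this repository.
from cryptography import x509

with open(".gradio/certificate.pem", "rb") as f:
    cert = x509.load_pem_x509_certificate(f.read())

print(cert.subject.rfc4514_string())  # CN=ISRG Root X1,O=Internet Security Research Group,C=US
print(cert.not_valid_after)           # validity ending in 2035, per the encoded dates
```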
SuperResolutionAnimeDiffusion.zip → 1boy.png RENAMED
File without changes
1girl.png ADDED

Git LFS Details
  • SHA256: c7d13eec13f7f7a98c225c9f2340461ad1bbfbc6ef7b44ecb96eb0ca73d2723d
  • Pointer size: 132 Bytes
  • Size of remote file: 2.11 MB
README.md CHANGED
@@ -1,15 +1,4 @@
- ---
- title: Anything V3.0
- emoji: 🏃
- colorFrom: gray
- colorTo: yellow
- sdk: gradio
- sdk_version: 3.10.1
- app_file: app.py
- pinned: false
- ---
- 
- # If you have a GPU, try the [Stable Diffusion WebUI](https://github.com/yangheng95/stable-diffusion-webui)
+ # Super Resolution Anime Diffusion
  
  
  # [Online Web Demo](https://huggingface.co/spaces/yangheng/Super-Resolution-Anime-Diffusion)
Waifu2x/model_check_points/CRAN_V2/CARN_adam_checkpoint.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:292f2be9ea173861e4a7f6cf580f04fe9a1fc6c78fdac6f182cbc051ea50791e
+ size 31734614
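All of the `.pt`, `.pk`, `.7z`, and `.pth` files added in this commit are Git LFS pointers with the same three-line layout shown above. A minimal sketch of reading one — the parsing helper is illustrative, not from the repo:

```python
# Illustrative only: split a Git LFS pointer file into its key/value fields.
def read_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

ptr = read_lfs_pointer("Waifu2x/model_check_points/CRAN_V2/CARN_adam_checkpoint.pt")
print(ptr["oid"], ptr["size"])  # sha256:292f2be9... 31734614
```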
Waifu2x/model_check_points/CRAN_V2/CARN_scheduler_last_iter.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ba2302e523d32bfeb9b542a9dc6aa5ecdb45babc793892153245d6c69ae23433
+ size 151
Waifu2x/model_check_points/CRAN_V2/CRAN_V2_02_28_2019.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b74e163d829f6f587e3fdb0b645342e494416accb1962cf0973354de5ec157ea
+ size 49895595
Waifu2x/model_check_points/CRAN_V2/ReadME.md CHANGED
@@ -1,34 +1,41 @@
- # Resume & Use Model Check Points
- 
- This folder contains check points for models and their weights. They are generated from [PyTorch's pickle](https://pytorch.org/docs/master/notes/serialization.html).
- 
- Model specifications are in each folder's ReadME.
- 
- Pickle names with "model" contain the entire models, and they can be used as an freeze module by calling the "forward_checkpoint" function to generate images.
- 
- Example:
- ```python
- import torch
- # No need to reconstruct the model
- model = torch.load("./DCSCN/DCSCN_model_387epos_L12_noise_1.pt")
- x = torch.randn((1,3,10,10)), torch.randn((1,3,20,20))
- out = model.forward_checkpoint(a)
- ```
- 
- Pickle names with "weights" are model weights, and they are named dictionaries.
- 
- Example:
- ```python
- model = DCSCN(*) # the setting must be the same to load check points weights.
- model.load_state_dict(torch.load("./DCSCN/DCSCN_weights_387epos_L12_noise_1.pt"))
- # then you can resume the model training
- ```
- 
- Model check poins in Upconv_7 and vgg_7 are from [waifu2x's repo](https://github.com/nagadomi/waifu2x/tree/master/models). To load weights into a model, please use ```load_pre_train_weights``` function.
- 
- Example:
- ```python
- model = UpConv_7()
- model.load_pre_train_weights(json_file=...)
- # then the model is ready to use
- ```
+ # Model Specifications
+ 
+ 
+ ```python
+ model_cran_v2 = CARN_V2(color_channels=3, mid_channels=64, conv=nn.Conv2d,
+                         single_conv_size=3, single_conv_group=1,
+                         scale=2, activation=nn.LeakyReLU(0.1),
+                         SEBlock=True, repeat_blocks=3, atrous=(1, 1, 1))
+ 
+ model_cran_v2 = network_to_half(model_cran_v2)
+ checkpoint = "CARN_model_checkpoint.pt"
+ model_cran_v2.load_state_dict(torch.load(checkpoint, 'cpu'))
+ model_cran_v2 = model_cran_v2.float()  # if using CPU
+ 
+ ```
+ 
+ To use the pre-trained model for training:
+ 
+ ```python
+ 
+ model = CARN_V2(color_channels=3, mid_channels=64, conv=nn.Conv2d,
+                 single_conv_size=3, single_conv_group=1,
+                 scale=2, activation=nn.LeakyReLU(0.1),
+                 SEBlock=True, repeat_blocks=3, atrous=(1, 1, 1))
+ 
+ model = network_to_half(model)
+ model = model.cuda()
+ model.load_state_dict(torch.load("CARN_model_checkpoint.pt"))
+ 
+ learning_rate = 1e-4
+ weight_decay = 1e-6
+ optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay, amsgrad=True)
+ optimizer = FP16_Optimizer(optimizer, static_loss_scale=128.0, verbose=False)
+ optimizer.load_state_dict(torch.load("CARN_adam_checkpoint.pt"))
+ 
+ last_iter = torch.load("CARN_scheduler_last_iter")  # -1 if start from new
+ scheduler = CyclicLR(optimizer.optimizer, base_lr=1e-4, max_lr=4e-4,
+                      step_size=3 * total_batch, mode="triangular",
+                      last_batch_iteration=last_iter)
+ 
+ ```
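Once the checkpoint is loaded as the new ReadME shows, inference is a plain forward pass. A hedged sketch — the tensor shape is illustrative, and `model_cran_v2` is the instance configured above:

```python
import torch

# Assuming model_cran_v2 was built and loaded as in the README (CPU/float32 path).
model_cran_v2.eval()
with torch.no_grad():
    low_res = torch.rand(1, 3, 64, 64)   # dummy RGB input tensor
    upscaled = model_cran_v2(low_res)    # scale=2 should yield (1, 3, 128, 128)
print(upscaled.shape)
```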
Waifu2x/model_check_points/CRAN_V2/test_loss.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:93f644a6a3f6636035980855f56ef3dbc8784679371b06b81e0e4d06067c142d
+ size 43507
Waifu2x/model_check_points/CRAN_V2/test_psnr.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ae8f8d1a3d175e76dcbcdcf0cede898e8f2cf169f3eec14eeb75a4e19d8e2d6b
+ size 42563
Waifu2x/model_check_points/CRAN_V2/test_ssim.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:763ff936f536b12b37b351c09f3c1290fb2188399aea3d9ce3cf069bd0d135e7
+ size 43515
Waifu2x/model_check_points/CRAN_V2/train_loss.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:85a86e94cd689adff04c4b22bf2534d17aa52af5e7309a82bc2a4f5c6c144900
+ size 15564175
Waifu2x/model_check_points/CRAN_V2/train_psnr.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2d1e88646b74a054ddf20ba41368a01162e35d9c88ac72f392a6ba08a5c7ef3b
+ size 15564175
Waifu2x/model_check_points/CRAN_V2/train_ssim.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6b8da8bc73f64997c5b2d15d6161b11dbd172258a62c88572c032feb73bd022b
+ size 15564175
Waifu2x/model_check_points/DCSCN/DCSCN_model_387epos_L12_noise_1.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7aaf293584618b446868910a173de4eed2e054f33e325f9c93cabacb0937e6d5
+ size 7585347
Waifu2x/model_check_points/DCSCN/DCSCN_weights_387epos_L12_noise_1.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8faddf6e3bf6acf688642a99da23d5626a6173c1eb92d2cdd26a5d3dd6a73da4
+ size 7568033
Waifu2x/model_check_points/DCSCN/DCSCN_weights_45epos_L8_noise_1.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9b8c7b3c6c4bc1b8d48186352f9d74b685210ca8a372a06bd8718c2d20e0769e
+ size 9746842
Waifu2x/model_check_points/DCSCN/ReadME.md ADDED
@@ -0,0 +1,13 @@
+ # Model Specifications
+ 
+ ## 12 Layers Model
+ 
+ ```python
+ model = DCSCN(color_channel=3,
+               up_scale=2,
+               feature_layers=12,
+               first_feature_filters=196,
+               last_feature_filters=48,
+               reconstruction_filters=64,
+               up_sampler_filters=32)
+ ```
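The removed top-level ReadME explained that files named "weights" are plain state dicts; a sketch of pairing this configuration with the matching checkpoint added above (the path is assumed relative to the checkpoint folder):

```python
import torch

# The constructor arguments must match the ones the checkpoint was trained with.
model = DCSCN(color_channel=3, up_scale=2, feature_layers=12,
              first_feature_filters=196, last_feature_filters=48,
              reconstruction_filters=64, up_sampler_filters=32)
state = torch.load("DCSCN_weights_387epos_L12_noise_1.pt", map_location="cpu")
model.load_state_dict(state)
model.eval()  # ready for inference, or keep training to resume
```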
Waifu2x/model_check_points/ESPCN/ESPCN_7_weights_14epos.pk ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:60088b9b7865535dae982af5f6ca2e361ecb6ce9ee1cc43c8ce4f6b1e1a4abe7
+ size 5388762
Waifu2x/model_check_points/Upconv_7/anime.7z ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0b4514f546498bf8966dd74e806d2f4034573809f91ca02659710d666235266d
+ size 19867323
Waifu2x/model_check_points/Upconv_7/photo.7z ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c7a173165da9b2b101f8964c55ce2472b3ce15a7a6f742804037e5c7a5a321ae
+ size 19872894
Waifu2x/model_check_points/vgg_7/art.7z ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ae5e88101e4b5591e795ffa8661b36c4986bf9ce9e762a9e21d9f268a2a8effe
+ size 10456728
Waifu2x/model_check_points/vgg_7/art_y.7z ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7f24fcbf0e0d2a9d9242e3188fe8fb3de82d77da82a5228664be4dc2a69aef7a
+ size 8281792
Waifu2x/model_check_points/vgg_7/photo.7z ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a96d475054665d050c370f3786097690920523c71231ef276ab2c7d011d305b1
+ size 10459233
Waifu2x/model_check_points/vgg_7/ukbench.7z ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:05f6e10f467b10ab66a9a4d41443a7f280e67925eb50c96fc8e43287ce56e205
+ size 2088088
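The Upconv_7 and vgg_7 archives above ship waifu2x's original JSON weights in 7z form; the removed CRAN_V2 ReadME said to load them with `load_pre_train_weights(json_file=...)`. A sketch of unpacking first — `py7zr` is an assumption here, any 7z extractor works:

```python
import py7zr

# Unpack the committed archive next to itself; the member names inside vary by archive.
with py7zr.SevenZipFile("Waifu2x/model_check_points/vgg_7/art.7z", mode="r") as archive:
    archive.extractall(path="Waifu2x/model_check_points/vgg_7")

# Then, per the removed ReadME (JSON filename is hypothetical):
# model = UpConv_7()
# model.load_pre_train_weights(json_file="Waifu2x/model_check_points/vgg_7/art/<weights>.json")
```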
app.py CHANGED
@@ -1,20 +1,43 @@
  import os
- import random
  import zipfile
- import findfile
  import PIL.Image
  import autocuda
- from pyabsa.utils.pyabsa_utils import fprint
- 
- try:
-     for z_file in findfile.find_cwd_files(and_key=['.zip'],
-                                           exclude_key=['.ignore', 'git', 'SuperResolutionAnimeDiffusion'],
-                                           recursive=10):
-         fprint(f"Extracting {z_file}...")
-         with zipfile.ZipFile(z_file, 'r') as zip_ref:
-             zip_ref.extractall(os.path.dirname(z_file))
- except Exception as e:
-     os.system('unzip random_examples.zip')
  
  from diffusers import (
      AutoencoderKL,
@@ -27,59 +50,95 @@ import gradio as gr
  import torch
  from PIL import Image
  import utils
- import datetime
- import time
- import psutil
  from Waifu2x.magnify import ImageMagnifier
  from RealESRGANv030.interface import realEsrgan
  
- magnifier = ImageMagnifier()
  
  start_time = time.time()
  is_colab = utils.is_google_colab()
- 
- CUDA_VISIBLE_DEVICES = ""
  device = autocuda.auto_cuda()
- 
  dtype = torch.float16 if device != "cpu" else torch.float32
  
- 
  
  class Model:
-     def __init__(self, name, path="", prefix=""):
          self.name = name
          self.path = path
          self.prefix = prefix
          self.pipe_t2i = None
          self.pipe_i2i = None
  
- 
  models = [
-     # Model("anything v3", "Linaqruf/anything-v3.0", "anything v3 style"),
-     Model("anything v5", "stablediffusionapi/anything-v5", "anything v5 style"),
  ]
- # Model("Spider-Verse", "nitrosocke/spider-verse-diffusion", "spiderverse style "),
- # Model("Balloon Art", "Fictiverse/Stable_Diffusion_BalloonArt_Model", "BalloonArt "),
- # Model("Elden Ring", "nitrosocke/elden-ring-diffusion", "elden ring style "),
- # Model("Tron Legacy", "dallinmackay/Tron-Legacy-diffusion", "trnlgcy ")
- # Model("Pokémon", "lambdalabs/sd-pokemon-diffusers", ""),
- # Model("Pony Diffusion", "AstraliteHeart/pony-diffusion", ""),
- # Model("Robo Diffusion", "nousr/robo-diffusion", ""),
- 
- scheduler = DPMSolverMultistepScheduler(
-     beta_start=0.00085,
-     beta_end=0.012,
-     beta_schedule="scaled_linear",
-     num_train_timesteps=1000,
-     trained_betas=None,
-     predict_epsilon=True,
-     thresholding=False,
-     algorithm_type="dpmsolver++",
-     solver_type="midpoint",
-     solver_order=2,
-     # lower_order_final=True,
- )
  
  custom_model = None
  if is_colab:
      models.insert(0, Model("Custom model"))
@@ -88,177 +147,198 @@ if is_colab:
  last_mode = "txt2img"
  current_model = models[1] if is_colab else models[0]
  current_model_path = current_model.path
  
- if is_colab:
-     pipe = StableDiffusionPipeline.from_pretrained(
-         current_model.path,
-         torch_dtype=dtype,
-         scheduler=scheduler,
-         safety_checker=lambda images, clip_input: (images, False),
-     )
  
- else:  # download all models
-     print(f"{datetime.datetime.now()} Downloading vae...")
-     vae = AutoencoderKL.from_pretrained(
-         current_model.path, subfolder="vae", torch_dtype=dtype
-     )
-     for model in models:
          try:
-             print(f"{datetime.datetime.now()} Downloading {model.name} model...")
-             unet = UNet2DConditionModel.from_pretrained(
-                 model.path, subfolder="unet", torch_dtype=dtype
-             )
-             model.pipe_t2i = StableDiffusionPipeline.from_pretrained(
-                 model.path,
-                 unet=unet,
-                 vae=vae,
-                 torch_dtype=dtype,
-                 scheduler=scheduler,
-                 safety_checker=None,
-             )
-             model.pipe_i2i = StableDiffusionImg2ImgPipeline.from_pretrained(
-                 model.path,
-                 unet=unet,
-                 vae=vae,
                  torch_dtype=dtype,
                  scheduler=scheduler,
                  safety_checker=None,
              )
          except Exception as e:
-             print(
-                 f"{datetime.datetime.now()} Failed to load model "
-                 + model.name
-                 + ": "
-                 + str(e)
              )
-             models.remove(model)
-     pipe = models[0].pipe_t2i
- 
-     # model.pipe_i2i = torch.compile(model.pipe_i2i)
-     # model.pipe_t2i = torch.compile(model.pipe_t2i)
- if torch.cuda.is_available():
-     pipe = pipe.to(device)
  
- # device = "GPU 🔥" if torch.cuda.is_available() else "CPU 🥶"
- 
  
- def error_str(error, title="Error"):
      return (
-         f"""#### {title}
-             {error}"""
-         if error
-         else ""
      )
  
- 
- def custom_model_changed(path):
-     models[0].path = path
      global current_model
-     current_model = models[0]
- 
  
- def on_model_change(model_name):
-     prefix = (
-         'Enter prompt. "'
-         + next((m.prefix for m in models if m.name == model_name), None)
-         + '" is prefixed automatically'
-         if model_name != models[0].name
-         else "Don't forget to use the custom model prefix in the prompt!"
-     )
  
-     return (
-         gr.update(visible=model_name == models[0].name),
-         gr.update(placeholder=prefix),
-     )
  
- 
- def inference(
-     model_name,
-     prompt,
-     guidance,
-     steps,
-     width=512,
-     height=512,
-     seed=0,
-     img=None,
-     strength=0.5,
-     neg_prompt="",
-     scale="ESRGAN4x",
-     scale_factor=2,
- ):
-     fprint(psutil.virtual_memory())  # print memory usage
- 
-     fprint(f"Prompt: {prompt}")
-     global current_model
-     for model in models:
-         if model.name == model_name:
-             current_model = model
-             model_path = current_model.path
- 
-     generator = torch.Generator(device).manual_seed(seed) if seed != 0 else None
  
      try:
-         if img is not None:
-             return (
-                 img_to_img(
-                     model_path,
-                     prompt,
-                     neg_prompt,
-                     img,
-                     strength,
-                     guidance,
-                     steps,
-                     width,
-                     height,
-                     generator,
-                     scale,
-                     scale_factor,
-                 ),
-                 None,
              )
          else:
-             return (
-                 txt_to_img(
-                     model_path,
-                     prompt,
-                     neg_prompt,
-                     guidance,
-                     steps,
-                     width,
-                     height,
-                     generator,
-                     scale,
-                     scale_factor,
-                 ),
-                 None,
              )
-     except Exception as e:
-         return None, error_str(e)
-     # if img is not None:
-     #     return img_to_img(model_path, prompt, neg_prompt, img, strength, guidance, steps, width, height,
-     #                       generator, scale, scale_factor), None
-     # else:
-     #     return txt_to_img(model_path, prompt, neg_prompt, guidance, steps, width, height, generator, scale, scale_factor), None
  
  
  def txt_to_img(
-     model_path,
-     prompt,
-     neg_prompt,
-     guidance,
-     steps,
-     width,
-     height,
-     generator,
-     scale,
-     scale_factor,
- ):
-     print(f"{datetime.datetime.now()} txt_to_img, model: {current_model.name}")
- 
-     global last_mode
-     global pipe
-     global current_model_path
      if model_path != current_model_path or last_mode != "txt2img":
          current_model_path = model_path
  
@@ -267,70 +347,63 @@ def txt_to_img(
              current_model_path,
              torch_dtype=dtype,
              scheduler=scheduler,
-             safety_checker=lambda images, clip_input: (images, False),
          )
      else:
-         # pipe = pipe.to("cpu")
          pipe = current_model.pipe_t2i
  
      if torch.cuda.is_available():
          pipe = pipe.to(device)
      last_mode = "txt2img"
  
-     prompt = current_model.prefix + prompt
      result = pipe(
-         prompt,
          negative_prompt=neg_prompt,
-         # num_images_per_prompt=n_images,
          num_inference_steps=int(steps),
          guidance_scale=guidance,
          width=width,
          height=height,
          generator=generator,
-     )
  
-     # result.images[0] = magnifier.magnify(result.images[0], scale_factor=scale_factor)
-     # enhance resolution
      if scale_factor > 1:
-         if scale == "ESRGAN4x":
-             fp32 = True if device == "cpu" else False
-             result.images[0] = realEsrgan(
-                 input_dir=result.images[0],
                  suffix="",
                  output_dir="imgs",
                  fp32=fp32,
                  outscale=scale_factor,
              )[0]
-         else:
-             result.images[0] = magnifier.magnify(
-                 result.images[0], scale_factor=scale_factor
-             )
-     # save image
-     result.images[0].save(
-         "imgs/result-{}.png".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
-     )
-     return replace_nsfw_images(result)
  
  
  def img_to_img(
-     model_path,
-     prompt,
-     neg_prompt,
-     img,
-     strength,
-     guidance,
-     steps,
-     width,
-     height,
-     generator,
-     scale,
-     scale_factor,
- ):
-     fprint(f"{datetime.datetime.now()} img_to_img, model: {model_path}")
- 
-     global last_mode
-     global pipe
-     global current_model_path
      if model_path != current_model_path or last_mode != "img2img":
          current_model_path = model_path
  
@@ -339,263 +412,396 @@ def img_to_img(
              current_model_path,
              torch_dtype=dtype,
              scheduler=scheduler,
-             safety_checker=lambda images, clip_input: (images, False),
          )
      else:
-         # pipe = pipe.to("cpu")
          pipe = current_model.pipe_i2i
  
      if torch.cuda.is_available():
          pipe = pipe.to(device)
      last_mode = "img2img"
  
-     prompt = current_model.prefix + prompt
      ratio = min(height / img.height, width / img.width)
      img = img.resize((int(img.width * ratio), int(img.height * ratio)), Image.LANCZOS)
      result = pipe(
-         prompt,
          negative_prompt=neg_prompt,
-         # num_images_per_prompt=n_images,
          image=img,
          num_inference_steps=int(steps),
          strength=strength,
          guidance_scale=guidance,
-         # width=width,
-         # height=height,
          generator=generator,
-     )
      if scale_factor > 1:
-         if scale == "ESRGAN4x":
-             fp32 = True if device == "cpu" else False
-             result.images[0] = realEsrgan(
-                 input_dir=result.images[0],
                  suffix="",
                  output_dir="imgs",
                  fp32=fp32,
                  outscale=scale_factor,
              )[0]
-         else:
-             result.images[0] = magnifier.magnify(
-                 result.images[0], scale_factor=scale_factor
-             )
-     # save image
-     result.images[0].save(
-         "imgs/result-{}.png".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
-     )
-     return replace_nsfw_images(result)
  
  
- def replace_nsfw_images(results):
-     if is_colab:
-         return results.images[0]
-     if hasattr(results, "nsfw_content_detected") and results.nsfw_content_detected:
-         for i in range(len(results.images)):
-             if results.nsfw_content_detected[i]:
-                 results.images[i] = Image.open("nsfw.png")
-     return results.images[0]
  
  
- css = """.finetuned-diffusion-div div{display:inline-flex;align-items:center;gap:.8rem;font-size:1.75rem}.finetuned-diffusion-div div h1{font-weight:900;margin-bottom:7px}.finetuned-diffusion-div p{margin-bottom:10px;font-size:94%}a{text-decoration:underline}.tabs{margin-top:0;margin-bottom:0}#gallery{min-height:20rem}
- """
- with gr.Blocks(css=css) as demo:
-     if not os.path.exists("imgs"):
-         os.mkdir("imgs")
  
-     gr.Markdown("# Super Resolution Anime Diffusion")
-     gr.Markdown(
-         "## Author: [yangheng95](https://github.com/yangheng95) Github:[Github](https://github.com/yangheng95/stable-diffusion-webui)"
-     )
-     gr.Markdown(
-         "### This demo is running on a CPU, so it will take at least 20 minutes. "
-         "If you have a GPU, you can clone from [Github](https://github.com/yangheng95/SuperResolutionAnimeDiffusion) and run it locally."
-     )
-     gr.Markdown(
-         "### FYI: to generate a 512*512 image and magnify 4x, it only takes 5~8 seconds on a RTX 2080 GPU"
-     )
-     gr.Markdown(
-         "### You can duplicate this demo on HuggingFace Spaces, click [here](https://huggingface.co/spaces/yangheng/Super-Resolution-Anime-Diffusion?duplicate=true)"
-     )
  
-     with gr.Row():
-         with gr.Column(scale=55):
-             with gr.Group():
-                 gr.Markdown("Text to image")
  
-                 model_name = gr.Dropdown(
-                     label="Model",
-                     choices=[m.name for m in models],
-                     value=current_model.name,
-                 )
  
-                 with gr.Box(visible=False) as custom_model_group:
-                     custom_model_path = gr.Textbox(
-                         label="Custom model path",
-                         placeholder="Path to model, e.g. nitrosocke/Arcane-Diffusion",
-                         interactive=True,
-                     )
-                     gr.HTML(
-                         "<div><font size='2'>Custom models have to be downloaded first, so give it some time.</font></div>"
-                     )
  
-                 with gr.Row():
-                     prompt = gr.Textbox(
-                         label="Prompt",
-                         show_label=False,
-                         max_lines=2,
-                         placeholder="Enter prompt. Style applied automatically",
-                     ).style(container=False)
-                 with gr.Row():
-                     generate = gr.Button(value="Generate")
  
-             with gr.Row():
-                 with gr.Group():
-                     neg_prompt = gr.Textbox(
-                         label="Negative prompt",
-                         value="bad result, worst, random, invalid, inaccurate, imperfect, blurry, deformed,"
-                         " disfigured, mutation, mutated, ugly, out of focus, bad anatomy, text, error,"
-                         " extra digit, fewer digits, worst quality, low quality, normal quality, noise, "
-                         "jpeg artifact, compression artifact, signature, watermark, username, logo, "
-                         "low resolution, worst resolution, bad resolution, normal resolution, bad detail,"
-                         " bad details, bad lighting, bad shadow, bad shading, bad background,"
-                         " worst background.",
                      )
  
-             image_out = gr.Image(height="auto", width="auto")
-             error_output = gr.Markdown()
  
-             with gr.Row():
-                 gr.Markdown(
-                     "# Random Image Generation Preview (512*768)x4 magnified"
-                 )
-             for f_img in findfile.find_cwd_files(".png", recursive=2):
-                 with gr.Row():
-                     image = gr.Image(height=512, value=PIL.Image.open(f_img))
-             # gallery = gr.Gallery(
-             #     label="Generated images", show_label=False, elem_id="gallery"
-             # ).style(grid=[1], height="auto")
  
-         with gr.Column(scale=45):
-             with gr.Group():
-                 gr.Markdown("Image to Image")
  
                  with gr.Row():
-                     with gr.Group():
-                         image = gr.Image(
-                             label="Image", height=256, tool="editor", type="pil"
                          )
-                         strength = gr.Slider(
-                             label="Transformation strength",
-                             minimum=0,
-                             maximum=1,
-                             step=0.01,
-                             value=0.5,
                          )
  
-                 with gr.Row():
-                     with gr.Group():
-                         # n_images = gr.Slider(label="Images", value=1, minimum=1, maximum=4, step=1)
  
-                         with gr.Row():
-                             guidance = gr.Slider(
-                                 label="Guidance scale", value=7.5, maximum=15
-                             )
-                             steps = gr.Slider(
-                                 label="Steps", value=15, minimum=2, maximum=75, step=1
-                             )
  
-                         with gr.Row():
-                             width = gr.Slider(
-                                 label="Width",
-                                 value=512,
-                                 minimum=64,
-                                 maximum=1024,
-                                 step=8,
-                             )
-                             height = gr.Slider(
-                                 label="Height",
-                                 value=768,
-                                 minimum=64,
-                                 maximum=1024,
-                                 step=8,
-                             )
-                         with gr.Row():
-                             scale = gr.Radio(
-                                 label="Scale",
-                                 choices=["Waifu2x", "ESRGAN4x"],
-                                 value="Waifu2x",
-                             )
-                         with gr.Row():
-                             scale_factor = gr.Slider(
-                                 1,
-                                 8,
-                                 label="Scale factor (to magnify image) (1, 2, 4, 8)",
-                                 value=1,
-                                 step=1,
                              )
  
-                         seed = gr.Slider(
-                             0, 2147483647, label="Seed (0 = random)", value=0, step=1
                          )
  
-     if is_colab:
-         model_name.change(
-             on_model_change,
-             inputs=model_name,
-             outputs=[custom_model_group, prompt],
-             queue=False,
          )
-         custom_model_path.change(
-             custom_model_changed, inputs=custom_model_path, outputs=None
          )
-     # n_images.change(lambda n: gr.Gallery().style(grid=[2 if n > 1 else 1], height="auto"), inputs=n_images, outputs=gallery)
  
-     gr.Markdown(
-         "### based on [Anything V5]"
-     )
  
-     inputs = [
-         model_name,
-         prompt,
-         guidance,
-         steps,
-         width,
-         height,
-         seed,
-         image,
-         strength,
-         neg_prompt,
-         scale,
-         scale_factor,
-     ]
-     outputs = [image_out, error_output]
-     prompt.submit(inference, inputs=inputs, outputs=outputs)
-     generate.click(inference, inputs=inputs, outputs=outputs, api_name="generate")
- 
-     prompt_keys = [
-         "girl",
-         "lovely",
-         "cute",
-         "beautiful eyes",
-         "cumulonimbus clouds",
-         random.choice(["dress"]),
-         random.choice(["white hair"]),
-         random.choice(["blue eyes"]),
-         random.choice(["flower meadow"]),
-         random.choice(["Elif", "Angel"]),
-     ]
-     prompt.value = ",".join(prompt_keys)
-     ex = gr.Examples(
-         [
-             [models[0].name, prompt.value, 7.5, 15],
-         ],
-         inputs=[model_name, prompt, guidance, steps, seed],
-         outputs=outputs,
-         fn=inference,
-         cache_examples=False,
-     )
  
- print(f"Space built in {time.time() - start_time:.2f} seconds")
  
- if not is_colab:
-     demo.queue(concurrency_count=2)
- demo.launch(debug=is_colab, enable_queue=True, share=is_colab)
1
+ """
2
+ Super Resolution Anime Diffusion - Enhanced WebUI
3
+
4
+ This is an enhanced version of the original Super Resolution Anime Diffusion project by yangheng95.
5
+ The WebUI has been improved with modern Gradio API implementation, better user experience,
6
+ and comprehensive documentation.
7
+
8
+ Key Contributions:
9
+ - Updated to use modern Gradio Blocks API for better interface organization
10
+ - Added tabbed interface for Text-to-Image, Image-to-Image, and Gallery views
11
+ - Improved error handling and user feedback with progress indicators
12
+ - Enhanced UI styling with custom CSS and responsive design
13
+ - Better parameter organization with collapsible accordions
14
+ - Real-time system information display
15
+
16
+ Instructions:
17
+ 1. Choose between Text-to-Image or Image-to-Image tabs
18
+ 2. Select a model from the dropdown (or provide custom model path)
19
+ 3. Enter your prompt and adjust parameters as needed
20
+ 4. For Image-to-Image: upload a base image to transform
21
+ 5. Configure super-resolution settings (method and scale factor)
22
+ 6. Click Generate to create high-quality anime images with automatic upscaling
23
+
24
+ Original Author: yangheng95
25
+ Original Repository: https://github.com/yangheng95/SuperResolutionAnimeDiffusion
26
+ License: Creative ML Open RAIL-M
27
+ Enhanced WebUI by AI Assistant
28
+ """
29
+
30
  import os
31
+ import sys
32
  import zipfile
33
+ from typing import Optional, List, Tuple
34
+ from datetime import datetime
35
+ import time
36
+ import psutil
37
+
38
  import PIL.Image
39
  import autocuda
40
+ import findfile
 
 
 
 
 
 
 
 
 
 
41
 
42
  from diffusers import (
43
  AutoencoderKL,
 
50
  import torch
51
  from PIL import Image
52
  import utils
 
 
 
53
  from Waifu2x.magnify import ImageMagnifier
54
  from RealESRGANv030.interface import realEsrgan
55
 
56
+ sys.path.append(os.path.dirname(__file__)) # Ensure current directory is in path
57
+ os.environ["PYTHONPATH"] = os.path.dirname(__file__)
58
+
59
+ # Application Configuration
60
+ APP_TITLE = "🎨 Super Resolution Anime Diffusion"
61
+ APP_DESCRIPTION = """
62
+ Generate high-quality anime images with automatic super resolution enhancement.
63
+ Combines Stable Diffusion models with advanced upscaling techniques (RealESRGAN & Waifu2x).
64
+ """
65
 
66
+ CONTRIBUTION_INFO = """
67
+ ### 🤝 Enhanced Features
68
+ This interface improves upon the original work with:
69
+ - **Modern UI**: Clean tabbed interface with Gradio Blocks
70
+ - **Better UX**: Progress tracking and real-time feedback
71
+ - **Enhanced Parameters**: Organized controls with descriptions
72
+ - **Gallery View**: Browse and manage generated images
73
+ - **Error Handling**: Comprehensive error reporting and recovery
74
+ """
75
+
76
+ INSTRUCTIONS = """
77
+ ### 🚀 How to Use
78
+ 1. **Select Mode**: Choose Text-to-Image or Image-to-Image tab
79
+ 2. **Pick Model**: Select from available models or use custom path
80
+ 3. **Create Prompt**: Describe your desired image (use negative prompt to avoid elements)
81
+ 4. **Upload Image**: For img2img mode, provide base image
82
+ 5. **Adjust Settings**: Fine-tune resolution, steps, and guidance
83
+ 6. **Set Upscaling**: Choose super-resolution method and scale
84
+ 7. **Generate**: Click the generate button and wait for results!
85
+ """
86
+
87
+ COPYRIGHT_INFO = """
88
+ **Original Author**: [yangheng95](https://github.com/yangheng95) |
89
+ **Repository**: [SuperResolutionAnimeDiffusion](https://github.com/yangheng95/SuperResolutionAnimeDiffusion) |
90
+ **License**: Creative ML Open RAIL-M | **Enhanced by**: AI Assistant
91
+ """
92
+
93
+ DEFAULT_NEGATIVE_PROMPT = "bad result, worst, random, invalid, inaccurate, imperfect, blurry, deformed, disfigured, mutation, mutated, ugly, out of focus, bad anatomy, text, error, extra digit, fewer digits, worst quality, low quality, normal quality, noise, jpeg artifact, compression artifact, signature, watermark, username, logo, low resolution, worst resolution, bad resolution, normal resolution, bad detail, bad details, bad lighting, bad shadow, bad shading, bad background, worst background"
94
+
95
+ # Initialization
96
+ magnifier = ImageMagnifier()
97
  start_time = time.time()
98
  is_colab = utils.is_google_colab()
 
 
99
  device = autocuda.auto_cuda()
 
100
  dtype = torch.float16 if device != "cpu" else torch.float32
101
 
102
+ # Extract zip files if needed
103
+ for z_file in findfile.find_cwd_files(and_key=['.zip'], exclude_key=['.ignore'], recursive=1):
104
+ try:
105
+ with zipfile.ZipFile(z_file, 'r') as zip_ref:
106
+ zip_ref.extractall()
107
+ except Exception as e:
108
+ print(f"Warning: Could not extract {z_file}: {e}")
109
 
110
  class Model:
111
+ """Model configuration class"""
112
+ def __init__(self, name: str, path: str = "", prefix: str = ""):
113
  self.name = name
114
  self.path = path
115
  self.prefix = prefix
116
  self.pipe_t2i = None
117
  self.pipe_i2i = None
118
 
119
+ # Model configurations
120
  models = [
121
+ Model("Anything v4.5", "xyn-ai/anything-v4.0", "anything v4.5 style"),
 
122
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
 
124
+ # Scheduler configuration
125
+ scheduler = DPMSolverMultistepScheduler.from_config({
126
+ "beta_start": 0.00085,
127
+ "beta_end": 0.012,
128
+ "beta_schedule": "scaled_linear",
129
+ "num_train_timesteps": 1000,
130
+ "trained_betas": None,
131
+ "prediction_type": "epsilon",
132
+ "thresholding": False,
133
+ "algorithm_type": "dpmsolver++",
134
+ "solver_type": "midpoint",
135
+ "solver_order": 2,
136
+ "use_karras_sigmas": False,
137
+ "timestep_spacing": "leading",
138
+ "steps_offset": 1
139
+ })
140
+
141
+ # Global state
142
  custom_model = None
143
  if is_colab:
144
  models.insert(0, Model("Custom model"))
 
147
  last_mode = "txt2img"
148
  current_model = models[1] if is_colab else models[0]
149
  current_model_path = current_model.path
150
+ pipe = None
151
 
152
+ def initialize_models():
153
+ """Initialize diffusion models with error handling"""
154
+ global pipe
 
 
 
 
155
 
156
+ if is_colab:
 
 
 
 
 
157
  try:
158
+ pipe = StableDiffusionPipeline.from_pretrained(
159
+ current_model.path,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  torch_dtype=dtype,
161
  scheduler=scheduler,
162
  safety_checker=None,
163
  )
164
  except Exception as e:
165
+ print(f"Failed to initialize model: {e}")
166
+ return
167
+ else:
168
+ print(f"{datetime.now()} Loading models...")
169
+ try:
170
+ vae = AutoencoderKL.from_pretrained(
171
+ current_model.path, subfolder="vae", torch_dtype=dtype
172
  )
 
 
 
 
 
 
 
173
 
174
+ for model in models[:]:
175
+ try:
176
+ print(f"Loading {model.name}...")
177
+ unet = UNet2DConditionModel.from_pretrained(
178
+ model.path, subfolder="unet", torch_dtype=dtype
179
+ )
180
+ model.pipe_t2i = StableDiffusionPipeline.from_pretrained(
181
+ model.path,
182
+ unet=unet,
183
+ vae=vae,
184
+ torch_dtype=dtype,
185
+ scheduler=scheduler,
186
+ safety_checker=None,
187
+ )
188
+ model.pipe_i2i = StableDiffusionImg2ImgPipeline.from_pretrained(
189
+ model.path,
190
+ unet=unet,
191
+ vae=vae,
192
+ torch_dtype=dtype,
193
+ scheduler=scheduler,
194
+ safety_checker=None,
195
+ )
196
+ print(f"✅ {model.name} loaded successfully")
197
+ except Exception as e:
198
+ print(f"❌ Failed to load {model.name}: {e}")
199
+ models.remove(model)
200
 
201
+ if models:
202
+ pipe = models[0].pipe_t2i
203
+ except Exception as e:
204
+ print(f"Failed to initialize models: {e}")
205
+ return
206
+
207
+ if torch.cuda.is_available() and pipe:
208
+ pipe = pipe.to(device)
209
+
210
+ def get_system_info() -> str:
211
+ """Get system information"""
212
+ gpu_name = "CPU"
213
+ if torch.cuda.is_available():
214
+ gpu_name = torch.cuda.get_device_name()
215
+
216
+ memory = psutil.virtual_memory()
217
+ return f"🖥️ Device: {gpu_name} | 💾 RAM: {memory.available // (1024**3):.1f}GB"
218
+
219
+ def error_str(error: Exception, title: str = "Error") -> str:
220
+ """Format error messages"""
221
+ return f"### ❌ {title}\n```\n{str(error)}\n```"
222
+
223
+ def custom_model_changed(path: str) -> str:
224
+ """Handle custom model path changes"""
225
+ if custom_model and path.strip():
226
+ models[0].path = path.strip()
227
+ global current_model
228
+ current_model = models[0]
229
+ return "✅ Custom model path updated"
230
+ return "❌ Please enter a valid model path"
231
+
232
+ def on_model_change(model_name: str) -> Tuple[gr.update, gr.update]:
233
+ """Handle model selection changes"""
234
+ selected_model = next((m for m in models if m.name == model_name), None)
235
+
236
+ if selected_model and selected_model != models[0] if custom_model else True:
237
+ prefix_text = f'Prompt (automatically prefixed with "{selected_model.prefix}")'
238
+ is_custom = False
239
+ else:
240
+ prefix_text = "Enter prompt (remember to include model-specific prefix)"
241
+ is_custom = True
242
 
 
243
  return (
244
+ gr.update(visible=is_custom),
245
+ gr.update(placeholder=prefix_text),
 
 
246
  )
247
 
248
+ def generate_image(
249
+ mode: str,
250
+ model_name: str,
251
+ prompt: str,
252
+ negative_prompt: str,
253
+ width: int,
254
+ height: int,
255
+ guidance_scale: float,
256
+ num_steps: int,
257
+ seed: int,
258
+ image: Optional[PIL.Image.Image],
259
+ strength: float,
260
+ scale_method: str,
261
+ scale_factor: int,
262
+ progress=gr.Progress()
263
+ ) -> Tuple[Optional[PIL.Image.Image], str]:
264
+ """Main image generation function"""
265
+
266
+ if progress:
267
+ progress(0, desc="Starting generation...")
268
+
269
+ # Validation
270
+ if not prompt.strip():
271
+ return None, "❌ Please enter a prompt"
272
+
273
+ if mode == "img2img" and image is None:
274
+ return None, "❌ Please upload an image for Image-to-Image mode"
275
+
276
+ # Find model
277
  global current_model
278
+ selected_model = next((m for m in models if m.name == model_name), None)
279
+ if not selected_model:
280
+ return None, error_str(ValueError(f"Model '{model_name}' not found"))
281
 
282
+ current_model = selected_model
 
 
 
 
 
 
 
283
 
284
+ if progress:
285
+ progress(0.1, desc=f"Using {model_name}")
 
 
286
 
287
+ # Setup generator
288
+ if seed <= 0:
289
+ seed = torch.randint(0, 2**32-1, (1,)).item()
290
+ generator = torch.Generator(device).manual_seed(seed)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
 
292
  try:
293
+ if mode == "img2img":
294
+ result_image = img_to_img(
295
+ current_model.path, prompt, negative_prompt, image, strength,
296
+ guidance_scale, num_steps, width, height, generator,
297
+ scale_method, scale_factor, progress
 
 
 
 
 
 
 
 
 
 
 
 
298
  )
299
  else:
300
+ result_image = txt_to_img(
301
+ current_model.path, prompt, negative_prompt, guidance_scale,
302
+ num_steps, width, height, generator, scale_method, scale_factor,
303
+ progress
 
 
 
 
 
 
 
 
 
 
304
  )
 
 
 
 
 
 
 
305
 
306
+ if progress:
307
+ progress(1.0, desc="Complete!")
308
+
309
+ # Save result
310
+ timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
311
+ os.makedirs("imgs", exist_ok=True)
312
+ filename = f"imgs/result-{timestamp}.png"
313
+ result_image.save(filename)
314
+
315
+ info = f"""### ✅ Generation Complete
316
+ - **Mode**: {mode}
317
+ - **Model**: {model_name}
318
+ - **Resolution**: {result_image.size[0]}x{result_image.size[1]}
319
+ - **Scale**: {scale_factor}x ({scale_method})
320
+ - **Seed**: {seed}
321
+ - **Saved**: {filename}"""
322
+
323
+ return result_image, info
324
+
325
+ except Exception as e:
326
+ print(f"Generation error: {e}")
327
+ return None, error_str(e, "Generation Failed")
328
 
329
  def txt_to_img(
330
+ model_path: str, prompt: str, neg_prompt: str, guidance: float,
331
+ steps: int, width: int, height: int, generator, scale: str,
332
+ scale_factor: int, progress
333
+ ) -> PIL.Image.Image:
334
+ """Text-to-image generation"""
335
+
336
+ global last_mode, pipe, current_model_path
337
+
338
+ if progress:
339
+ progress(0.2, desc="Loading pipeline...")
340
+
341
+ # Load pipeline if needed
 
 
 
 
342
  if model_path != current_model_path or last_mode != "txt2img":
343
  current_model_path = model_path
344
 
 
347
  current_model_path,
348
  torch_dtype=dtype,
349
  scheduler=scheduler,
350
+ safety_checker=None,
351
  )
352
  else:
 
353
  pipe = current_model.pipe_t2i
354
 
355
  if torch.cuda.is_available():
356
  pipe = pipe.to(device)
357
  last_mode = "txt2img"
358
 
359
+ if progress:
360
+ progress(0.4, desc="Generating image...")
361
+
362
+ # Add model prefix
363
+ full_prompt = f"{current_model.prefix}, {prompt}" if current_model.prefix else prompt
364
+
365
  result = pipe(
366
+ full_prompt,
367
  negative_prompt=neg_prompt,
 
368
  num_inference_steps=int(steps),
369
  guidance_scale=guidance,
370
  width=width,
371
  height=height,
372
  generator=generator,
373
+ ).images[0]
374
 
375
+ if progress:
376
+ progress(0.7, desc="Applying super resolution...")
377
+
378
+ # Apply super resolution
379
  if scale_factor > 1:
380
+ if scale == "RealESRGAN":
381
+ fp32 = device == "cpu"
382
+ result = realEsrgan(
383
+ input_dir=result,
384
  suffix="",
385
  output_dir="imgs",
386
  fp32=fp32,
387
  outscale=scale_factor,
388
  )[0]
389
+ else: # Waifu2x
390
+ result = magnifier.magnify(result, scale_factor=scale_factor)
 
 
 
 
 
 
 
391
 
392
+ return result
393
 
394
  def img_to_img(
395
+ model_path: str, prompt: str, neg_prompt: str, img: PIL.Image.Image,
396
+ strength: float, guidance: float, steps: int, width: int, height: int,
397
+ generator, scale: str, scale_factor: int, progress
398
+ ) -> PIL.Image.Image:
399
+ """Image-to-image generation"""
400
+
401
+ global last_mode, pipe, current_model_path
402
+
403
+ if progress:
404
+ progress(0.2, desc="Loading pipeline...")
405
+
406
+ # Load pipeline if needed
 
 
 
 
 
 
407
  if model_path != current_model_path or last_mode != "img2img":
408
  current_model_path = model_path
409
 
 
412
  current_model_path,
413
  torch_dtype=dtype,
414
  scheduler=scheduler,
415
+ safety_checker=None,
416
  )
417
  else:
 
418
  pipe = current_model.pipe_i2i
419
 
420
  if torch.cuda.is_available():
421
  pipe = pipe.to(device)
422
  last_mode = "img2img"
423
 
424
+ # Resize input image
425
+ if progress:
426
+ progress(0.3, desc="Processing input image...")
427
+
428
  ratio = min(height / img.height, width / img.width)
429
  img = img.resize((int(img.width * ratio), int(img.height * ratio)), Image.LANCZOS)
430
+
431
+ # Add model prefix
432
+ full_prompt = f"{current_model.prefix}, {prompt}" if current_model.prefix else prompt
433
+
434
+ if progress:
435
+ progress(0.4, desc="Transforming image...")
436
+
437
  result = pipe(
438
+ full_prompt,
439
  negative_prompt=neg_prompt,
 
440
  image=img,
441
  num_inference_steps=int(steps),
442
  strength=strength,
443
  guidance_scale=guidance,
 
 
444
  generator=generator,
445
+ ).images[0]
446
+
447
+ if progress:
448
+ progress(0.7, desc="Applying super resolution...")
449
+
450
+ # Apply super resolution
451
  if scale_factor > 1:
452
+ if scale == "RealESRGAN":
453
+ fp32 = device == "cpu"
454
+ result = realEsrgan(
455
+ input_dir=result,
456
  suffix="",
457
  output_dir="imgs",
458
  fp32=fp32,
459
  outscale=scale_factor,
460
  )[0]
461
+ else: # Waifu2x
462
+ result = magnifier.magnify(result, scale_factor=scale_factor)
463
+
464
+ return result
465
+
466
+ def load_example_images() -> List[str]:
467
+ """Load example images for gallery"""
468
+ example_images = []
469
+ for f_img in findfile.find_cwd_files(".png", recursive=2):
470
+ if "result-" in os.path.basename(f_img) or "random_examples" in f_img:
471
+ example_images.append(f_img)
472
+ return example_images[:12] # Limit examples
473
+
474
+ # Custom CSS for styling
475
+ custom_css = """
476
+ .gradio-container {
477
+ font-family: 'Segoe UI', system-ui, sans-serif;
478
+ max-width: 1400px;
479
+ margin: 0 auto;
480
+ }
481
+
482
+ .header-section {
483
+ text-align: center;
484
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
485
+ color: white;
486
+ padding: 2rem;
487
+ border-radius: 15px;
488
+ margin-bottom: 2rem;
489
+ }
490
+
491
+ .info-card {
492
+ background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
493
+ color: white;
494
+ padding: 1.5rem;
495
+ border-radius: 10px;
496
+ margin: 1rem 0;
497
+ }
498
+
499
+ .status-info {
500
+ background: #e8f5e8;
501
+ border-left: 4px solid #4CAF50;
502
+ padding: 1rem;
503
+ border-radius: 5px;
504
+ margin: 1rem 0;
505
+ }
506
+
507
+ .generate-btn {
508
+ background: linear-gradient(45deg, #FF6B6B, #4ECDC4) !important;
509
+ border: none !important;
510
+ border-radius: 25px !important;
511
+ padding: 15px 30px !important;
512
+ font-size: 16px !important;
513
+ font-weight: bold !important;
514
+ color: white !important;
515
+ transition: all 0.3s ease !important;
516
+ }
517
+
518
+ .generate-btn:hover {
519
+ transform: translateY(-2px) !important;
520
+ box-shadow: 0 10px 20px rgba(0,0,0,0.2) !important;
521
+ }
522
+ """
523
 
524
+ def create_interface():
525
+ """Create the Gradio interface"""
526
 
527
+ with gr.Blocks(title=APP_TITLE, css=custom_css) as demo:
 
 
 
 
 
 
 
528
 
529
+ # Header
530
+ with gr.Row():
531
+ gr.HTML(f"""
532
+ <div class="header-section">
533
+ <h1 style="font-size: 2.5rem; margin-bottom: 1rem;">{APP_TITLE}</h1>
534
+ <p style="font-size: 1.2rem; margin-bottom: 1rem;">{APP_DESCRIPTION}</p>
535
+ <div style="font-size: 1rem;">{get_system_info()}</div>
536
+ </div>
537
+ """)
538
 
539
+ # Info sections
540
+ with gr.Row():
541
+ with gr.Column():
542
+ gr.Markdown(INSTRUCTIONS, elem_classes=["info-card"])
543
+ with gr.Column():
544
+ gr.Markdown(CONTRIBUTION_INFO, elem_classes=["info-card"])
545
 
546
+ # Copyright
547
+ gr.Markdown(f"### 📄 {COPYRIGHT_INFO}", elem_classes=["status-info"])
 
 
 
 
 
 
 
 
 
 
 
 
548
 
549
+ # Main interface
550
+ with gr.Tabs():
 
 
551
 
552
+ # Text-to-Image Tab
553
+ with gr.TabItem("🎨 Text-to-Image"):
554
+ with gr.Row():
555
+ with gr.Column(scale=1):
 
556
 
557
+ # Model selection
558
+ model_dropdown = gr.Dropdown(
559
+ choices=[m.name for m in models],
560
+ value=current_model.name,
561
+ label="🤖 Model Selection"
562
+ )
 
 
 
563
 
564
+ # Custom model path
565
+ custom_model_path = gr.Textbox(
566
+ label="🔗 Custom Model Path (HuggingFace)",
567
+ placeholder="username/model-name",
568
+ visible=custom_model is not None
569
+ )
 
 
 
570
 
571
+ # Prompts
572
+ prompt_txt2img = gr.Textbox(
573
+ label="✨ Prompt",
574
+ placeholder="Describe your desired image...",
575
+ lines=3
 
 
 
 
 
 
576
  )
577
 
578
+ negative_prompt_txt2img = gr.Textbox(
579
+ label="🚫 Negative Prompt",
580
+ value=DEFAULT_NEGATIVE_PROMPT,
581
+ lines=2
582
+ )
583
 
584
+ # Parameters
585
+ with gr.Accordion("🎛️ Generation Parameters", open=False):
586
+ with gr.Row():
587
+ width_txt2img = gr.Slider(256, 1024, 512, step=64, label="Width")
588
+ height_txt2img = gr.Slider(256, 1024, 512, step=64, label="Height")
 
 
 
 
 
589
 
590
+ with gr.Row():
591
+ guidance_scale_txt2img = gr.Slider(1, 20, 7.5, step=0.5, label="Guidance Scale")
592
+ num_steps_txt2img = gr.Slider(10, 50, 20, label="Steps")
593
 
594
+ seed_txt2img = gr.Number(label="Seed (-1 for random)", value=-1, precision=0)
595
+
596
+ # Super Resolution
597
+ with gr.Accordion("🔍 Super Resolution", open=True):
598
+ scale_method_txt2img = gr.Radio(
599
+ choices=["RealESRGAN", "Waifu2x"],
600
+ value="RealESRGAN",
601
+ label="Method"
602
+ )
603
+ scale_factor_txt2img = gr.Slider(1, 4, 2, step=1, label="Scale Factor")
604
+
605
+ with gr.Column(scale=1):
606
+ # Generate button
607
+ generate_btn_txt2img = gr.Button(
608
+ "🎨 Generate Image",
609
+ variant="primary",
610
+ elem_classes=["generate-btn"]
611
+ )
612
+
613
+ # Output
614
+ output_image_txt2img = gr.Image(label="Generated Image", type="pil")
615
+ output_info_txt2img = gr.Markdown("Ready to generate! 🚀")
616
+
617
+ # Image-to-Image Tab
618
+ with gr.TabItem("🖼️ Image-to-Image"):
619
  with gr.Row():
620
+ with gr.Column(scale=1):
621
+
622
+ # Input image
623
+ input_image_img2img = gr.Image(
624
+ label="📤 Input Image",
625
+ type="pil"
626
  )
627
+
628
+ # Model selection
629
+ model_dropdown_img2img = gr.Dropdown(
630
+ choices=[m.name for m in models],
631
+ value=current_model.name,
632
+ label="🤖 Model Selection"
633
  )
634
 
635
+ # Prompts
636
+ prompt_img2img = gr.Textbox(
637
+ label=" Transformation Prompt",
638
+ placeholder="How to transform the image...",
639
+ lines=3
640
+ )
641
 
642
+ negative_prompt_img2img = gr.Textbox(
643
+ label="🚫 Negative Prompt",
644
+ value=DEFAULT_NEGATIVE_PROMPT,
645
+ lines=2
646
+ )
 
 
647
 
648
+ # Parameters
649
+ with gr.Accordion("🎛️ Generation Parameters", open=False):
650
+ with gr.Row():
651
+ width_img2img = gr.Slider(256, 1024, 512, step=64, label="Width")
652
+ height_img2img = gr.Slider(256, 1024, 512, step=64, label="Height")
653
+
654
+ strength_img2img = gr.Slider(
655
+ 0.1, 1.0, 0.75, step=0.05,
656
+ label="Strength (how much to change)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
657
  )
658
 
659
+ with gr.Row():
660
+ guidance_scale_img2img = gr.Slider(1, 20, 7.5, step=0.5, label="Guidance")
661
+ num_steps_img2img = gr.Slider(10, 50, 20, label="Steps")
662
+
663
+ seed_img2img = gr.Number(label="Seed (-1 for random)", value=-1, precision=0)
664
+
665
+ # Super Resolution
666
+ with gr.Accordion("🔍 Super Resolution", open=True):
667
+ scale_method_img2img = gr.Radio(
668
+ choices=["RealESRGAN", "Waifu2x"],
669
+ value="RealESRGAN",
670
+ label="Method"
671
+ )
672
+ scale_factor_img2img = gr.Slider(1, 4, 2, step=1, label="Scale Factor")
673
+
674
+ with gr.Column(scale=1):
675
+ # Generate button
676
+ generate_btn_img2img = gr.Button(
677
+ "🖼️ Transform Image",
678
+ variant="primary",
679
+ elem_classes=["generate-btn"]
680
  )
681
 
682
+ # Output
683
+ output_image_img2img = gr.Image(label="Transformed Image", type="pil")
684
+ output_info_img2img = gr.Markdown("Upload an image to transform! 🖼️")
685
+
686
+ # Gallery Tab
687
+ with gr.TabItem("🖼️ Gallery"):
688
+ gr.Markdown("### 🎨 Generated Images")
689
+
690
+ with gr.Row():
691
+ refresh_gallery_btn = gr.Button("🔄 Refresh Gallery", variant="secondary")
692
+
693
+ example_gallery = gr.Gallery(
694
+ value=load_example_images(),
695
+ label="Results Gallery",
696
+ show_label=False,
697
+ columns=4,
698
+ height="auto"
699
+ )
700
+
701
+ # Event handlers
702
+
703
+ # Model changes
704
+ model_dropdown.change(
705
+ fn=on_model_change,
706
+ inputs=[model_dropdown],
707
+ outputs=[custom_model_path, prompt_txt2img]
708
  )
709
+
710
+ # Sync models between tabs
711
+ model_dropdown.change(
712
+ fn=lambda x: gr.update(value=x),
713
+ inputs=[model_dropdown],
714
+ outputs=[model_dropdown_img2img]
715
  )
 
716
 
717
+ model_dropdown_img2img.change(
718
+ fn=lambda x: gr.update(value=x),
719
+ inputs=[model_dropdown_img2img],
720
+ outputs=[model_dropdown]
721
+ )
722
 
723
+ # Custom model path
724
+ if custom_model:
725
+ custom_model_path.change(
726
+ fn=custom_model_changed,
727
+ inputs=[custom_model_path],
728
+ outputs=[output_info_txt2img]
729
+ )
730
+
731
+ # Generation events
732
+ generate_btn_txt2img.click(
733
+ fn=generate_image,
734
+ inputs=[
735
+ gr.State("txt2img"),
736
+ model_dropdown,
737
+ prompt_txt2img,
738
+ negative_prompt_txt2img,
739
+ width_txt2img,
740
+ height_txt2img,
741
+ guidance_scale_txt2img,
742
+ num_steps_txt2img,
743
+ seed_txt2img,
744
+ gr.State(None), # No input image for txt2img
745
+ gr.State(0.75), # Default strength
746
+ scale_method_txt2img,
747
+ scale_factor_txt2img
748
+ ],
749
+ outputs=[output_image_txt2img, output_info_txt2img]
750
+ )
751
+
752
+ generate_btn_img2img.click(
753
+ fn=generate_image,
754
+ inputs=[
755
+ gr.State("img2img"),
756
+ model_dropdown_img2img,
757
+ prompt_img2img,
758
+ negative_prompt_img2img,
759
+ width_img2img,
760
+ height_img2img,
761
+ guidance_scale_img2img,
762
+ num_steps_img2img,
763
+ seed_img2img,
764
+ input_image_img2img,
765
+ strength_img2img,
766
+ scale_method_img2img,
767
+ scale_factor_img2img
768
+ ],
769
+ outputs=[output_image_img2img, output_info_img2img]
770
+ )
771
 
772
+ # Gallery refresh
773
+ refresh_gallery_btn.click(
774
+ fn=load_example_images,
775
+ outputs=[example_gallery]
776
+ )
777
+
778
+ return demo
779
+
780
+ if __name__ == "__main__":
781
+ # Initialize
782
+ print(f"🚀 Starting {APP_TITLE}...")
783
+ print(f"⏱️ Initialization time: {time.time() - start_time:.2f}s")
784
+ print(f"🖥️ {get_system_info()}")
785
+
786
+ # Ensure output directory
787
+ os.makedirs("imgs", exist_ok=True)
788
+
789
+ # Initialize models
790
+ initialize_models()
791
+
792
+ # Create and launch interface
793
+ demo = create_interface()
794
+
795
+ # Launch settings
796
+ launch_kwargs = {
797
+ "share": False,
798
+ "server_name": "0.0.0.0",
799
+ "server_port": 7860,
800
+ "show_error": True,
801
+ }
802
+
803
+ if is_colab:
804
+ launch_kwargs["share"] = True
805
 
806
+ print("🌐 Launching WebUI...")
807
+ demo.launch(**launch_kwargs)
 
gfpgan/weights/detection_Resnet50_Final.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d1de9c2944f2ccddca5f5e010ea5ae64a39845a86311af6fdf30841b0a5a16d
3
+ size 109497761
gfpgan/weights/parsing_parsenet.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d558d8d0e42c20224f13cf5a29c79eba2d59913419f945545d8cf7b72920de2
3
+ size 85331193
huggingface_hub/README.md ADDED
@@ -0,0 +1,358 @@
1
+ # Hugging Face Hub Client library
2
+
3
+ ## Download files from the Hub
4
+
5
+ The `hf_hub_download()` function is the main entry point for downloading files from the Hub. One
6
+ advantage of using it is that files are cached locally, so you won't have to
7
+ download the same files multiple times. If the repository changes, the updated
8
+ files are automatically downloaded again.
9
+
10
+
11
+ ### `hf_hub_download`
12
+
13
+ The function takes the following parameters, downloads the remote file,
14
+ stores it to disk (in a version-aware way) and returns its local file path.
15
+
16
+ Parameters:
17
+ - a `repo_id` (a user or organization name and a repo name, separated by `/`, like `julien-c/EsperBERTo-small`)
18
+ - a `filename` (like `pytorch_model.bin`)
19
+ - an optional Git revision id (can be a branch name, a tag, or a commit hash)
20
+ - a `cache_dir` which you can specify if you want to control where on disk the
21
+ files are cached.
22
+
23
+ ```python
24
+ from huggingface_hub import hf_hub_download
25
+ hf_hub_download("lysandre/arxiv-nlp", filename="config.json")
26
+ ```
27
+
28
+ ### `snapshot_download`
29
+
30
+ Using `hf_hub_download()` works well when you know which files you want to download;
31
+ for example, a model file alongside a configuration file, both with static names.
32
+ There are cases in which you will prefer to download all the files of the remote
33
+ repository at a specified revision. That's what `snapshot_download()` does. It
34
+ downloads and stores a remote repository to disk (in a version-aware way) and
35
+ returns its local file path.
36
+
37
+ Parameters:
38
+ - a `repo_id` in the format `namespace/repository`
39
+ - a `revision` at which the repository will be downloaded
40
+ - a `cache_dir` which you can specify if you want to control where on disk the
41
+ files are cached
42
+
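+ A minimal sketch (reusing the `lysandre/arxiv-nlp` repo id from above; the revision is an assumption):
+
+ ```python
+ from huggingface_hub import snapshot_download
+ local_folder = snapshot_download("lysandre/arxiv-nlp", revision="main")
+ ```
+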
43
+ ### `hf_hub_url`
44
+
45
+ Internally, the library uses `hf_hub_url()` to return the URL to download the actual files:
46
+ `https://huggingface.co/julien-c/EsperBERTo-small/resolve/main/pytorch_model.bin`
47
+
48
+
49
+ Parameters:
50
+ - a `repo_id` (a user or organization name and a repo name separated by a `/`, like `julien-c/EsperBERTo-small`)
51
+ - a `filename` (like `pytorch_model.bin`)
52
+ - an optional `subfolder`, corresponding to a folder inside the model repo
53
+ - an optional `repo_type`, such as `dataset` or `space`
54
+ - an optional Git revision id (can be a branch name, a tag, or a commit hash)
55
+
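+ For example, a short sketch reconstructing the URL quoted above:
+
+ ```python
+ from huggingface_hub import hf_hub_url
+ url = hf_hub_url("julien-c/EsperBERTo-small", filename="pytorch_model.bin")
+ # url == "https://huggingface.co/julien-c/EsperBERTo-small/resolve/main/pytorch_model.bin"
+ ```
+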
56
+ If you check out this URL's headers with a `HEAD` HTTP request (which you can do
57
+ from the command line with `curl -I`) for a few different files, you'll see
58
+ that:
59
+ - small files are returned directly
60
+ - large files (i.e. the ones stored through
61
+ [git-lfs](https://git-lfs.github.com/)) are returned via a redirect to a
62
+ CloudFront URL. CloudFront is a Content Delivery Network (CDN) that ensures
63
+ that downloads are as fast as possible from anywhere on the globe.
64
+
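+ A small sketch of that check in Python (using `requests`, which is an assumption about your environment):
+
+ ```python
+ import requests
+
+ url = "https://huggingface.co/julien-c/EsperBERTo-small/resolve/main/pytorch_model.bin"
+ r = requests.head(url, allow_redirects=False)
+ # large (LFS) files answer with a 302 redirect whose `Location` header points to the CDN
+ print(r.status_code, r.headers.get("Location"))
+ ```
+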
65
+ <br>
66
+
67
+ ## Publish files to the Hub
68
+
69
+ If you've used Git before, this will be very easy since Git is used to manage
70
+ files in the Hub. You can find a step-by-step guide on how to upload your model
71
+ to the Hub: https://huggingface.co/docs/hub/adding-a-model.
72
+
73
+
74
+ ### API utilities in `hf_api.py`
75
+
76
+ You don't need them for the standard publishing workflow (i.e. using the git command line); however, if you need a
77
+ programmatic way of creating a repo, deleting it (`⚠️ caution`), pushing a
78
+ single file to a repo, or listing models from the Hub, you'll find helpers in
79
+ `hf_api.py`. Some example functionality available with the `HfApi` class:
80
+
81
+ * `whoami()`
82
+ * `create_repo()`
83
+ * `list_repo_files()`
84
+ * `list_repo_objects()`
85
+ * `delete_repo()`
86
+ * `update_repo_visibility()`
87
+ * `create_commit()`
88
+ * `upload_file()`
89
+ * `delete_file()`
90
+ * `delete_folder()`
91
+
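+ For instance, a minimal sketch (the repo name is hypothetical, and you must be logged in):
+
+ ```python
+ from huggingface_hub import HfApi
+
+ api = HfApi()
+ print(api.whoami())                       # identity of the logged-in user
+ api.create_repo("my-test-model")          # hypothetical repo name
+ print(api.list_repo_files("<user>/my-test-model"))
+ ```
+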
92
+ Those API utilities are also exposed through the `huggingface-cli` CLI:
93
+
94
+ ```bash
95
+ huggingface-cli login
96
+ huggingface-cli logout
97
+ huggingface-cli whoami
98
+ huggingface-cli repo create
99
+ ```
100
+
101
+ With the `HfApi` class there are methods to query models, datasets, and metrics by specific tags (e.g. if you want to list models compatible with your library):
102
+ - **Models**:
103
+ - `list_models()`
104
+ - `model_info()`
105
+ - `get_model_tags()`
106
+ - **Datasets**:
107
+ - `list_datasets()`
108
+ - `dataset_info()`
109
+ - `get_dataset_tags()`
110
+ - **Spaces**:
111
+ - `list_spaces()`
112
+ - `space_info()`
113
+
114
+ These lightly wrap around the API Endpoints. Documentation for valid parameters and descriptions can be found [here](https://huggingface.co/docs/hub/endpoints).
115
+
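+ A hedged sketch of such a query (the exact filter and sort values are assumptions):
+
+ ```python
+ from huggingface_hub import HfApi
+
+ api = HfApi()
+ # e.g. the five most-downloaded text-classification models
+ for model in api.list_models(filter="text-classification", sort="downloads", direction=-1, limit=5):
+     print(model.id)
+ ```
+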
116
+
117
+ ### Advanced programmatic repository management
118
+
119
+ The `Repository` class helps manage both offline Git repositories and Hugging
120
+ Face Hub repositories. Using the `Repository` class requires `git` and `git-lfs`
121
+ to be installed.
122
+
123
+ Instantiate a `Repository` object by calling it with a path to a local Git
124
+ clone/repository:
125
+
126
+ ```python
127
+ >>> from huggingface_hub import Repository
128
+ >>> repo = Repository("<path>/<to>/<folder>")
129
+ ```
130
+
131
+ The `Repository` class takes a `clone_from` string as a parameter. This can stay
132
+ `None` for offline management, but can also be set to any URL pointing to a Git
133
+ repo, in which case that repository is cloned into the specified directory:
134
+
135
+ ```python
136
+ >>> repo = Repository("huggingface-hub", clone_from="https://github.com/huggingface/huggingface_hub")
137
+ ```
138
+
139
+ The `clone_from` parameter can also take any Hugging Face model ID as input, and
140
+ will clone that repository:
141
+
142
+ ```python
143
+ >>> repo = Repository("w2v2", clone_from="facebook/wav2vec2-large-960h-lv60")
144
+ ```
145
+
146
+ If the repository you're cloning is one of yours or one of your organization's,
147
+ you will likely want to be able to commit and push to it. To do that,
148
+ make sure you are logged in using `huggingface-cli
149
+ login`, and keep the `token` parameter set to `True` (the default)
150
+ when instantiating the `Repository` object:
151
+
152
+ ```python
153
+ >>> repo = Repository("my-model", clone_from="<user>/<model_id>", token=True)
154
+ ```
155
+
156
+ This works for models, datasets and spaces repositories; but you will need to
157
+ explicitly specify the type for the last two options:
158
+
159
+ ```python
160
+ >>> repo = Repository("my-dataset", clone_from="<user>/<dataset_id>", token=True, repo_type="dataset")
161
+ ```
162
+
163
+ You can also change between branches:
164
+
165
+ ```python
166
+ >>> repo = Repository("huggingface-hub", clone_from="<user>/<dataset_id>", revision='branch1')
167
+ >>> repo.git_checkout("branch2")
168
+ ```
169
+
177
+ Finally, you can choose to specify the Git username and email attributed to that
178
+ clone directly by using the `git_user` and `git_email` parameters. When
179
+ committing to that repository, Git will therefore be aware of who you are and
180
+ who will be the author of the commits:
181
+
182
+ ```python
183
+ >>> repo = Repository(
184
+ ... "my-dataset",
185
+ ... clone_from="<user>/<dataset_id>",
186
+ ... token=True,
187
+ ... repo_type="dataset",
188
+ ... git_user="MyName",
189
+ ... git_email="[email protected]"
190
+ ... )
191
+ ```
192
+
193
+ The repository can be managed through this object, via wrappers of
194
+ traditional Git methods:
195
+
196
+ - `git_add(pattern: str, auto_lfs_track: bool)`. The `auto_lfs_track` flag
197
+ triggers auto tracking of large files (>10MB) with `git-lfs`
198
+ - `git_commit(commit_message: str)`
199
+ - `git_pull(rebase: bool)`
200
+ - `git_push()`
201
+ - `git_checkout(branch)`
202
+
203
+ The `git_push` method has a `blocking` parameter which is `True` by default. When set to `False`, the push
204
+ happens in the background, which can be helpful if you would like your script to continue
205
+ while the push is in progress.
206
+
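+ Putting these wrappers together, a minimal sketch of a manual add/commit/push cycle (the path pattern is hypothetical):
+
+ ```python
+ repo.git_add("checkpoints/*", auto_lfs_track=True)  # track any file >10MB with git-lfs
+ repo.git_commit("Add new checkpoints")
+ repo.git_push()
+ ```
+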
207
+ LFS-tracking methods:
208
+
209
+ - `lfs_track(pattern: Union[str, List[str]], filename: bool)`. Setting
210
+ `filename` to `True` will use the `--filename` parameter, which will consider
211
+ the pattern(s) as filenames, even if they contain special glob characters.
212
+ - `lfs_untrack()`.
213
+ - `auto_track_large_files()`: automatically tracks files that are larger than
214
+ 10MB. Make sure to call this after adding files to the index.
215
+
216
+ On top of these low-level methods, some useful higher-level helpers are available:
217
+
218
+ - `push_to_hub(commit_message)`: consecutively does `git_add`, `git_commit` and
219
+ `git_push`.
220
+ - `commit(commit_message: str, track_large_files: bool)`: this is a context
221
+ manager utility that handles committing to a repository. This automatically
222
+ tracks large files (>10MB) with `git-lfs`. The `track_large_files` argument can
223
+ be set to `False` if you wish to ignore that behavior.
224
+
225
+ These two methods also have support for the `blocking` parameter.
226
+
227
+ Examples using the `commit` context manager:
228
+ ```python
229
+ >>> with Repository("text-files", clone_from="<user>/text-files", token=True).commit("My first file :)"):
230
+ ... with open("file.txt", "w+") as f:
231
+ ... f.write(json.dumps({"hey": 8}))
232
+ ```
233
+
234
+ ```python
235
+ >>> import torch
236
+ >>> model = torch.nn.Transformer()
237
+ >>> with Repository("torch-model", clone_from="<user>/torch-model", token=True).commit("My cool model :)"):
238
+ ... torch.save(model.state_dict(), "model.pt")
239
+ ```
240
+
241
+ ### Non-blocking behavior
242
+
243
+ The pushing methods have access to a `blocking` boolean parameter to indicate whether the push should happen
244
+ asynchronously.
245
+
246
+ To check whether the push has finished, or to inspect its status code (to spot a failure), use the `command_queue`
247
+ property on the `Repository` object.
248
+
249
+ For example:
250
+
251
+ ```python
252
+ from huggingface_hub import Repository
253
+
254
+ repo = Repository("<local_folder>", clone_from="<user>/<model_name>")
255
+
256
+ with repo.commit("Commit message", blocking=False):
257
+ ...  # save data files into the local repo folder here
258
+
259
+ last_command = repo.command_queue[-1]
260
+
261
+ # Status of the push command
262
+ last_command.status
263
+ # Will return the status code
264
+ # -> -1 will indicate the push is still ongoing
265
+ # -> 0 will indicate the push has completed successfully
266
+ # -> non-zero code indicates the error code if there was an error
267
+
268
+ # if there was an error, the stderr may be inspected
269
+ last_command.stderr
270
+
271
+ # Whether the command finished or if it is still ongoing
272
+ last_command.is_done
273
+
274
+ # Whether the command errored-out.
275
+ last_command.failed
276
+ ```
277
+
278
+ When using `blocking=False`, the commands will be tracked and your script will exit only when all pushes are done, even
279
+ if other errors happen in your script (a failed push counts as done).
280
+
281
+
282
+ ### Need to upload very large (>5GB) files?
283
+
284
+ To upload large files (>5GB 🔥) from the git command line, you need to install the custom transfer agent
285
+ for git-lfs, bundled in this package.
286
+
287
+ To install, just run:
288
+
289
+ ```bash
290
+ $ huggingface-cli lfs-enable-largefiles
291
+ ```
292
+
293
+ This should be executed once for each model repo that contains a model file
294
+ >5GB. If you just try to push a file bigger than 5GB without running that
295
+ command, you will get an error with a message reminding you to run it.
296
+
297
+ Finally, there's a `huggingface-cli lfs-multipart-upload` command, but that one
298
+ is internal (called by lfs directly) and is not meant to be called by the user.
299
+
300
+ <br>
301
+
302
+ ## Using the Inference API wrapper
303
+
304
+ `huggingface_hub` comes with a wrapper client to make calls to the Inference
305
+ API! You can find some examples below, but we encourage you to visit the
306
+ Inference API
307
+ [documentation](https://api-inference.huggingface.co/docs/python/html/detailed_parameters.html)
308
+ to review the specific parameters for the different tasks.
309
+
310
+ When you instantiate the wrapper for the Inference API, you specify the model
311
+ repository id. The pipeline (`text-classification`, `text-to-speech`, etc) is
312
+ automatically extracted from the
313
+ [repository](https://huggingface.co/docs/hub/main#how-is-a-models-type-of-inference-api-and-widget-determined),
314
+ but you can also override it as shown below.
315
+
316
+
317
+ ### Examples
318
+
319
+ Here is a basic example of calling the Inference API for a `fill-mask` task
320
+ using the `bert-base-uncased` model. The `fill-mask` task only expects a string
321
+ (or list of strings) as input.
322
+
323
+ ```python
324
+ from huggingface_hub.inference_api import InferenceApi
325
+ inference = InferenceApi("bert-base-uncased", token=API_TOKEN)
326
+ inference(inputs="The goal of life is [MASK].")
327
+ >> [{'sequence': 'the goal of life is life.', 'score': 0.10933292657136917, 'token': 2166, 'token_str': 'life'}]
328
+ ```
329
+
330
+ This is an example of a task (`question-answering`) which requires a dictionary
331
+ as input that has the `question` and `context` keys.
332
+
333
+ ```python
334
+ inference = InferenceApi("deepset/roberta-base-squad2", token=API_TOKEN)
335
+ inputs = {"question":"What's my name?", "context":"My name is Clara and I live in Berkeley."}
336
+ inference(inputs)
337
+ >> {'score': 0.9326569437980652, 'start': 11, 'end': 16, 'answer': 'Clara'}
338
+ ```
339
+
340
+ Some tasks might also require additional params in the request. Here is an
341
+ example using a `zero-shot-classification` model.
342
+
343
+ ```python
344
+ inference = InferenceApi("typeform/distilbert-base-uncased-mnli", token=API_TOKEN)
345
+ inputs = "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!"
346
+ params = {"candidate_labels":["refund", "legal", "faq"]}
347
+ inference(inputs, params)
348
+ >> {'sequence': 'Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!', 'labels': ['refund', 'faq', 'legal'], 'scores': [0.9378499388694763, 0.04914155602455139, 0.013008488342165947]}
349
+ ```
350
+
351
+ Finally, there are some models that might support multiple tasks. For example,
352
+ `sentence-transformers` models can do `sentence-similarity` and
353
+ `feature-extraction`. You can override the configured task when initializing the
354
+ API.
355
+
356
+ ```python
357
+ inference = InferenceApi("bert-base-uncased", task="feature-extraction", token=API_TOKEN)
358
+ ```
huggingface_hub/__init__.py ADDED
@@ -0,0 +1,968 @@
1
+ # Copyright 2020 The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # ***********
16
+ # `huggingface_hub` init has 2 modes:
17
+ # - Normal usage:
18
+ # If imported to use it, all modules and functions are lazy-loaded. This means
19
+ # they exist at the top level of the module but are imported only the first time they are
20
+ # used. This way, `from huggingface_hub import something` will import `something`
21
+ # quickly without the hassle of importing all the features from `huggingface_hub`.
22
+ # - Static check:
23
+ # If statically analyzed, all modules and functions are loaded normally. This way
24
+ # static typing check works properly as well as autocomplete in text editors and
25
+ # IDEs.
26
+ #
27
+ # The static model imports are done inside the `if TYPE_CHECKING:` statement at
28
+ # the bottom of this file. Since module/function imports are duplicated, make
29
+ # sure to add any new import in both places. This is checked in the
30
+ # `make quality` command.
31
+ #
32
+ # To update the static imports, please run the following command and commit the changes.
33
+ # ```
34
+ # # Use script
35
+ # python utils/check_static_imports.py --update-file
36
+ #
37
+ # # Or run style on codebase
38
+ # make style
39
+ # ```
40
+ #
41
+ # ***********
42
+ # Lazy loader vendored from https://github.com/scientific-python/lazy_loader
43
+ import importlib
44
+ import os
45
+ import sys
46
+ from typing import TYPE_CHECKING
47
+
48
+
49
+ __version__ = "0.25.2"
50
+
51
+ # Alphabetical order of definitions is ensured in tests
52
+ # WARNING: any comment added in this dictionary definition will be lost when
53
+ # re-generating the file!
54
+ _SUBMOD_ATTRS = {
55
+ "_commit_scheduler": [
56
+ "CommitScheduler",
57
+ ],
58
+ "_inference_endpoints": [
59
+ "InferenceEndpoint",
60
+ "InferenceEndpointError",
61
+ "InferenceEndpointStatus",
62
+ "InferenceEndpointTimeoutError",
63
+ "InferenceEndpointType",
64
+ ],
65
+ "_login": [
66
+ "interpreter_login",
67
+ "login",
68
+ "logout",
69
+ "notebook_login",
70
+ ],
71
+ "_multi_commits": [
72
+ "MultiCommitException",
73
+ "plan_multi_commits",
74
+ ],
75
+ "_snapshot_download": [
76
+ "snapshot_download",
77
+ ],
78
+ "_space_api": [
79
+ "SpaceHardware",
80
+ "SpaceRuntime",
81
+ "SpaceStage",
82
+ "SpaceStorage",
83
+ "SpaceVariable",
84
+ ],
85
+ "_tensorboard_logger": [
86
+ "HFSummaryWriter",
87
+ ],
88
+ "_webhooks_payload": [
89
+ "WebhookPayload",
90
+ "WebhookPayloadComment",
91
+ "WebhookPayloadDiscussion",
92
+ "WebhookPayloadDiscussionChanges",
93
+ "WebhookPayloadEvent",
94
+ "WebhookPayloadMovedTo",
95
+ "WebhookPayloadRepo",
96
+ "WebhookPayloadUrl",
97
+ "WebhookPayloadWebhook",
98
+ ],
99
+ "_webhooks_server": [
100
+ "WebhooksServer",
101
+ "webhook_endpoint",
102
+ ],
103
+ "community": [
104
+ "Discussion",
105
+ "DiscussionComment",
106
+ "DiscussionCommit",
107
+ "DiscussionEvent",
108
+ "DiscussionStatusChange",
109
+ "DiscussionTitleChange",
110
+ "DiscussionWithDetails",
111
+ ],
112
+ "constants": [
113
+ "CONFIG_NAME",
114
+ "FLAX_WEIGHTS_NAME",
115
+ "HUGGINGFACE_CO_URL_HOME",
116
+ "HUGGINGFACE_CO_URL_TEMPLATE",
117
+ "PYTORCH_WEIGHTS_NAME",
118
+ "REPO_TYPE_DATASET",
119
+ "REPO_TYPE_MODEL",
120
+ "REPO_TYPE_SPACE",
121
+ "TF2_WEIGHTS_NAME",
122
+ "TF_WEIGHTS_NAME",
123
+ ],
124
+ "fastai_utils": [
125
+ "_save_pretrained_fastai",
126
+ "from_pretrained_fastai",
127
+ "push_to_hub_fastai",
128
+ ],
129
+ "file_download": [
130
+ "HfFileMetadata",
131
+ "_CACHED_NO_EXIST",
132
+ "cached_download",
133
+ "get_hf_file_metadata",
134
+ "hf_hub_download",
135
+ "hf_hub_url",
136
+ "try_to_load_from_cache",
137
+ ],
138
+ "hf_api": [
139
+ "Collection",
140
+ "CollectionItem",
141
+ "CommitInfo",
142
+ "CommitOperation",
143
+ "CommitOperationAdd",
144
+ "CommitOperationCopy",
145
+ "CommitOperationDelete",
146
+ "DatasetInfo",
147
+ "GitCommitInfo",
148
+ "GitRefInfo",
149
+ "GitRefs",
150
+ "HfApi",
151
+ "ModelInfo",
152
+ "RepoUrl",
153
+ "SpaceInfo",
154
+ "User",
155
+ "UserLikes",
156
+ "WebhookInfo",
157
+ "WebhookWatchedItem",
158
+ "accept_access_request",
159
+ "add_collection_item",
160
+ "add_space_secret",
161
+ "add_space_variable",
162
+ "auth_check",
163
+ "cancel_access_request",
164
+ "change_discussion_status",
165
+ "comment_discussion",
166
+ "create_branch",
167
+ "create_collection",
168
+ "create_commit",
169
+ "create_commits_on_pr",
170
+ "create_discussion",
171
+ "create_inference_endpoint",
172
+ "create_pull_request",
173
+ "create_repo",
174
+ "create_tag",
175
+ "create_webhook",
176
+ "dataset_info",
177
+ "delete_branch",
178
+ "delete_collection",
179
+ "delete_collection_item",
180
+ "delete_file",
181
+ "delete_folder",
182
+ "delete_inference_endpoint",
183
+ "delete_repo",
184
+ "delete_space_secret",
185
+ "delete_space_storage",
186
+ "delete_space_variable",
187
+ "delete_tag",
188
+ "delete_webhook",
189
+ "disable_webhook",
190
+ "duplicate_space",
191
+ "edit_discussion_comment",
192
+ "enable_webhook",
193
+ "file_exists",
194
+ "get_collection",
195
+ "get_dataset_tags",
196
+ "get_discussion_details",
197
+ "get_full_repo_name",
198
+ "get_inference_endpoint",
199
+ "get_model_tags",
200
+ "get_paths_info",
201
+ "get_repo_discussions",
202
+ "get_safetensors_metadata",
203
+ "get_space_runtime",
204
+ "get_space_variables",
205
+ "get_token_permission",
206
+ "get_user_overview",
207
+ "get_webhook",
208
+ "grant_access",
209
+ "like",
210
+ "list_accepted_access_requests",
211
+ "list_collections",
212
+ "list_datasets",
213
+ "list_inference_endpoints",
214
+ "list_liked_repos",
215
+ "list_metrics",
216
+ "list_models",
217
+ "list_organization_members",
218
+ "list_pending_access_requests",
219
+ "list_rejected_access_requests",
220
+ "list_repo_commits",
221
+ "list_repo_files",
222
+ "list_repo_likers",
223
+ "list_repo_refs",
224
+ "list_repo_tree",
225
+ "list_spaces",
226
+ "list_user_followers",
227
+ "list_user_following",
228
+ "list_webhooks",
229
+ "merge_pull_request",
230
+ "model_info",
231
+ "move_repo",
232
+ "parse_safetensors_file_metadata",
233
+ "pause_inference_endpoint",
234
+ "pause_space",
235
+ "preupload_lfs_files",
236
+ "reject_access_request",
237
+ "rename_discussion",
238
+ "repo_exists",
239
+ "repo_info",
240
+ "repo_type_and_id_from_hf_id",
241
+ "request_space_hardware",
242
+ "request_space_storage",
243
+ "restart_space",
244
+ "resume_inference_endpoint",
245
+ "revision_exists",
246
+ "run_as_future",
247
+ "scale_to_zero_inference_endpoint",
248
+ "set_space_sleep_time",
249
+ "space_info",
250
+ "super_squash_history",
251
+ "unlike",
252
+ "update_collection_item",
253
+ "update_collection_metadata",
254
+ "update_inference_endpoint",
255
+ "update_repo_settings",
256
+ "update_repo_visibility",
257
+ "update_webhook",
258
+ "upload_file",
259
+ "upload_folder",
260
+ "upload_large_folder",
261
+ "whoami",
262
+ ],
263
+ "hf_file_system": [
264
+ "HfFileSystem",
265
+ "HfFileSystemFile",
266
+ "HfFileSystemResolvedPath",
267
+ "HfFileSystemStreamFile",
268
+ ],
269
+ "hub_mixin": [
270
+ "ModelHubMixin",
271
+ "PyTorchModelHubMixin",
272
+ ],
273
+ "inference._client": [
274
+ "InferenceClient",
275
+ "InferenceTimeoutError",
276
+ ],
277
+ "inference._generated._async_client": [
278
+ "AsyncInferenceClient",
279
+ ],
280
+ "inference._generated.types": [
281
+ "AudioClassificationInput",
282
+ "AudioClassificationOutputElement",
283
+ "AudioClassificationParameters",
284
+ "AudioToAudioInput",
285
+ "AudioToAudioOutputElement",
286
+ "AutomaticSpeechRecognitionGenerationParameters",
287
+ "AutomaticSpeechRecognitionInput",
288
+ "AutomaticSpeechRecognitionOutput",
289
+ "AutomaticSpeechRecognitionOutputChunk",
290
+ "AutomaticSpeechRecognitionParameters",
291
+ "ChatCompletionInput",
292
+ "ChatCompletionInputFunctionDefinition",
293
+ "ChatCompletionInputFunctionName",
294
+ "ChatCompletionInputGrammarType",
295
+ "ChatCompletionInputMessage",
296
+ "ChatCompletionInputMessageChunk",
297
+ "ChatCompletionInputTool",
298
+ "ChatCompletionInputToolTypeClass",
299
+ "ChatCompletionInputURL",
300
+ "ChatCompletionOutput",
301
+ "ChatCompletionOutputComplete",
302
+ "ChatCompletionOutputFunctionDefinition",
303
+ "ChatCompletionOutputLogprob",
304
+ "ChatCompletionOutputLogprobs",
305
+ "ChatCompletionOutputMessage",
306
+ "ChatCompletionOutputToolCall",
307
+ "ChatCompletionOutputTopLogprob",
308
+ "ChatCompletionOutputUsage",
309
+ "ChatCompletionStreamOutput",
310
+ "ChatCompletionStreamOutputChoice",
311
+ "ChatCompletionStreamOutputDelta",
312
+ "ChatCompletionStreamOutputDeltaToolCall",
313
+ "ChatCompletionStreamOutputFunction",
314
+ "ChatCompletionStreamOutputLogprob",
315
+ "ChatCompletionStreamOutputLogprobs",
316
+ "ChatCompletionStreamOutputTopLogprob",
317
+ "DepthEstimationInput",
318
+ "DepthEstimationOutput",
319
+ "DocumentQuestionAnsweringInput",
320
+ "DocumentQuestionAnsweringInputData",
321
+ "DocumentQuestionAnsweringOutputElement",
322
+ "DocumentQuestionAnsweringParameters",
323
+ "FeatureExtractionInput",
324
+ "FillMaskInput",
325
+ "FillMaskOutputElement",
326
+ "FillMaskParameters",
327
+ "ImageClassificationInput",
328
+ "ImageClassificationOutputElement",
329
+ "ImageClassificationParameters",
330
+ "ImageSegmentationInput",
331
+ "ImageSegmentationOutputElement",
332
+ "ImageSegmentationParameters",
333
+ "ImageToImageInput",
334
+ "ImageToImageOutput",
335
+ "ImageToImageParameters",
336
+ "ImageToImageTargetSize",
337
+ "ImageToTextGenerationParameters",
338
+ "ImageToTextInput",
339
+ "ImageToTextOutput",
340
+ "ImageToTextParameters",
341
+ "ObjectDetectionBoundingBox",
342
+ "ObjectDetectionInput",
343
+ "ObjectDetectionOutputElement",
344
+ "ObjectDetectionParameters",
345
+ "QuestionAnsweringInput",
346
+ "QuestionAnsweringInputData",
347
+ "QuestionAnsweringOutputElement",
348
+ "QuestionAnsweringParameters",
349
+ "SentenceSimilarityInput",
350
+ "SentenceSimilarityInputData",
351
+ "SummarizationGenerationParameters",
352
+ "SummarizationInput",
353
+ "SummarizationOutput",
354
+ "TableQuestionAnsweringInput",
355
+ "TableQuestionAnsweringInputData",
356
+ "TableQuestionAnsweringOutputElement",
357
+ "Text2TextGenerationInput",
358
+ "Text2TextGenerationOutput",
359
+ "Text2TextGenerationParameters",
360
+ "TextClassificationInput",
361
+ "TextClassificationOutputElement",
362
+ "TextClassificationParameters",
363
+ "TextGenerationInput",
364
+ "TextGenerationInputGenerateParameters",
365
+ "TextGenerationInputGrammarType",
366
+ "TextGenerationOutput",
367
+ "TextGenerationOutputBestOfSequence",
368
+ "TextGenerationOutputDetails",
369
+ "TextGenerationOutputPrefillToken",
370
+ "TextGenerationOutputToken",
371
+ "TextGenerationStreamOutput",
372
+ "TextGenerationStreamOutputStreamDetails",
373
+ "TextGenerationStreamOutputToken",
374
+ "TextToAudioGenerationParameters",
375
+ "TextToAudioInput",
376
+ "TextToAudioOutput",
377
+ "TextToAudioParameters",
378
+ "TextToImageInput",
379
+ "TextToImageOutput",
380
+ "TextToImageParameters",
381
+ "TextToImageTargetSize",
382
+ "TokenClassificationInput",
383
+ "TokenClassificationOutputElement",
384
+ "TokenClassificationParameters",
385
+ "TranslationGenerationParameters",
386
+ "TranslationInput",
387
+ "TranslationOutput",
388
+ "VideoClassificationInput",
389
+ "VideoClassificationOutputElement",
390
+ "VideoClassificationParameters",
391
+ "VisualQuestionAnsweringInput",
392
+ "VisualQuestionAnsweringInputData",
393
+ "VisualQuestionAnsweringOutputElement",
394
+ "VisualQuestionAnsweringParameters",
395
+ "ZeroShotClassificationInput",
396
+ "ZeroShotClassificationInputData",
397
+ "ZeroShotClassificationOutputElement",
398
+ "ZeroShotClassificationParameters",
399
+ "ZeroShotImageClassificationInput",
400
+ "ZeroShotImageClassificationInputData",
401
+ "ZeroShotImageClassificationOutputElement",
402
+ "ZeroShotImageClassificationParameters",
403
+ "ZeroShotObjectDetectionBoundingBox",
404
+ "ZeroShotObjectDetectionInput",
405
+ "ZeroShotObjectDetectionInputData",
406
+ "ZeroShotObjectDetectionOutputElement",
407
+ ],
408
+ "inference_api": [
409
+ "InferenceApi",
410
+ ],
411
+ "keras_mixin": [
412
+ "KerasModelHubMixin",
413
+ "from_pretrained_keras",
414
+ "push_to_hub_keras",
415
+ "save_pretrained_keras",
416
+ ],
417
+ "repocard": [
418
+ "DatasetCard",
419
+ "ModelCard",
420
+ "RepoCard",
421
+ "SpaceCard",
422
+ "metadata_eval_result",
423
+ "metadata_load",
424
+ "metadata_save",
425
+ "metadata_update",
426
+ ],
427
+ "repocard_data": [
428
+ "CardData",
429
+ "DatasetCardData",
430
+ "EvalResult",
431
+ "ModelCardData",
432
+ "SpaceCardData",
433
+ ],
434
+ "repository": [
435
+ "Repository",
436
+ ],
437
+ "serialization": [
438
+ "StateDictSplit",
439
+ "get_tf_storage_size",
440
+ "get_torch_storage_id",
441
+ "get_torch_storage_size",
442
+ "save_torch_model",
443
+ "save_torch_state_dict",
444
+ "split_state_dict_into_shards_factory",
445
+ "split_tf_state_dict_into_shards",
446
+ "split_torch_state_dict_into_shards",
447
+ ],
448
+ "utils": [
449
+ "CacheNotFound",
450
+ "CachedFileInfo",
451
+ "CachedRepoInfo",
452
+ "CachedRevisionInfo",
453
+ "CorruptedCacheException",
454
+ "DeleteCacheStrategy",
455
+ "HFCacheInfo",
456
+ "HfFolder",
457
+ "cached_assets_path",
458
+ "configure_http_backend",
459
+ "dump_environment_info",
460
+ "get_session",
461
+ "get_token",
462
+ "logging",
463
+ "scan_cache_dir",
464
+ ],
465
+ }
466
+
467
+
468
+ def _attach(package_name, submodules=None, submod_attrs=None):
469
+ """Attach lazily loaded submodules, functions, or other attributes.
470
+
471
+ Typically, modules import submodules and attributes as follows:
472
+
473
+ ```py
474
+ import mysubmodule
475
+ import anothersubmodule
476
+
477
+ from .foo import someattr
478
+ ```
479
+
480
+ The idea is to replace a package's `__getattr__`, `__dir__`, and
481
+ `__all__`, such that all imports work exactly the way they would
482
+ with normal imports, except that the import occurs upon first use.
483
+
484
+ The typical way to call this function, replacing the above imports, is:
485
+
486
+ ```python
487
+ __getattr__, __dir__, __all__ = lazy.attach(
488
+ __name__,
489
+ ['mysubmodule', 'anothersubmodule'],
490
+ {'foo': ['someattr']}
491
+ )
492
+ ```
493
+ This functionality requires Python 3.7 or higher.
494
+
495
+ Args:
496
+ package_name (`str`):
497
+ Typically use `__name__`.
498
+ submodules (`set`):
499
+ List of submodules to attach.
500
+ submod_attrs (`dict`):
501
+ Dictionary of submodule -> list of attributes / functions.
502
+ These attributes are imported as they are used.
503
+
504
+ Returns:
505
+ __getattr__, __dir__, __all__
506
+
507
+ """
508
+ if submod_attrs is None:
509
+ submod_attrs = {}
510
+
511
+ if submodules is None:
512
+ submodules = set()
513
+ else:
514
+ submodules = set(submodules)
515
+
516
+ attr_to_modules = {attr: mod for mod, attrs in submod_attrs.items() for attr in attrs}
517
+
518
+ __all__ = list(submodules | attr_to_modules.keys())
519
+
520
+ def __getattr__(name):
521
+ if name in submodules:
522
+ try:
523
+ return importlib.import_module(f"{package_name}.{name}")
524
+ except Exception as e:
525
+ print(f"Error importing {package_name}.{name}: {e}")
526
+ raise
527
+ elif name in attr_to_modules:
528
+ submod_path = f"{package_name}.{attr_to_modules[name]}"
529
+ try:
530
+ submod = importlib.import_module(submod_path)
531
+ except Exception as e:
532
+ print(f"Error importing {submod_path}: {e}")
533
+ raise
534
+ attr = getattr(submod, name)
535
+
536
+ # If the attribute lives in a file (module) with the same
537
+ # name as the attribute, ensure that the attribute and *not*
538
+ # the module is accessible on the package.
539
+ if name == attr_to_modules[name]:
540
+ pkg = sys.modules[package_name]
541
+ pkg.__dict__[name] = attr
542
+
543
+ return attr
544
+ else:
545
+ raise AttributeError(f"No {package_name} attribute {name}")
546
+
547
+ def __dir__():
548
+ return __all__
549
+
550
+ return __getattr__, __dir__, list(__all__)
551
+
552
+
553
+ __getattr__, __dir__, __all__ = _attach(__name__, submodules=[], submod_attrs=_SUBMOD_ATTRS)
554
+
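+ # Setting the EAGER_IMPORT environment variable forces every lazy attribute to be
+ # imported at startup, which surfaces import errors early (useful for debugging).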
555
+ if os.environ.get("EAGER_IMPORT", ""):
556
+ for attr in __all__:
557
+ __getattr__(attr)
558
+
559
+ # WARNING: any content below this statement is generated automatically. Any manual edit
560
+ # will be lost when re-generating this file!
561
+ #
562
+ # To update the static imports, please run the following command and commit the changes.
563
+ # ```
564
+ # # Use script
565
+ # python utils/check_static_imports.py --update-file
566
+ #
567
+ # # Or run style on codebase
568
+ # make style
569
+ # ```
570
+ if TYPE_CHECKING: # pragma: no cover
571
+ from ._commit_scheduler import CommitScheduler # noqa: F401
572
+ from ._inference_endpoints import (
573
+ InferenceEndpoint, # noqa: F401
574
+ InferenceEndpointError, # noqa: F401
575
+ InferenceEndpointStatus, # noqa: F401
576
+ InferenceEndpointTimeoutError, # noqa: F401
577
+ InferenceEndpointType, # noqa: F401
578
+ )
579
+ from ._login import (
580
+ interpreter_login, # noqa: F401
581
+ login, # noqa: F401
582
+ logout, # noqa: F401
583
+ notebook_login, # noqa: F401
584
+ )
585
+ from ._multi_commits import (
586
+ MultiCommitException, # noqa: F401
587
+ plan_multi_commits, # noqa: F401
588
+ )
589
+ from ._snapshot_download import snapshot_download # noqa: F401
590
+ from ._space_api import (
591
+ SpaceHardware, # noqa: F401
592
+ SpaceRuntime, # noqa: F401
593
+ SpaceStage, # noqa: F401
594
+ SpaceStorage, # noqa: F401
595
+ SpaceVariable, # noqa: F401
596
+ )
597
+ from ._tensorboard_logger import HFSummaryWriter # noqa: F401
598
+ from ._webhooks_payload import (
599
+ WebhookPayload, # noqa: F401
600
+ WebhookPayloadComment, # noqa: F401
601
+ WebhookPayloadDiscussion, # noqa: F401
602
+ WebhookPayloadDiscussionChanges, # noqa: F401
603
+ WebhookPayloadEvent, # noqa: F401
604
+ WebhookPayloadMovedTo, # noqa: F401
605
+ WebhookPayloadRepo, # noqa: F401
606
+ WebhookPayloadUrl, # noqa: F401
607
+ WebhookPayloadWebhook, # noqa: F401
608
+ )
609
+ from ._webhooks_server import (
610
+ WebhooksServer, # noqa: F401
611
+ webhook_endpoint, # noqa: F401
612
+ )
613
+ from .community import (
614
+ Discussion, # noqa: F401
615
+ DiscussionComment, # noqa: F401
616
+ DiscussionCommit, # noqa: F401
617
+ DiscussionEvent, # noqa: F401
618
+ DiscussionStatusChange, # noqa: F401
619
+ DiscussionTitleChange, # noqa: F401
620
+ DiscussionWithDetails, # noqa: F401
621
+ )
622
+ from .constants import (
623
+ CONFIG_NAME, # noqa: F401
624
+ FLAX_WEIGHTS_NAME, # noqa: F401
625
+ HUGGINGFACE_CO_URL_HOME, # noqa: F401
626
+ HUGGINGFACE_CO_URL_TEMPLATE, # noqa: F401
627
+ PYTORCH_WEIGHTS_NAME, # noqa: F401
628
+ REPO_TYPE_DATASET, # noqa: F401
629
+ REPO_TYPE_MODEL, # noqa: F401
630
+ REPO_TYPE_SPACE, # noqa: F401
631
+ TF2_WEIGHTS_NAME, # noqa: F401
632
+ TF_WEIGHTS_NAME, # noqa: F401
633
+ )
634
+ from .fastai_utils import (
635
+ _save_pretrained_fastai, # noqa: F401
636
+ from_pretrained_fastai, # noqa: F401
637
+ push_to_hub_fastai, # noqa: F401
638
+ )
639
+ from .file_download import (
640
+ _CACHED_NO_EXIST, # noqa: F401
641
+ HfFileMetadata, # noqa: F401
642
+ cached_download, # noqa: F401
643
+ get_hf_file_metadata, # noqa: F401
644
+ hf_hub_download, # noqa: F401
645
+ hf_hub_url, # noqa: F401
646
+ try_to_load_from_cache, # noqa: F401
647
+ )
648
+ from .hf_api import (
649
+ Collection, # noqa: F401
650
+ CollectionItem, # noqa: F401
651
+ CommitInfo, # noqa: F401
652
+ CommitOperation, # noqa: F401
653
+ CommitOperationAdd, # noqa: F401
654
+ CommitOperationCopy, # noqa: F401
655
+ CommitOperationDelete, # noqa: F401
656
+ DatasetInfo, # noqa: F401
657
+ GitCommitInfo, # noqa: F401
658
+ GitRefInfo, # noqa: F401
659
+ GitRefs, # noqa: F401
660
+ HfApi, # noqa: F401
661
+ ModelInfo, # noqa: F401
662
+ RepoUrl, # noqa: F401
663
+ SpaceInfo, # noqa: F401
664
+ User, # noqa: F401
665
+ UserLikes, # noqa: F401
666
+ WebhookInfo, # noqa: F401
667
+ WebhookWatchedItem, # noqa: F401
668
+ accept_access_request, # noqa: F401
669
+ add_collection_item, # noqa: F401
670
+ add_space_secret, # noqa: F401
671
+ add_space_variable, # noqa: F401
672
+ auth_check, # noqa: F401
673
+ cancel_access_request, # noqa: F401
674
+ change_discussion_status, # noqa: F401
675
+ comment_discussion, # noqa: F401
676
+ create_branch, # noqa: F401
677
+ create_collection, # noqa: F401
678
+ create_commit, # noqa: F401
679
+ create_commits_on_pr, # noqa: F401
680
+ create_discussion, # noqa: F401
681
+ create_inference_endpoint, # noqa: F401
682
+ create_pull_request, # noqa: F401
683
+ create_repo, # noqa: F401
684
+ create_tag, # noqa: F401
685
+ create_webhook, # noqa: F401
686
+ dataset_info, # noqa: F401
687
+ delete_branch, # noqa: F401
688
+ delete_collection, # noqa: F401
689
+ delete_collection_item, # noqa: F401
690
+ delete_file, # noqa: F401
691
+ delete_folder, # noqa: F401
692
+ delete_inference_endpoint, # noqa: F401
693
+ delete_repo, # noqa: F401
694
+ delete_space_secret, # noqa: F401
695
+ delete_space_storage, # noqa: F401
696
+ delete_space_variable, # noqa: F401
697
+ delete_tag, # noqa: F401
698
+ delete_webhook, # noqa: F401
699
+ disable_webhook, # noqa: F401
700
+ duplicate_space, # noqa: F401
701
+ edit_discussion_comment, # noqa: F401
702
+ enable_webhook, # noqa: F401
703
+ file_exists, # noqa: F401
704
+ get_collection, # noqa: F401
705
+ get_dataset_tags, # noqa: F401
706
+ get_discussion_details, # noqa: F401
707
+ get_full_repo_name, # noqa: F401
708
+ get_inference_endpoint, # noqa: F401
709
+ get_model_tags, # noqa: F401
710
+ get_paths_info, # noqa: F401
711
+ get_repo_discussions, # noqa: F401
712
+ get_safetensors_metadata, # noqa: F401
713
+ get_space_runtime, # noqa: F401
714
+ get_space_variables, # noqa: F401
715
+ get_token_permission, # noqa: F401
716
+ get_user_overview, # noqa: F401
717
+ get_webhook, # noqa: F401
718
+ grant_access, # noqa: F401
719
+ like, # noqa: F401
720
+ list_accepted_access_requests, # noqa: F401
721
+ list_collections, # noqa: F401
722
+ list_datasets, # noqa: F401
723
+ list_inference_endpoints, # noqa: F401
724
+ list_liked_repos, # noqa: F401
725
+ list_metrics, # noqa: F401
726
+ list_models, # noqa: F401
727
+ list_organization_members, # noqa: F401
728
+ list_pending_access_requests, # noqa: F401
729
+ list_rejected_access_requests, # noqa: F401
730
+ list_repo_commits, # noqa: F401
731
+ list_repo_files, # noqa: F401
732
+ list_repo_likers, # noqa: F401
733
+ list_repo_refs, # noqa: F401
734
+ list_repo_tree, # noqa: F401
735
+ list_spaces, # noqa: F401
736
+ list_user_followers, # noqa: F401
737
+ list_user_following, # noqa: F401
738
+ list_webhooks, # noqa: F401
739
+ merge_pull_request, # noqa: F401
740
+ model_info, # noqa: F401
741
+ move_repo, # noqa: F401
742
+ parse_safetensors_file_metadata, # noqa: F401
743
+ pause_inference_endpoint, # noqa: F401
744
+ pause_space, # noqa: F401
745
+ preupload_lfs_files, # noqa: F401
746
+ reject_access_request, # noqa: F401
747
+ rename_discussion, # noqa: F401
748
+ repo_exists, # noqa: F401
749
+ repo_info, # noqa: F401
750
+ repo_type_and_id_from_hf_id, # noqa: F401
751
+ request_space_hardware, # noqa: F401
752
+ request_space_storage, # noqa: F401
753
+ restart_space, # noqa: F401
754
+ resume_inference_endpoint, # noqa: F401
755
+ revision_exists, # noqa: F401
756
+ run_as_future, # noqa: F401
757
+ scale_to_zero_inference_endpoint, # noqa: F401
758
+ set_space_sleep_time, # noqa: F401
759
+ space_info, # noqa: F401
760
+ super_squash_history, # noqa: F401
761
+ unlike, # noqa: F401
762
+ update_collection_item, # noqa: F401
763
+ update_collection_metadata, # noqa: F401
764
+ update_inference_endpoint, # noqa: F401
765
+ update_repo_settings, # noqa: F401
766
+ update_repo_visibility, # noqa: F401
767
+ update_webhook, # noqa: F401
768
+ upload_file, # noqa: F401
769
+ upload_folder, # noqa: F401
770
+ upload_large_folder, # noqa: F401
771
+ whoami, # noqa: F401
772
+ )
773
+ from .hf_file_system import (
774
+ HfFileSystem, # noqa: F401
775
+ HfFileSystemFile, # noqa: F401
776
+ HfFileSystemResolvedPath, # noqa: F401
777
+ HfFileSystemStreamFile, # noqa: F401
778
+ )
779
+ from .hub_mixin import (
780
+ ModelHubMixin, # noqa: F401
781
+ PyTorchModelHubMixin, # noqa: F401
782
+ )
783
+ from .inference._client import (
784
+ InferenceClient, # noqa: F401
785
+ InferenceTimeoutError, # noqa: F401
786
+ )
787
+ from .inference._generated._async_client import AsyncInferenceClient # noqa: F401
788
+ from .inference._generated.types import (
789
+ AudioClassificationInput, # noqa: F401
790
+ AudioClassificationOutputElement, # noqa: F401
791
+ AudioClassificationParameters, # noqa: F401
792
+ AudioToAudioInput, # noqa: F401
793
+ AudioToAudioOutputElement, # noqa: F401
794
+ AutomaticSpeechRecognitionGenerationParameters, # noqa: F401
795
+ AutomaticSpeechRecognitionInput, # noqa: F401
796
+ AutomaticSpeechRecognitionOutput, # noqa: F401
797
+ AutomaticSpeechRecognitionOutputChunk, # noqa: F401
798
+ AutomaticSpeechRecognitionParameters, # noqa: F401
799
+ ChatCompletionInput, # noqa: F401
800
+ ChatCompletionInputFunctionDefinition, # noqa: F401
801
+ ChatCompletionInputFunctionName, # noqa: F401
802
+ ChatCompletionInputGrammarType, # noqa: F401
803
+ ChatCompletionInputMessage, # noqa: F401
804
+ ChatCompletionInputMessageChunk, # noqa: F401
805
+ ChatCompletionInputTool, # noqa: F401
806
+ ChatCompletionInputToolTypeClass, # noqa: F401
807
+ ChatCompletionInputURL, # noqa: F401
808
+ ChatCompletionOutput, # noqa: F401
809
+ ChatCompletionOutputComplete, # noqa: F401
810
+ ChatCompletionOutputFunctionDefinition, # noqa: F401
811
+ ChatCompletionOutputLogprob, # noqa: F401
812
+ ChatCompletionOutputLogprobs, # noqa: F401
813
+ ChatCompletionOutputMessage, # noqa: F401
814
+ ChatCompletionOutputToolCall, # noqa: F401
815
+ ChatCompletionOutputTopLogprob, # noqa: F401
816
+ ChatCompletionOutputUsage, # noqa: F401
817
+ ChatCompletionStreamOutput, # noqa: F401
818
+ ChatCompletionStreamOutputChoice, # noqa: F401
819
+ ChatCompletionStreamOutputDelta, # noqa: F401
820
+ ChatCompletionStreamOutputDeltaToolCall, # noqa: F401
821
+ ChatCompletionStreamOutputFunction, # noqa: F401
822
+ ChatCompletionStreamOutputLogprob, # noqa: F401
823
+ ChatCompletionStreamOutputLogprobs, # noqa: F401
824
+ ChatCompletionStreamOutputTopLogprob, # noqa: F401
825
+ DepthEstimationInput, # noqa: F401
826
+ DepthEstimationOutput, # noqa: F401
827
+ DocumentQuestionAnsweringInput, # noqa: F401
828
+ DocumentQuestionAnsweringInputData, # noqa: F401
829
+ DocumentQuestionAnsweringOutputElement, # noqa: F401
830
+ DocumentQuestionAnsweringParameters, # noqa: F401
831
+ FeatureExtractionInput, # noqa: F401
832
+ FillMaskInput, # noqa: F401
833
+ FillMaskOutputElement, # noqa: F401
834
+ FillMaskParameters, # noqa: F401
835
+ ImageClassificationInput, # noqa: F401
836
+ ImageClassificationOutputElement, # noqa: F401
837
+ ImageClassificationParameters, # noqa: F401
838
+ ImageSegmentationInput, # noqa: F401
839
+ ImageSegmentationOutputElement, # noqa: F401
840
+ ImageSegmentationParameters, # noqa: F401
841
+ ImageToImageInput, # noqa: F401
842
+ ImageToImageOutput, # noqa: F401
843
+ ImageToImageParameters, # noqa: F401
844
+ ImageToImageTargetSize, # noqa: F401
845
+ ImageToTextGenerationParameters, # noqa: F401
846
+ ImageToTextInput, # noqa: F401
847
+ ImageToTextOutput, # noqa: F401
848
+ ImageToTextParameters, # noqa: F401
849
+ ObjectDetectionBoundingBox, # noqa: F401
850
+ ObjectDetectionInput, # noqa: F401
851
+ ObjectDetectionOutputElement, # noqa: F401
852
+ ObjectDetectionParameters, # noqa: F401
853
+ QuestionAnsweringInput, # noqa: F401
854
+ QuestionAnsweringInputData, # noqa: F401
855
+ QuestionAnsweringOutputElement, # noqa: F401
856
+ QuestionAnsweringParameters, # noqa: F401
857
+ SentenceSimilarityInput, # noqa: F401
858
+ SentenceSimilarityInputData, # noqa: F401
859
+ SummarizationGenerationParameters, # noqa: F401
860
+ SummarizationInput, # noqa: F401
861
+ SummarizationOutput, # noqa: F401
862
+ TableQuestionAnsweringInput, # noqa: F401
863
+ TableQuestionAnsweringInputData, # noqa: F401
864
+ TableQuestionAnsweringOutputElement, # noqa: F401
865
+ Text2TextGenerationInput, # noqa: F401
866
+ Text2TextGenerationOutput, # noqa: F401
867
+ Text2TextGenerationParameters, # noqa: F401
868
+ TextClassificationInput, # noqa: F401
869
+ TextClassificationOutputElement, # noqa: F401
870
+ TextClassificationParameters, # noqa: F401
871
+ TextGenerationInput, # noqa: F401
872
+ TextGenerationInputGenerateParameters, # noqa: F401
873
+ TextGenerationInputGrammarType, # noqa: F401
874
+ TextGenerationOutput, # noqa: F401
875
+ TextGenerationOutputBestOfSequence, # noqa: F401
876
+ TextGenerationOutputDetails, # noqa: F401
877
+ TextGenerationOutputPrefillToken, # noqa: F401
878
+ TextGenerationOutputToken, # noqa: F401
879
+ TextGenerationStreamOutput, # noqa: F401
880
+ TextGenerationStreamOutputStreamDetails, # noqa: F401
881
+ TextGenerationStreamOutputToken, # noqa: F401
882
+ TextToAudioGenerationParameters, # noqa: F401
883
+ TextToAudioInput, # noqa: F401
884
+ TextToAudioOutput, # noqa: F401
885
+ TextToAudioParameters, # noqa: F401
886
+ TextToImageInput, # noqa: F401
887
+ TextToImageOutput, # noqa: F401
888
+ TextToImageParameters, # noqa: F401
889
+ TextToImageTargetSize, # noqa: F401
890
+ TokenClassificationInput, # noqa: F401
891
+ TokenClassificationOutputElement, # noqa: F401
892
+ TokenClassificationParameters, # noqa: F401
893
+ TranslationGenerationParameters, # noqa: F401
894
+ TranslationInput, # noqa: F401
895
+ TranslationOutput, # noqa: F401
896
+ VideoClassificationInput, # noqa: F401
897
+ VideoClassificationOutputElement, # noqa: F401
898
+ VideoClassificationParameters, # noqa: F401
899
+ VisualQuestionAnsweringInput, # noqa: F401
900
+ VisualQuestionAnsweringInputData, # noqa: F401
901
+ VisualQuestionAnsweringOutputElement, # noqa: F401
902
+ VisualQuestionAnsweringParameters, # noqa: F401
903
+ ZeroShotClassificationInput, # noqa: F401
904
+ ZeroShotClassificationInputData, # noqa: F401
905
+ ZeroShotClassificationOutputElement, # noqa: F401
906
+ ZeroShotClassificationParameters, # noqa: F401
907
+ ZeroShotImageClassificationInput, # noqa: F401
908
+ ZeroShotImageClassificationInputData, # noqa: F401
909
+ ZeroShotImageClassificationOutputElement, # noqa: F401
910
+ ZeroShotImageClassificationParameters, # noqa: F401
911
+ ZeroShotObjectDetectionBoundingBox, # noqa: F401
912
+ ZeroShotObjectDetectionInput, # noqa: F401
913
+ ZeroShotObjectDetectionInputData, # noqa: F401
914
+ ZeroShotObjectDetectionOutputElement, # noqa: F401
915
+ )
916
+ from .inference_api import InferenceApi # noqa: F401
917
+ from .keras_mixin import (
918
+ KerasModelHubMixin, # noqa: F401
919
+ from_pretrained_keras, # noqa: F401
920
+ push_to_hub_keras, # noqa: F401
921
+ save_pretrained_keras, # noqa: F401
922
+ )
923
+ from .repocard import (
924
+ DatasetCard, # noqa: F401
925
+ ModelCard, # noqa: F401
926
+ RepoCard, # noqa: F401
927
+ SpaceCard, # noqa: F401
928
+ metadata_eval_result, # noqa: F401
929
+ metadata_load, # noqa: F401
930
+ metadata_save, # noqa: F401
931
+ metadata_update, # noqa: F401
932
+ )
933
+ from .repocard_data import (
934
+ CardData, # noqa: F401
935
+ DatasetCardData, # noqa: F401
936
+ EvalResult, # noqa: F401
937
+ ModelCardData, # noqa: F401
938
+ SpaceCardData, # noqa: F401
939
+ )
940
+ from .repository import Repository # noqa: F401
941
+ from .serialization import (
942
+ StateDictSplit, # noqa: F401
943
+ get_tf_storage_size, # noqa: F401
944
+ get_torch_storage_id, # noqa: F401
945
+ get_torch_storage_size, # noqa: F401
946
+ save_torch_model, # noqa: F401
947
+ save_torch_state_dict, # noqa: F401
948
+ split_state_dict_into_shards_factory, # noqa: F401
949
+ split_tf_state_dict_into_shards, # noqa: F401
950
+ split_torch_state_dict_into_shards, # noqa: F401
951
+ )
952
+ from .utils import (
953
+ CachedFileInfo, # noqa: F401
954
+ CachedRepoInfo, # noqa: F401
955
+ CachedRevisionInfo, # noqa: F401
956
+ CacheNotFound, # noqa: F401
957
+ CorruptedCacheException, # noqa: F401
958
+ DeleteCacheStrategy, # noqa: F401
959
+ HFCacheInfo, # noqa: F401
960
+ HfFolder, # noqa: F401
961
+ cached_assets_path, # noqa: F401
962
+ configure_http_backend, # noqa: F401
963
+ dump_environment_info, # noqa: F401
964
+ get_session, # noqa: F401
965
+ get_token, # noqa: F401
966
+ logging, # noqa: F401
967
+ scan_cache_dir, # noqa: F401
968
+ )
huggingface_hub/_commit_api.py ADDED
@@ -0,0 +1,729 @@
1
+ """
2
+ Type definitions and utilities for the `create_commit` API
3
+ """
4
+
5
+ import base64
6
+ import io
7
+ import os
8
+ import warnings
9
+ from collections import defaultdict
10
+ from contextlib import contextmanager
11
+ from dataclasses import dataclass, field
12
+ from itertools import groupby
13
+ from pathlib import Path, PurePosixPath
14
+ from typing import TYPE_CHECKING, Any, BinaryIO, Dict, Iterable, Iterator, List, Literal, Optional, Tuple, Union
15
+
16
+ from tqdm.contrib.concurrent import thread_map
17
+
18
+ from . import constants
19
+ from .errors import EntryNotFoundError
20
+ from .file_download import hf_hub_url
21
+ from .lfs import UploadInfo, lfs_upload, post_lfs_batch_info
22
+ from .utils import (
23
+ FORBIDDEN_FOLDERS,
24
+ chunk_iterable,
25
+ get_session,
26
+ hf_raise_for_status,
27
+ logging,
28
+ sha,
29
+ tqdm_stream_file,
30
+ validate_hf_hub_args,
31
+ )
32
+ from .utils import tqdm as hf_tqdm
33
+
34
+
35
+ if TYPE_CHECKING:
36
+ pass
37
+
38
+
39
+ logger = logging.get_logger(__name__)
40
+
41
+
42
+ UploadMode = Literal["lfs", "regular"]
43
+
44
+ # Max is 1,000 per request on the Hub for HfApi.get_paths_info
45
+ # Otherwise we get:
46
+ # HfHubHTTPError: 413 Client Error: Payload Too Large for url: https://huggingface.co/api/datasets/xxx (Request ID: xxx)\n\ntoo many parameters
47
+ # See https://github.com/huggingface/huggingface_hub/issues/1503
48
+ FETCH_LFS_BATCH_SIZE = 500
49
+
50
+
51
+ @dataclass
52
+ class CommitOperationDelete:
53
+ """
54
+ Data structure holding necessary info to delete a file or a folder from a repository
55
+ on the Hub.
56
+
57
+ Args:
58
+ path_in_repo (`str`):
59
+ Relative filepath in the repo, for example: `"checkpoints/1fec34a/weights.bin"`
60
+ for a file or `"checkpoints/1fec34a/"` for a folder.
61
+ is_folder (`bool` or `Literal["auto"]`, *optional*):
62
+ Whether the Delete Operation applies to a folder or not. If "auto", the path
63
+ type (file or folder) is guessed automatically by checking whether the path ends with
64
+ a "/" (folder) or not (file). To explicitly set the path type, you can set
65
+ `is_folder=True` or `is_folder=False`.
66
+ """
67
+
68
+ path_in_repo: str
69
+ is_folder: Union[bool, Literal["auto"]] = "auto"
70
+
71
+ def __post_init__(self):
72
+ self.path_in_repo = _validate_path_in_repo(self.path_in_repo)
73
+
74
+ if self.is_folder == "auto":
75
+ self.is_folder = self.path_in_repo.endswith("/")
76
+ if not isinstance(self.is_folder, bool):
77
+ raise ValueError(
78
+ f"Wrong value for `is_folder`. Must be one of [`True`, `False`, `'auto'`]. Got '{self.is_folder}'."
79
+ )
80
+
+
+ @dataclass
+ class CommitOperationCopy:
+     """
+     Data structure holding necessary info to copy a file in a repository on the Hub.
+
+     Limitations:
+       - Only LFS files can be copied. To copy a regular file, you need to download it locally and re-upload it
+       - Cross-repository copies are not supported.
+
+     Note: you can combine a [`CommitOperationCopy`] and a [`CommitOperationDelete`] to rename an LFS file on the Hub.
+
+     Args:
+         src_path_in_repo (`str`):
+             Relative filepath in the repo of the file to be copied, e.g. `"checkpoints/1fec34a/weights.bin"`.
+         path_in_repo (`str`):
+             Relative filepath in the repo where to copy the file, e.g. `"checkpoints/1fec34a/weights_copy.bin"`.
+         src_revision (`str`, *optional*):
+             The git revision of the file to be copied. Can be any valid git revision.
+             Defaults to the target commit revision.
+     """
+
+     src_path_in_repo: str
+     path_in_repo: str
+     src_revision: Optional[str] = None
+
+     def __post_init__(self):
+         self.src_path_in_repo = _validate_path_in_repo(self.src_path_in_repo)
+         self.path_in_repo = _validate_path_in_repo(self.path_in_repo)
+
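The docstring's note about renaming an LFS file translates to the following sketch (repo id and file names are made up; this only works for LFS-tracked files, per the limitations above).

```python
from huggingface_hub import CommitOperationCopy, CommitOperationDelete, HfApi

HfApi().create_commit(
    repo_id="user/repo",  # hypothetical repo
    operations=[
        # copy to the new name, then delete the old one, in a single commit
        CommitOperationCopy(src_path_in_repo="weights.bin", path_in_repo="weights_v1.bin"),
        CommitOperationDelete(path_in_repo="weights.bin"),
    ],
    commit_message="Rename weights.bin to weights_v1.bin",
)
```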
+
+ @dataclass
+ class CommitOperationAdd:
+     """
+     Data structure holding necessary info to upload a file to a repository on the Hub.
+
+     Args:
+         path_in_repo (`str`):
+             Relative filepath in the repo, for example: `"checkpoints/1fec34a/weights.bin"`
+         path_or_fileobj (`str`, `Path`, `bytes`, or `BinaryIO`):
+             Either:
+             - a path to a local file (as `str` or `pathlib.Path`) to upload
+             - a buffer of bytes (`bytes`) holding the content of the file to upload
+             - a "file object" (subclass of `io.BufferedIOBase`), typically obtained
+               with `open(path, "rb")`. It must support `seek()` and `tell()` methods.
+
+     Raises:
+         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+             If `path_or_fileobj` is not one of `str`, `Path`, `bytes` or `io.BufferedIOBase`.
+         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+             If `path_or_fileobj` is a `str` or `Path` but not a path to an existing file.
+         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+             If `path_or_fileobj` is a `io.BufferedIOBase` but it doesn't support both
+             `seek()` and `tell()`.
+     """
+
+     path_in_repo: str
+     path_or_fileobj: Union[str, Path, bytes, BinaryIO]
+     upload_info: UploadInfo = field(init=False, repr=False)
+
+     # Internal attributes
+
+     # set to "lfs" or "regular" once known
+     _upload_mode: Optional[UploadMode] = field(init=False, repr=False, default=None)
+
+     # set to True if .gitignore rules prevent the file from being uploaded as LFS
+     # (server-side check)
+     _should_ignore: Optional[bool] = field(init=False, repr=False, default=None)
+
+     # set to the remote OID of the file if it has already been uploaded
+     # useful to determine if a commit will be empty or not
+     _remote_oid: Optional[str] = field(init=False, repr=False, default=None)
+
+     # set to True once the file has been uploaded as LFS
+     _is_uploaded: bool = field(init=False, repr=False, default=False)
+
+     # set to True once the file has been committed
+     _is_committed: bool = field(init=False, repr=False, default=False)
+
+     def __post_init__(self) -> None:
+         """Validates `path_or_fileobj` and computes `upload_info`."""
+         self.path_in_repo = _validate_path_in_repo(self.path_in_repo)
+
+         # Validate `path_or_fileobj` value
+         if isinstance(self.path_or_fileobj, Path):
+             self.path_or_fileobj = str(self.path_or_fileobj)
+         if isinstance(self.path_or_fileobj, str):
+             path_or_fileobj = os.path.normpath(os.path.expanduser(self.path_or_fileobj))
+             if not os.path.isfile(path_or_fileobj):
+                 raise ValueError(f"Provided path: '{path_or_fileobj}' is not a file on the local file system")
+         elif not isinstance(self.path_or_fileobj, (io.BufferedIOBase, bytes)):
+             # ^^ Inspired from: https://stackoverflow.com/questions/44584829/how-to-determine-if-file-is-opened-in-binary-or-text-mode
+             raise ValueError(
+                 "path_or_fileobj must be either an instance of str, bytes or"
+                 " io.BufferedIOBase. If you passed a file-like object, make sure it is"
+                 " in binary mode."
+             )
+         if isinstance(self.path_or_fileobj, io.BufferedIOBase):
+             try:
+                 self.path_or_fileobj.tell()
+                 self.path_or_fileobj.seek(0, os.SEEK_CUR)
+             except (OSError, AttributeError) as exc:
+                 raise ValueError(
+                     "path_or_fileobj is a file-like object but does not implement seek() and tell()"
+                 ) from exc
+
+         # Compute "upload_info" attribute
+         if isinstance(self.path_or_fileobj, str):
+             self.upload_info = UploadInfo.from_path(self.path_or_fileobj)
+         elif isinstance(self.path_or_fileobj, bytes):
+             self.upload_info = UploadInfo.from_bytes(self.path_or_fileobj)
+         else:
+             self.upload_info = UploadInfo.from_fileobj(self.path_or_fileobj)
+
+     @contextmanager
+     def as_file(self, with_tqdm: bool = False) -> Iterator[BinaryIO]:
+         """
+         A context manager that yields a file-like object allowing to read the underlying
+         data behind `path_or_fileobj`.
+
+         Args:
+             with_tqdm (`bool`, *optional*, defaults to `False`):
+                 If True, iterating over the file object will display a progress bar. Only
+                 works if the file-like object is a path to a file. Pure bytes and buffers
+                 are not supported.
+
+         Example:
+
+         ```python
+         >>> operation = CommitOperationAdd(
+         ...     path_in_repo="remote/dir/weights.h5",
+         ...     path_or_fileobj="./local/weights.h5",
+         ... )
+         CommitOperationAdd(path_in_repo='remote/dir/weights.h5', path_or_fileobj='./local/weights.h5')
+
+         >>> with operation.as_file() as file:
+         ...     content = file.read()
+
+         >>> with operation.as_file(with_tqdm=True) as file:
+         ...     while True:
+         ...         data = file.read(1024)
+         ...         if not data:
+         ...             break
+         config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]
+
+         >>> with operation.as_file(with_tqdm=True) as file:
+         ...     requests.put(..., data=file)
+         config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]
+         ```
+         """
+         if isinstance(self.path_or_fileobj, str) or isinstance(self.path_or_fileobj, Path):
+             if with_tqdm:
+                 with tqdm_stream_file(self.path_or_fileobj) as file:
+                     yield file
+             else:
+                 with open(self.path_or_fileobj, "rb") as file:
+                     yield file
+         elif isinstance(self.path_or_fileobj, bytes):
+             yield io.BytesIO(self.path_or_fileobj)
+         elif isinstance(self.path_or_fileobj, io.BufferedIOBase):
+             prev_pos = self.path_or_fileobj.tell()
+             yield self.path_or_fileobj
+             self.path_or_fileobj.seek(prev_pos, io.SEEK_SET)
+
+     def b64content(self) -> bytes:
+         """
+         The base64-encoded content of `path_or_fileobj`
+
+         Returns: `bytes`
+         """
+         with self.as_file() as file:
+             return base64.b64encode(file.read())
+
+     @property
+     def _local_oid(self) -> Optional[str]:
+         """Return the OID of the local file.
+
+         This OID is then compared to `self._remote_oid` to check if the file has changed compared to the remote one.
+         If the file did not change, we won't upload it again to prevent empty commits.
+
+         For LFS files, the OID corresponds to the SHA256 of the file content (used as the LFS ref).
+         For regular files, the OID corresponds to the SHA1 of the file content.
+         Note: this is slightly different from git OID computation since the OID of an LFS file is usually the git-SHA1 of the
+         pointer file content (not the actual file content). However, using the SHA256 is enough to detect changes
+         and more convenient client-side.
+         """
+         if self._upload_mode is None:
+             return None
+         elif self._upload_mode == "lfs":
+             return self.upload_info.sha256.hex()
+         else:
+             # Regular file => compute sha1
+             # => no need to read by chunk since the file is guaranteed to be <=5MB.
+             with self.as_file() as file:
+                 return sha.git_hash(file.read())
+
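For regular files, `_local_oid` uses a git-style SHA1. A minimal sketch of that formula, assuming `sha.git_hash` follows the standard git blob hash (`git hash-object`):

```python
import hashlib

def git_blob_hash(data: bytes) -> str:
    # git hashes a blob as: sha1(b"blob <size>\0" + content)
    header = f"blob {len(data)}\0".encode()
    return hashlib.sha1(header + data).hexdigest()

# Matches `git hash-object` on the same content:
print(git_blob_hash(b"hello\n"))  # ce013625030ba8dba906f756967f9e9ca394464a
```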
+
+ def _validate_path_in_repo(path_in_repo: str) -> str:
+     # Validate `path_in_repo` value to prevent a server-side issue
+     if path_in_repo.startswith("/"):
+         path_in_repo = path_in_repo[1:]
+     if path_in_repo == "." or path_in_repo == ".." or path_in_repo.startswith("../"):
+         raise ValueError(f"Invalid `path_in_repo` in CommitOperation: '{path_in_repo}'")
+     if path_in_repo.startswith("./"):
+         path_in_repo = path_in_repo[2:]
+     for forbidden in FORBIDDEN_FOLDERS:
+         if any(part == forbidden for part in path_in_repo.split("/")):
+             raise ValueError(
+                 f"Invalid `path_in_repo` in CommitOperation: cannot update files under a '{forbidden}/' folder (path:"
+                 f" '{path_in_repo}')."
+             )
+     return path_in_repo
+
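The normalization rules read directly from the function above, shown as a small illustration:

```python
_validate_path_in_repo("/README.md")     # -> "README.md"   (leading "/" stripped)
_validate_path_in_repo("./weights.bin")  # -> "weights.bin" (leading "./" stripped)
_validate_path_in_repo("../escape.txt")  # raises ValueError (path traversal rejected)
```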
+
+ CommitOperation = Union[CommitOperationAdd, CommitOperationCopy, CommitOperationDelete]
+
+
+ def _warn_on_overwriting_operations(operations: List[CommitOperation]) -> None:
+     """
+     Warn user when a list of operations is expected to overwrite itself in a single
+     commit.
+
+     Rules:
+     - If a filepath is updated by multiple `CommitOperationAdd` operations, a warning
+       message is triggered.
+     - If a filepath is updated at least once by a `CommitOperationAdd` and then deleted
+       by a `CommitOperationDelete`, a warning is triggered.
+     - If a `CommitOperationDelete` deletes a filepath that is then updated by a
+       `CommitOperationAdd`, no warning is triggered. This is usually useless (no need to
+       delete before upload) but can happen if a user deletes an entire folder and then
+       adds new files to it.
+     """
+     nb_additions_per_path: Dict[str, int] = defaultdict(int)
+     for operation in operations:
+         path_in_repo = operation.path_in_repo
+         if isinstance(operation, CommitOperationAdd):
+             if nb_additions_per_path[path_in_repo] > 0:
+                 warnings.warn(
+                     "About to update the same file multiple times in the same commit:"
+                     f" '{path_in_repo}'. This can cause undesired inconsistencies in"
+                     " your repo."
+                 )
+             nb_additions_per_path[path_in_repo] += 1
+             for parent in PurePosixPath(path_in_repo).parents:
+                 # Also keep track of number of updated files per folder
+                 # => warns if deleting a folder overwrites some contained files
+                 nb_additions_per_path[str(parent)] += 1
+         if isinstance(operation, CommitOperationDelete):
+             if nb_additions_per_path[str(PurePosixPath(path_in_repo))] > 0:
+                 if operation.is_folder:
+                     warnings.warn(
+                         "About to delete a folder containing files that have just been"
+                         f" updated within the same commit: '{path_in_repo}'. This can"
+                         " cause undesired inconsistencies in your repo."
+                     )
+                 else:
+                     warnings.warn(
+                         "About to delete a file that has just been updated within the"
+                         f" same commit: '{path_in_repo}'. This can cause undesired"
+                         " inconsistencies in your repo."
+                     )
+
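As a hedged illustration (paths made up), an operation list that would trip the folder rule above when passed to `create_commit`, which invokes this helper:

```python
from huggingface_hub import CommitOperationAdd, CommitOperationDelete

ops = [
    CommitOperationAdd(path_in_repo="data/train.csv", path_or_fileobj=b"a,b\n1,2\n"),
    # deletes the folder that was just updated => UserWarning at commit time
    CommitOperationDelete(path_in_repo="data/"),
]
```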
+
+ @validate_hf_hub_args
+ def _upload_lfs_files(
+     *,
+     additions: List[CommitOperationAdd],
+     repo_type: str,
+     repo_id: str,
+     headers: Dict[str, str],
+     endpoint: Optional[str] = None,
+     num_threads: int = 5,
+     revision: Optional[str] = None,
+ ):
+     """
+     Uploads the content of `additions` to the Hub using the large file storage protocol.
+
+     Relevant external documentation:
+         - LFS Batch API: https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md
+
+     Args:
+         additions (`List` of `CommitOperationAdd`):
+             The files to be uploaded
+         repo_type (`str`):
+             Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
+         repo_id (`str`):
+             A namespace (user or an organization) and a repo name separated
+             by a `/`.
+         headers (`Dict[str, str]`):
+             Headers to use for the request, including authorization headers and user agent.
+         num_threads (`int`, *optional*):
+             The number of concurrent threads to use when uploading. Defaults to 5.
+         revision (`str`, *optional*):
+             The git revision to upload to.
+
+     Raises:
+         [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
+             If an upload failed for any reason
+         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+             If the server returns malformed responses
+         [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
+             If the LFS batch endpoint returned an HTTP error.
+     """
+     # Step 1: retrieve upload instructions from the LFS batch endpoint.
+     # Upload instructions are retrieved in chunks of 256 files to avoid reaching
+     # the payload limit.
+     batch_actions: List[Dict] = []
+     for chunk in chunk_iterable(additions, chunk_size=256):
+         batch_actions_chunk, batch_errors_chunk = post_lfs_batch_info(
+             upload_infos=[op.upload_info for op in chunk],
+             repo_id=repo_id,
+             repo_type=repo_type,
+             revision=revision,
+             endpoint=endpoint,
+             headers=headers,
+             token=None,  # already passed in 'headers'
+         )
+
+         # If at least 1 error, we do not retrieve information for other chunks
+         if batch_errors_chunk:
+             message = "\n".join(
+                 [
+                     f'Encountered error for file with OID {err.get("oid")}: `{err.get("error", {}).get("message")}`'
+                     for err in batch_errors_chunk
+                 ]
+             )
+             raise ValueError(f"LFS batch endpoint returned errors:\n{message}")
+
+         batch_actions += batch_actions_chunk
+     oid2addop = {add_op.upload_info.sha256.hex(): add_op for add_op in additions}
+
+     # Step 2: ignore files that have already been uploaded
+     filtered_actions = []
+     for action in batch_actions:
+         if action.get("actions") is None:
+             logger.debug(
+                 f"Content of file {oid2addop[action['oid']].path_in_repo} is already"
+                 " present upstream - skipping upload."
+             )
+         else:
+             filtered_actions.append(action)
+
+     if len(filtered_actions) == 0:
+         logger.debug("No LFS files to upload.")
+         return
+
+     # Step 3: upload files concurrently according to these instructions
+     def _wrapped_lfs_upload(batch_action) -> None:
+         try:
+             operation = oid2addop[batch_action["oid"]]
+             lfs_upload(operation=operation, lfs_batch_action=batch_action, headers=headers, endpoint=endpoint)
+         except Exception as exc:
+             raise RuntimeError(f"Error while uploading '{operation.path_in_repo}' to the Hub.") from exc
+
+     if constants.HF_HUB_ENABLE_HF_TRANSFER:
+         logger.debug(f"Uploading {len(filtered_actions)} LFS files to the Hub using `hf_transfer`.")
+         for action in hf_tqdm(filtered_actions, name="huggingface_hub.lfs_upload"):
+             _wrapped_lfs_upload(action)
+     elif len(filtered_actions) == 1:
+         logger.debug("Uploading 1 LFS file to the Hub")
+         _wrapped_lfs_upload(filtered_actions[0])
+     else:
+         logger.debug(
+             f"Uploading {len(filtered_actions)} LFS files to the Hub using up to {num_threads} threads concurrently"
+         )
+         thread_map(
+             _wrapped_lfs_upload,
+             filtered_actions,
+             desc=f"Upload {len(filtered_actions)} LFS files",
+             max_workers=num_threads,
+             tqdm_class=hf_tqdm,
+         )
+
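For orientation, a simplified sketch of one LFS batch exchange, with field names taken from the LFS Batch API spec linked above and all values made up:

```python
# Client -> server: one "upload" request listing objects by sha256 OID and size.
request = {
    "operation": "upload",
    "transfers": ["basic"],
    "objects": [{"oid": "a1b2...deadbeef", "size": 123456789}],
}
# Server -> client: one entry per object. An entry without "actions" means the
# blob already exists upstream, which is exactly what Step 2 above skips.
response = {
    "objects": [
        {"oid": "a1b2...deadbeef", "size": 123456789,
         "actions": {"upload": {"href": "https://..."}}},
    ]
}
```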
+
+ def _validate_preupload_info(preupload_info: dict):
+     files = preupload_info.get("files")
+     if not isinstance(files, list):
+         raise ValueError("preupload_info is improperly formatted")
+     for file_info in files:
+         if not (
+             isinstance(file_info, dict)
+             and isinstance(file_info.get("path"), str)
+             and isinstance(file_info.get("uploadMode"), str)
+             and (file_info["uploadMode"] in ("lfs", "regular"))
+         ):
+             raise ValueError("preupload_info is improperly formatted")
+     return preupload_info
+
+
+ @validate_hf_hub_args
+ def _fetch_upload_modes(
+     additions: Iterable[CommitOperationAdd],
+     repo_type: str,
+     repo_id: str,
+     headers: Dict[str, str],
+     revision: str,
+     endpoint: Optional[str] = None,
+     create_pr: bool = False,
+     gitignore_content: Optional[str] = None,
+ ) -> None:
+     """
+     Requests the Hub "preupload" endpoint to determine whether each input file should be uploaded as a regular git blob
+     or as git LFS blob. Input `additions` are mutated in-place with the upload mode.
+
+     Args:
+         additions (`Iterable` of :class:`CommitOperationAdd`):
+             Iterable of :class:`CommitOperationAdd` describing the files to
+             upload to the Hub.
+         repo_type (`str`):
+             Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
+         repo_id (`str`):
+             A namespace (user or an organization) and a repo name separated
+             by a `/`.
+         headers (`Dict[str, str]`):
+             Headers to use for the request, including authorization headers and user agent.
+         revision (`str`):
+             The git revision to upload the files to. Can be any valid git revision.
+         gitignore_content (`str`, *optional*):
+             The content of the `.gitignore` file to know which files should be ignored. The order of priority
+             is to first check if `gitignore_content` is passed, then check if the `.gitignore` file is present
+             in the list of files to commit and finally default to the `.gitignore` file already hosted on the Hub
+             (if any).
+
+     Raises:
+         [`~utils.HfHubHTTPError`]
+             If the Hub API returned an error.
+         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+             If the Hub API response is improperly formatted.
+     """
+     endpoint = endpoint if endpoint is not None else constants.ENDPOINT
+
+     # Fetch upload mode (LFS or regular) chunk by chunk.
+     upload_modes: Dict[str, UploadMode] = {}
+     should_ignore_info: Dict[str, bool] = {}
+     oid_info: Dict[str, Optional[str]] = {}
+
+     for chunk in chunk_iterable(additions, 256):
+         payload: Dict = {
+             "files": [
+                 {
+                     "path": op.path_in_repo,
+                     "sample": base64.b64encode(op.upload_info.sample).decode("ascii"),
+                     "size": op.upload_info.size,
+                 }
+                 for op in chunk
+             ]
+         }
+         if gitignore_content is not None:
+             payload["gitIgnore"] = gitignore_content
+
+         resp = get_session().post(
+             f"{endpoint}/api/{repo_type}s/{repo_id}/preupload/{revision}",
+             json=payload,
+             headers=headers,
+             params={"create_pr": "1"} if create_pr else None,
+         )
+         hf_raise_for_status(resp)
+         preupload_info = _validate_preupload_info(resp.json())
+         upload_modes.update(**{file["path"]: file["uploadMode"] for file in preupload_info["files"]})
+         should_ignore_info.update(**{file["path"]: file["shouldIgnore"] for file in preupload_info["files"]})
+         oid_info.update(**{file["path"]: file.get("oid") for file in preupload_info["files"]})
+
+     # Set upload mode for each addition operation
+     for addition in additions:
+         addition._upload_mode = upload_modes[addition.path_in_repo]
+         addition._should_ignore = should_ignore_info[addition.path_in_repo]
+         addition._remote_oid = oid_info[addition.path_in_repo]
+
+     # Empty files cannot be uploaded as LFS (S3 would fail with a 501 Not Implemented)
+     # => empty files are uploaded as "regular" to still allow users to commit them.
+     for addition in additions:
+         if addition.upload_info.size == 0:
+             addition._upload_mode = "regular"
+
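A sketch of the preupload exchange built by the function above, with made-up values (`sample` is a base64-encoded slice of the file, used server-side for content sniffing):

```python
payload = {
    "files": [
        {"path": "weights.bin", "sample": "AAAA...", "size": 503316480},
        {"path": "README.md", "sample": "IyBN...", "size": 1204},
    ]
}
# Expected response: one entry per file with its resolved upload mode,
# the fields consumed in the `.update(...)` calls above.
response = {
    "files": [
        {"path": "weights.bin", "uploadMode": "lfs", "shouldIgnore": False, "oid": None},
        {"path": "README.md", "uploadMode": "regular", "shouldIgnore": False, "oid": None},
    ]
}
```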
+
+ @validate_hf_hub_args
+ def _fetch_files_to_copy(
+     copies: Iterable[CommitOperationCopy],
+     repo_type: str,
+     repo_id: str,
+     headers: Dict[str, str],
+     revision: str,
+     endpoint: Optional[str] = None,
+ ) -> Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]]:
+     """
+     Fetch information about the files to copy.
+
+     For LFS files, we only need their metadata (file size and sha256) while for regular files
+     we need to download the raw content from the Hub.
+
+     Args:
+         copies (`Iterable` of :class:`CommitOperationCopy`):
+             Iterable of :class:`CommitOperationCopy` describing the files to
+             copy on the Hub.
+         repo_type (`str`):
+             Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
+         repo_id (`str`):
+             A namespace (user or an organization) and a repo name separated
+             by a `/`.
+         headers (`Dict[str, str]`):
+             Headers to use for the request, including authorization headers and user agent.
+         revision (`str`):
+             The git revision to upload the files to. Can be any valid git revision.
+
+     Returns: `Dict[Tuple[str, Optional[str]], Union[RepoFile, bytes]]`
+         Key is the file path and revision of the file to copy.
+         Value is the raw content as bytes (for regular files) or the file information as a RepoFile (for LFS files).
+
+     Raises:
+         [`~utils.HfHubHTTPError`]
+             If the Hub API returned an error.
+         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+             If the Hub API response is improperly formatted.
+     """
+     from .hf_api import HfApi, RepoFolder
+
+     hf_api = HfApi(endpoint=endpoint, headers=headers)
+     files_to_copy: Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]] = {}
+     for src_revision, operations in groupby(copies, key=lambda op: op.src_revision):
+         operations = list(operations)  # type: ignore
+         paths = [op.src_path_in_repo for op in operations]
+         for offset in range(0, len(paths), FETCH_LFS_BATCH_SIZE):
+             src_repo_files = hf_api.get_paths_info(
+                 repo_id=repo_id,
+                 paths=paths[offset : offset + FETCH_LFS_BATCH_SIZE],
+                 revision=src_revision or revision,
+                 repo_type=repo_type,
+             )
+             for src_repo_file in src_repo_files:
+                 if isinstance(src_repo_file, RepoFolder):
+                     raise NotImplementedError("Copying a folder is not implemented.")
+                 if src_repo_file.lfs:
+                     files_to_copy[(src_repo_file.path, src_revision)] = src_repo_file
+                 else:
+                     # TODO: (optimization) download regular files to copy concurrently
+                     url = hf_hub_url(
+                         endpoint=endpoint,
+                         repo_type=repo_type,
+                         repo_id=repo_id,
+                         revision=src_revision or revision,
+                         filename=src_repo_file.path,
+                     )
+                     response = get_session().get(url, headers=headers)
+                     hf_raise_for_status(response)
+                     files_to_copy[(src_repo_file.path, src_revision)] = response.content
+         for operation in operations:
+             if (operation.src_path_in_repo, src_revision) not in files_to_copy:
+                 raise EntryNotFoundError(
+                     f"Cannot copy {operation.src_path_in_repo} at revision "
+                     f"{src_revision or revision}: file is missing on repo."
+                 )
+     return files_to_copy
+
+
+ def _prepare_commit_payload(
+     operations: Iterable[CommitOperation],
+     files_to_copy: Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]],
+     commit_message: str,
+     commit_description: Optional[str] = None,
+     parent_commit: Optional[str] = None,
+ ) -> Iterable[Dict[str, Any]]:
+     """
+     Builds the payload to POST to the `/commit` API of the Hub.
+
+     Payload is returned as an iterator so that it can be streamed as a ndjson in the
+     POST request.
+
+     For more information, see:
+         - https://github.com/huggingface/huggingface_hub/issues/1085#issuecomment-1265208073
+         - http://ndjson.org/
+     """
+     commit_description = commit_description if commit_description is not None else ""
+
+     # 1. Send a header item with the commit metadata
+     header_value = {"summary": commit_message, "description": commit_description}
+     if parent_commit is not None:
+         header_value["parentCommit"] = parent_commit
+     yield {"key": "header", "value": header_value}
+
+     nb_ignored_files = 0
+
+     # 2. Send operations, one per line
+     for operation in operations:
+         # Skip ignored files
+         if isinstance(operation, CommitOperationAdd) and operation._should_ignore:
+             logger.debug(f"Skipping file '{operation.path_in_repo}' in commit (ignored by gitignore file).")
+             nb_ignored_files += 1
+             continue
+
+         # 2.a. Case adding a regular file
+         if isinstance(operation, CommitOperationAdd) and operation._upload_mode == "regular":
+             yield {
+                 "key": "file",
+                 "value": {
+                     "content": operation.b64content().decode(),
+                     "path": operation.path_in_repo,
+                     "encoding": "base64",
+                 },
+             }
+         # 2.b. Case adding an LFS file
+         elif isinstance(operation, CommitOperationAdd) and operation._upload_mode == "lfs":
+             yield {
+                 "key": "lfsFile",
+                 "value": {
+                     "path": operation.path_in_repo,
+                     "algo": "sha256",
+                     "oid": operation.upload_info.sha256.hex(),
+                     "size": operation.upload_info.size,
+                 },
+             }
+         # 2.c. Case deleting a file or folder
+         elif isinstance(operation, CommitOperationDelete):
+             yield {
+                 "key": "deletedFolder" if operation.is_folder else "deletedFile",
+                 "value": {"path": operation.path_in_repo},
+             }
+         # 2.d. Case copying a file or folder
+         elif isinstance(operation, CommitOperationCopy):
+             file_to_copy = files_to_copy[(operation.src_path_in_repo, operation.src_revision)]
+             if isinstance(file_to_copy, bytes):
+                 yield {
+                     "key": "file",
+                     "value": {
+                         "content": base64.b64encode(file_to_copy).decode(),
+                         "path": operation.path_in_repo,
+                         "encoding": "base64",
+                     },
+                 }
+             elif file_to_copy.lfs:
+                 yield {
+                     "key": "lfsFile",
+                     "value": {
+                         "path": operation.path_in_repo,
+                         "algo": "sha256",
+                         "oid": file_to_copy.lfs.sha256,
+                     },
+                 }
+             else:
+                 raise ValueError(
+                     "Malformed files_to_copy (should be raw file content as bytes or RepoFile objects with LFS info)."
+                 )
+         # 2.e. Never expected to happen
+         else:
+             raise ValueError(
+                 f"Unknown operation to commit. Operation: {operation}. Upload mode:"
+                 f" {getattr(operation, '_upload_mode', None)}"
+             )
+
+     if nb_ignored_files > 0:
+         logger.info(f"Skipped {nb_ignored_files} file(s) in commit (ignored by gitignore file).")
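To make the ndjson framing concrete, here is what the streamed body looks like for a small commit (one JSON object per line; values made up):

```python
import json

lines = [
    {"key": "header", "value": {"summary": "Upload weights", "description": ""}},
    {"key": "file", "value": {"content": "aGVsbG8=", "path": "hello.txt", "encoding": "base64"}},
    {"key": "deletedFile", "value": {"path": "old.txt"}},
]
ndjson_body = "\n".join(json.dumps(line) for line in lines)
```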
huggingface_hub/_commit_scheduler.py ADDED
@@ -0,0 +1,327 @@
+ import atexit
+ import logging
+ import os
+ import time
+ from concurrent.futures import Future
+ from dataclasses import dataclass
+ from io import SEEK_END, SEEK_SET, BytesIO
+ from pathlib import Path
+ from threading import Lock, Thread
+ from typing import Dict, List, Optional, Union
+
+ from .hf_api import DEFAULT_IGNORE_PATTERNS, CommitInfo, CommitOperationAdd, HfApi
+ from .utils import filter_repo_objects
+
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass(frozen=True)
+ class _FileToUpload:
+     """Temporary dataclass to store info about files to upload. Not meant to be used directly."""
+
+     local_path: Path
+     path_in_repo: str
+     size_limit: int
+     last_modified: float
+
+
+ class CommitScheduler:
+     """
+     Scheduler to upload a local folder to the Hub at regular intervals (e.g. push to hub every 5 minutes).
+
+     The scheduler is started when instantiated and runs indefinitely. At the end of your script, a last commit is
+     triggered. Check out the [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#scheduled-uploads)
+     to learn more about how to use it.
+
+     Args:
+         repo_id (`str`):
+             The id of the repo to commit to.
+         folder_path (`str` or `Path`):
+             Path to the local folder to upload regularly.
+         every (`int` or `float`, *optional*):
+             The number of minutes between each commit. Defaults to 5 minutes.
+         path_in_repo (`str`, *optional*):
+             Relative path of the directory in the repo, for example: `"checkpoints/"`. Defaults to the root folder
+             of the repository.
+         repo_type (`str`, *optional*):
+             The type of the repo to commit to. Defaults to `model`.
+         revision (`str`, *optional*):
+             The revision of the repo to commit to. Defaults to `main`.
+         private (`bool`, *optional*):
+             Whether to make the repo private. Defaults to `False`. This value is ignored if the repo already exists.
+         token (`str`, *optional*):
+             The token to use to commit to the repo. Defaults to the token saved on the machine.
+         allow_patterns (`List[str]` or `str`, *optional*):
+             If provided, only files matching at least one pattern are uploaded.
+         ignore_patterns (`List[str]` or `str`, *optional*):
+             If provided, files matching any of the patterns are not uploaded.
+         squash_history (`bool`, *optional*):
+             Whether to squash the history of the repo after each commit. Defaults to `False`. Squashing commits is
+             useful to avoid degraded performance on the repo when it grows too large.
+         hf_api (`HfApi`, *optional*):
+             The [`HfApi`] client to use to commit to the Hub. Can be set with custom settings (user agent, token,...).
+
+     Example:
+     ```py
+     >>> from pathlib import Path
+     >>> from huggingface_hub import CommitScheduler
+
+     # Scheduler uploads every 10 minutes
+     >>> csv_path = Path("watched_folder/data.csv")
+     >>> CommitScheduler(repo_id="test_scheduler", repo_type="dataset", folder_path=csv_path.parent, every=10)
+
+     >>> with csv_path.open("a") as f:
+     ...     f.write("first line")
+
+     # Some time later (...)
+     >>> with csv_path.open("a") as f:
+     ...     f.write("second line")
+     ```
+     """
+
+     def __init__(
+         self,
+         *,
+         repo_id: str,
+         folder_path: Union[str, Path],
+         every: Union[int, float] = 5,
+         path_in_repo: Optional[str] = None,
+         repo_type: Optional[str] = None,
+         revision: Optional[str] = None,
+         private: bool = False,
+         token: Optional[str] = None,
+         allow_patterns: Optional[Union[List[str], str]] = None,
+         ignore_patterns: Optional[Union[List[str], str]] = None,
+         squash_history: bool = False,
+         hf_api: Optional["HfApi"] = None,
+     ) -> None:
+         self.api = hf_api or HfApi(token=token)
+
+         # Folder
+         self.folder_path = Path(folder_path).expanduser().resolve()
+         self.path_in_repo = path_in_repo or ""
+         self.allow_patterns = allow_patterns
+
+         if ignore_patterns is None:
+             ignore_patterns = []
+         elif isinstance(ignore_patterns, str):
+             ignore_patterns = [ignore_patterns]
+         self.ignore_patterns = ignore_patterns + DEFAULT_IGNORE_PATTERNS
+
+         if self.folder_path.is_file():
+             raise ValueError(f"'folder_path' must be a directory, not a file: '{self.folder_path}'.")
+         self.folder_path.mkdir(parents=True, exist_ok=True)
+
+         # Repository
+         repo_url = self.api.create_repo(repo_id=repo_id, private=private, repo_type=repo_type, exist_ok=True)
+         self.repo_id = repo_url.repo_id
+         self.repo_type = repo_type
+         self.revision = revision
+         self.token = token
+
+         # Keep track of already uploaded files
+         self.last_uploaded: Dict[Path, float] = {}  # key is local path, value is timestamp
+
+         # Scheduler
+         if not every > 0:
+             raise ValueError(f"'every' must be a positive number, not '{every}'.")
+         self.lock = Lock()
+         self.every = every
+         self.squash_history = squash_history
+
+         logger.info(f"Scheduled job to push '{self.folder_path}' to '{self.repo_id}' every {self.every} minutes.")
+         self._scheduler_thread = Thread(target=self._run_scheduler, daemon=True)
+         self._scheduler_thread.start()
+         atexit.register(self._push_to_hub)
+
+         self.__stopped = False
+
+     def stop(self) -> None:
+         """Stop the scheduler.
+
+         A stopped scheduler cannot be restarted. Mostly for tests purposes.
+         """
+         self.__stopped = True
+
+     def _run_scheduler(self) -> None:
+         """Dumb thread waiting between each scheduled push to Hub."""
+         while True:
+             self.last_future = self.trigger()
+             time.sleep(self.every * 60)
+             if self.__stopped:
+                 break
+
+     def trigger(self) -> Future:
+         """Trigger a `push_to_hub` and return a future.
+
+         This method is automatically called every `every` minutes. You can also call it manually to trigger a commit
+         immediately, without waiting for the next scheduled commit.
+         """
+         return self.api.run_as_future(self._push_to_hub)
+
+     def _push_to_hub(self) -> Optional[CommitInfo]:
+         if self.__stopped:  # If stopped, already scheduled commits are ignored
+             return None
+
+         logger.info("(Background) scheduled commit triggered.")
+         try:
+             value = self.push_to_hub()
+             if self.squash_history:
+                 logger.info("(Background) squashing repo history.")
+                 self.api.super_squash_history(repo_id=self.repo_id, repo_type=self.repo_type, branch=self.revision)
+             return value
+         except Exception as e:
+             logger.error(f"Error while pushing to Hub: {e}")  # Depending on the setup, error might be silenced
+             raise
+
+     def push_to_hub(self) -> Optional[CommitInfo]:
+         """
+         Push folder to the Hub and return the commit info.
+
+         <Tip warning={true}>
+
+         This method is not meant to be called directly. It is run in the background by the scheduler, respecting a
+         queue mechanism to avoid concurrent commits. Making a direct call to the method might lead to concurrency
+         issues.
+
+         </Tip>
+
+         The default behavior of `push_to_hub` is to assume an append-only folder. It lists all files in the folder
+         and uploads only changed files. If no changes are found, the method returns without committing anything. If
+         you want to change this behavior, you can inherit from [`CommitScheduler`] and override this method. This can
+         be useful, for example, to compress data into a single file before committing. For more details and examples,
+         check out our [integration guide](https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#scheduled-uploads).
+         """
+         # Check files to upload (with lock)
+         with self.lock:
+             logger.debug("Listing files to upload for scheduled commit.")
+
+             # List files from folder (taken from `_prepare_upload_folder_additions`)
+             relpath_to_abspath = {
+                 path.relative_to(self.folder_path).as_posix(): path
+                 for path in sorted(self.folder_path.glob("**/*"))  # sorted to be deterministic
+                 if path.is_file()
+             }
+             prefix = f"{self.path_in_repo.strip('/')}/" if self.path_in_repo else ""
+
+             # Filter with pattern + filter out unchanged files + retrieve current file size
+             files_to_upload: List[_FileToUpload] = []
+             for relpath in filter_repo_objects(
+                 relpath_to_abspath.keys(), allow_patterns=self.allow_patterns, ignore_patterns=self.ignore_patterns
+             ):
+                 local_path = relpath_to_abspath[relpath]
+                 stat = local_path.stat()
+                 if self.last_uploaded.get(local_path) is None or self.last_uploaded[local_path] != stat.st_mtime:
+                     files_to_upload.append(
+                         _FileToUpload(
+                             local_path=local_path,
+                             path_in_repo=prefix + relpath,
+                             size_limit=stat.st_size,
+                             last_modified=stat.st_mtime,
+                         )
+                     )
+
+         # Return if nothing to upload
+         if len(files_to_upload) == 0:
+             logger.debug("Dropping schedule commit: no changed file to upload.")
+             return None
+
+         # Convert `_FileToUpload` items to `CommitOperationAdd` (=> compute file shas + limit to file size)
+         logger.debug("Removing unchanged files since previous scheduled commit.")
+         add_operations = [
+             CommitOperationAdd(
+                 # Cap the file at its current size, even if the user appends data to it while a scheduled commit is happening
+                 path_or_fileobj=PartialFileIO(file_to_upload.local_path, size_limit=file_to_upload.size_limit),
+                 path_in_repo=file_to_upload.path_in_repo,
+             )
+             for file_to_upload in files_to_upload
+         ]
+
+         # Upload files (append mode expected - no need for lock)
+         logger.debug("Uploading files for scheduled commit.")
+         commit_info = self.api.create_commit(
+             repo_id=self.repo_id,
+             repo_type=self.repo_type,
+             operations=add_operations,
+             commit_message="Scheduled Commit",
+             revision=self.revision,
+         )
+
+         # Successful commit: keep track of the latest "last_modified" for each file
+         for file in files_to_upload:
+             self.last_uploaded[file.local_path] = file.last_modified
+         return commit_info
+
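The docstring above invites overriding `push_to_hub`. A hedged sketch of that pattern, compressing the watched folder into one archive before committing (class name, archive layout and file name are made up):

```python
import tempfile
from pathlib import Path
from zipfile import ZipFile

from huggingface_hub import CommitScheduler

class ZipScheduler(CommitScheduler):
    def push_to_hub(self):
        # Zip the whole folder and upload a single file instead of many.
        with tempfile.TemporaryDirectory() as tmp:
            archive = Path(tmp) / "data.zip"
            with ZipFile(archive, "w") as zf:
                for path in self.folder_path.glob("**/*"):
                    if path.is_file():
                        zf.write(path, arcname=path.relative_to(self.folder_path))
            return self.api.upload_file(
                path_or_fileobj=archive,
                path_in_repo="data.zip",
                repo_id=self.repo_id,
                repo_type=self.repo_type,
                revision=self.revision,
            )
```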
+
+ class PartialFileIO(BytesIO):
+     """A file-like object that reads only the first part of a file.
+
+     Useful to upload a file to the Hub when the user might still be appending data to it. Only the first part of the
+     file is uploaded (i.e. the part that was available when the filesystem was first scanned).
+
+     In practice, only used internally by the CommitScheduler to regularly push a folder to the Hub with minimal
+     disturbance for the user. The object is passed to `CommitOperationAdd`.
+
+     Only supports `read`, `tell` and `seek` methods.
+
+     Args:
+         file_path (`str` or `Path`):
+             Path to the file to read.
+         size_limit (`int`):
+             The maximum number of bytes to read from the file. If the file is larger than this, only the first part
+             will be read (and uploaded).
+     """
+
+     def __init__(self, file_path: Union[str, Path], size_limit: int) -> None:
+         self._file_path = Path(file_path)
+         self._file = self._file_path.open("rb")
+         self._size_limit = min(size_limit, os.fstat(self._file.fileno()).st_size)
+
+     def __del__(self) -> None:
+         self._file.close()
+         return super().__del__()
+
+     def __repr__(self) -> str:
+         return f"<PartialFileIO file_path={self._file_path} size_limit={self._size_limit}>"
+
+     def __len__(self) -> int:
+         return self._size_limit
+
+     def __getattribute__(self, name: str):
+         if name.startswith("_") or name in ("read", "tell", "seek"):  # only 3 public methods supported
+             return super().__getattribute__(name)
+         raise NotImplementedError(f"PartialFileIO does not support '{name}'.")
+
+     def tell(self) -> int:
+         """Return the current file position."""
+         return self._file.tell()
+
+     def seek(self, __offset: int, __whence: int = SEEK_SET) -> int:
+         """Change the stream position to the given offset.
+
+         Behavior is the same as a regular file, except that the position is capped to the size limit.
+         """
+         if __whence == SEEK_END:
+             # SEEK_END => set from the truncated end
+             __offset = len(self) + __offset
+             __whence = SEEK_SET
+
+         pos = self._file.seek(__offset, __whence)
+         if pos > self._size_limit:
+             return self._file.seek(self._size_limit)
+         return pos
+
+     def read(self, __size: Optional[int] = -1) -> bytes:
+         """Read at most `__size` bytes from the file.
+
+         Behavior is the same as a regular file, except that it is capped to the size limit.
+         """
+         current = self._file.tell()
+         if __size is None or __size < 0:
+             # Read until file limit
+             truncated_size = self._size_limit - current
+         else:
+             # Read until file limit or __size
+             truncated_size = min(__size, self._size_limit - current)
+         return self._file.read(truncated_size)
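A quick behavior check for the size cap, read straight from the class above (file name is made up; `PartialFileIO` is an internal helper, not part of the public API):

```python
from pathlib import Path

Path("log.txt").write_bytes(b"0123456789")
f = PartialFileIO("log.txt", size_limit=4)
assert len(f) == 4
assert f.read() == b"0123"  # reads stop at the cap...
assert f.read() == b""      # ...even though the file holds more bytes
```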
huggingface_hub/_inference_endpoints.py ADDED
@@ -0,0 +1,396 @@
+ import time
+ from dataclasses import dataclass, field
+ from datetime import datetime
+ from enum import Enum
+ from typing import TYPE_CHECKING, Dict, Optional, Union
+
+ from huggingface_hub.errors import InferenceEndpointError, InferenceEndpointTimeoutError
+
+ from .inference._client import InferenceClient
+ from .inference._generated._async_client import AsyncInferenceClient
+ from .utils import get_session, logging, parse_datetime
+
+
+ if TYPE_CHECKING:
+     from .hf_api import HfApi
+
+
+ logger = logging.get_logger(__name__)
+
+
+ class InferenceEndpointStatus(str, Enum):
+     PENDING = "pending"
+     INITIALIZING = "initializing"
+     UPDATING = "updating"
+     UPDATE_FAILED = "updateFailed"
+     RUNNING = "running"
+     PAUSED = "paused"
+     FAILED = "failed"
+     SCALED_TO_ZERO = "scaledToZero"
+
+
+ class InferenceEndpointType(str, Enum):
+     PUBLIC = "public"
+     PROTECTED = "protected"
+     PRIVATE = "private"
+
+
+ @dataclass
+ class InferenceEndpoint:
+     """
+     Contains information about a deployed Inference Endpoint.
+
+     Args:
+         name (`str`):
+             The unique name of the Inference Endpoint.
+         namespace (`str`):
+             The namespace where the Inference Endpoint is located.
+         repository (`str`):
+             The name of the model repository deployed on this Inference Endpoint.
+         status ([`InferenceEndpointStatus`]):
+             The current status of the Inference Endpoint.
+         url (`str`, *optional*):
+             The URL of the Inference Endpoint, if available. Only a deployed Inference Endpoint will have a URL.
+         framework (`str`):
+             The machine learning framework used for the model.
+         revision (`str`):
+             The specific model revision deployed on the Inference Endpoint.
+         task (`str`):
+             The task associated with the deployed model.
+         created_at (`datetime.datetime`):
+             The timestamp when the Inference Endpoint was created.
+         updated_at (`datetime.datetime`):
+             The timestamp of the last update of the Inference Endpoint.
+         type ([`InferenceEndpointType`]):
+             The type of the Inference Endpoint (public, protected, private).
+         raw (`Dict`):
+             The raw dictionary data returned from the API.
+         token (`str` or `bool`, *optional*):
+             Authentication token for the Inference Endpoint, if set when requesting the API. Will default to the
+             locally saved token if not provided. Pass `token=False` if you don't want to send your token to the server.
+
+     Example:
+     ```python
+     >>> from huggingface_hub import get_inference_endpoint
+     >>> endpoint = get_inference_endpoint("my-text-to-image")
+     >>> endpoint
+     InferenceEndpoint(name='my-text-to-image', ...)
+
+     # Get status
+     >>> endpoint.status
+     'running'
+     >>> endpoint.url
+     'https://my-text-to-image.region.vendor.endpoints.huggingface.cloud'
+
+     # Run inference
+     >>> endpoint.client.text_to_image(...)
+
+     # Pause endpoint to save $$$
+     >>> endpoint.pause()
+
+     # ...
+     # Resume and wait for deployment
+     >>> endpoint.resume()
+     >>> endpoint.wait()
+     >>> endpoint.client.text_to_image(...)
+     ```
+     """
+
+     # Fields in __repr__
+     name: str = field(init=False)
+     namespace: str
+     repository: str = field(init=False)
+     status: InferenceEndpointStatus = field(init=False)
+     url: Optional[str] = field(init=False)
+
+     # Other fields
+     framework: str = field(repr=False, init=False)
+     revision: str = field(repr=False, init=False)
+     task: str = field(repr=False, init=False)
+     created_at: datetime = field(repr=False, init=False)
+     updated_at: datetime = field(repr=False, init=False)
+     type: InferenceEndpointType = field(repr=False, init=False)
+
+     # Raw dict from the API
+     raw: Dict = field(repr=False)
+
+     # Internal fields
+     _token: Union[str, bool, None] = field(repr=False, compare=False)
+     _api: "HfApi" = field(repr=False, compare=False)
+
+     @classmethod
+     def from_raw(
+         cls, raw: Dict, namespace: str, token: Union[str, bool, None] = None, api: Optional["HfApi"] = None
+     ) -> "InferenceEndpoint":
+         """Initialize object from raw dictionary."""
+         if api is None:
+             from .hf_api import HfApi
+
+             api = HfApi()
+         if token is None:
+             token = api.token
+
+         # All other fields are populated in __post_init__
+         return cls(raw=raw, namespace=namespace, _token=token, _api=api)
+
+     def __post_init__(self) -> None:
+         """Populate fields from raw dictionary."""
+         self._populate_from_raw()
+
+     @property
+     def client(self) -> InferenceClient:
+         """Returns a client to make predictions on this Inference Endpoint.
+
+         Returns:
+             [`InferenceClient`]: an inference client pointing to the deployed endpoint.
+
+         Raises:
+             [`InferenceEndpointError`]: If the Inference Endpoint is not yet deployed.
+         """
+         if self.url is None:
+             raise InferenceEndpointError(
+                 "Cannot create a client for this Inference Endpoint as it is not yet deployed. "
+                 "Please wait for the Inference Endpoint to be deployed using `endpoint.wait()` and try again."
+             )
+         return InferenceClient(model=self.url, token=self._token)
+
+     @property
+     def async_client(self) -> AsyncInferenceClient:
+         """Returns a client to make predictions on this Inference Endpoint.
+
+         Returns:
+             [`AsyncInferenceClient`]: an asyncio-compatible inference client pointing to the deployed endpoint.
+
+         Raises:
+             [`InferenceEndpointError`]: If the Inference Endpoint is not yet deployed.
+         """
+         if self.url is None:
+             raise InferenceEndpointError(
+                 "Cannot create a client for this Inference Endpoint as it is not yet deployed. "
+                 "Please wait for the Inference Endpoint to be deployed using `endpoint.wait()` and try again."
+             )
+         return AsyncInferenceClient(model=self.url, token=self._token)
+
+     def wait(self, timeout: Optional[int] = None, refresh_every: int = 5) -> "InferenceEndpoint":
+         """Wait for the Inference Endpoint to be deployed.
+
+         Information from the server is fetched every `refresh_every` seconds. If the Inference Endpoint is not
+         deployed after `timeout` seconds, an [`InferenceEndpointTimeoutError`] is raised. The [`InferenceEndpoint`]
+         will be mutated in place with the latest data.
+
+         Args:
+             timeout (`int`, *optional*):
+                 The maximum time to wait for the Inference Endpoint to be deployed, in seconds. If `None`, will wait
+                 indefinitely.
+             refresh_every (`int`, *optional*):
+                 The time to wait between each fetch of the Inference Endpoint status, in seconds. Defaults to 5s.
+
+         Returns:
+             [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data.
+
+         Raises:
+             [`InferenceEndpointError`]
+                 If the Inference Endpoint ended up in a failed state.
+             [`InferenceEndpointTimeoutError`]
+                 If the Inference Endpoint is not deployed after `timeout` seconds.
+         """
+         if timeout is not None and timeout < 0:
+             raise ValueError("`timeout` cannot be negative.")
+         if refresh_every <= 0:
+             raise ValueError("`refresh_every` must be positive.")
+
+         start = time.time()
+         while True:
+             if self.url is not None:
+                 # Means the URL is provisioned => check if the endpoint is reachable
+                 response = get_session().get(self.url, headers=self._api._build_hf_headers(token=self._token))
+                 if response.status_code == 200:
+                     logger.info("Inference Endpoint is ready to be used.")
+                     return self
+             if self.status == InferenceEndpointStatus.FAILED:
+                 raise InferenceEndpointError(
+                     f"Inference Endpoint {self.name} failed to deploy. Please check the logs for more information."
+                 )
+             if timeout is not None:
+                 if time.time() - start > timeout:
+                     raise InferenceEndpointTimeoutError("Timeout while waiting for Inference Endpoint to be deployed.")
+             logger.info(f"Inference Endpoint is not deployed yet ({self.status}). Waiting {refresh_every}s...")
+             time.sleep(refresh_every)
+             self.fetch()
+
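+     # Illustrative usage of `wait` (not part of the original diff; the endpoint
+     # name is made up). Poll every 10s for up to 10 minutes, then inspect the
+     # last known status on timeout:
+     #
+     #     endpoint = get_inference_endpoint("my-endpoint")
+     #     try:
+     #         endpoint.wait(timeout=600, refresh_every=10)
+     #     except InferenceEndpointTimeoutError:
+     #         print(f"Still not deployed, current status: {endpoint.status}")
+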
+     def fetch(self) -> "InferenceEndpoint":
+         """Fetch latest information about the Inference Endpoint.
+
+         Returns:
+             [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data.
+         """
+         obj = self._api.get_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token)  # type: ignore [arg-type]
+         self.raw = obj.raw
+         self._populate_from_raw()
+         return self
+
+     def update(
+         self,
+         *,
+         # Compute update
+         accelerator: Optional[str] = None,
+         instance_size: Optional[str] = None,
+         instance_type: Optional[str] = None,
+         min_replica: Optional[int] = None,
+         max_replica: Optional[int] = None,
+         scale_to_zero_timeout: Optional[int] = None,
+         # Model update
+         repository: Optional[str] = None,
+         framework: Optional[str] = None,
+         revision: Optional[str] = None,
+         task: Optional[str] = None,
+         custom_image: Optional[Dict] = None,
+         secrets: Optional[Dict[str, str]] = None,
+     ) -> "InferenceEndpoint":
+         """Update the Inference Endpoint.
+
+         This method allows the update of either the compute configuration, the deployed model, or both. All arguments
+         are optional but at least one must be provided.
+
+         This is an alias for [`HfApi.update_inference_endpoint`]. The current object is mutated in place with the
+         latest data from the server.
+
+         Args:
+             accelerator (`str`, *optional*):
+                 The hardware accelerator to be used for inference (e.g. `"cpu"`).
+             instance_size (`str`, *optional*):
+                 The size or type of the instance to be used for hosting the model (e.g. `"x4"`).
+             instance_type (`str`, *optional*):
+                 The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`).
+             min_replica (`int`, *optional*):
+                 The minimum number of replicas (instances) to keep running for the Inference Endpoint.
+             max_replica (`int`, *optional*):
+                 The maximum number of replicas (instances) to scale to for the Inference Endpoint.
+             scale_to_zero_timeout (`int`, *optional*):
+                 The duration in minutes before an inactive endpoint is scaled to zero.
+
+             repository (`str`, *optional*):
+                 The name of the model repository associated with the Inference Endpoint (e.g. `"gpt2"`).
+             framework (`str`, *optional*):
+                 The machine learning framework used for the model (e.g. `"custom"`).
+             revision (`str`, *optional*):
+                 The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`).
+             task (`str`, *optional*):
+                 The task on which to deploy the model (e.g. `"text-classification"`).
+             custom_image (`Dict`, *optional*):
+                 A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
+                 Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
+             secrets (`Dict[str, str]`, *optional*):
+                 Secret values to inject in the container environment.
+
+         Returns:
+             [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data.
+         """
+         # Make API call
+         obj = self._api.update_inference_endpoint(
+             name=self.name,
+             namespace=self.namespace,
+             accelerator=accelerator,
+             instance_size=instance_size,
+             instance_type=instance_type,
+             min_replica=min_replica,
+             max_replica=max_replica,
+             scale_to_zero_timeout=scale_to_zero_timeout,
+             repository=repository,
+             framework=framework,
+             revision=revision,
+             task=task,
+             custom_image=custom_image,
+             secrets=secrets,
+             token=self._token,  # type: ignore [arg-type]
+         )
+
+         # Mutate current object
+         self.raw = obj.raw
+         self._populate_from_raw()
+         return self
+
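+     # Illustrative usage of `update` (not part of the original diff; values are
+     # made up). Compute and model can be updated independently:
+     #
+     #     endpoint = endpoint.update(min_replica=0, max_replica=4, scale_to_zero_timeout=15)
+     #     endpoint = endpoint.update(repository="gpt2", revision="main")
+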
312
+ def pause(self) -> "InferenceEndpoint":
313
+ """Pause the Inference Endpoint.
314
+
315
+ A paused Inference Endpoint will not be charged. It can be resumed at any time using [`InferenceEndpoint.resume`].
316
+ This is different than scaling the Inference Endpoint to zero with [`InferenceEndpoint.scale_to_zero`], which
317
+ would be automatically restarted when a request is made to it.
318
+
319
+ This is an alias for [`HfApi.pause_inference_endpoint`]. The current object is mutated in place with the
320
+ latest data from the server.
321
+
322
+ Returns:
323
+ [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data.
324
+ """
325
+ obj = self._api.pause_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token) # type: ignore [arg-type]
326
+ self.raw = obj.raw
327
+ self._populate_from_raw()
328
+ return self
329
+
330
+ def resume(self, running_ok: bool = True) -> "InferenceEndpoint":
331
+ """Resume the Inference Endpoint.
332
+
333
+ This is an alias for [`HfApi.resume_inference_endpoint`]. The current object is mutated in place with the
334
+ latest data from the server.
335
+
336
+ Args:
337
+ running_ok (`bool`, *optional*):
338
+ If `True`, the method will not raise an error if the Inference Endpoint is already running. Defaults to
339
+ `True`.
340
+
341
+ Returns:
342
+ [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data.
343
+ """
344
+ obj = self._api.resume_inference_endpoint(
345
+ name=self.name, namespace=self.namespace, running_ok=running_ok, token=self._token
346
+ ) # type: ignore [arg-type]
347
+ self.raw = obj.raw
348
+ self._populate_from_raw()
349
+ return self
350
+
351
+ def scale_to_zero(self) -> "InferenceEndpoint":
352
+ """Scale Inference Endpoint to zero.
353
+
354
+ An Inference Endpoint scaled to zero will not be charged. It will be resume on the next request to it, with a
355
+ cold start delay. This is different than pausing the Inference Endpoint with [`InferenceEndpoint.pause`], which
356
+ would require a manual resume with [`InferenceEndpoint.resume`].
357
+
358
+ This is an alias for [`HfApi.scale_to_zero_inference_endpoint`]. The current object is mutated in place with the
359
+ latest data from the server.
360
+
361
+ Returns:
362
+ [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data.
363
+ """
364
+ obj = self._api.scale_to_zero_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token) # type: ignore [arg-type]
365
+ self.raw = obj.raw
366
+ self._populate_from_raw()
367
+ return self
368
+
369
+ def delete(self) -> None:
370
+ """Delete the Inference Endpoint.
371
+
372
+ This operation is not reversible. If you don't want to be charged for an Inference Endpoint, it is preferable
373
+ to pause it with [`InferenceEndpoint.pause`] or scale it to zero with [`InferenceEndpoint.scale_to_zero`].
374
+
375
+ This is an alias for [`HfApi.delete_inference_endpoint`].
376
+ """
377
+ self._api.delete_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token) # type: ignore [arg-type]
378
+
379
+ def _populate_from_raw(self) -> None:
380
+ """Populate fields from raw dictionary.
381
+
382
+ Called in __post_init__ + each time the Inference Endpoint is updated.
383
+ """
384
+ # Repr fields
385
+ self.name = self.raw["name"]
386
+ self.repository = self.raw["model"]["repository"]
387
+ self.status = self.raw["status"]["state"]
388
+ self.url = self.raw["status"].get("url")
389
+
390
+ # Other fields
391
+ self.framework = self.raw["model"]["framework"]
392
+ self.revision = self.raw["model"]["revision"]
393
+ self.task = self.raw["model"]["task"]
394
+ self.created_at = parse_datetime(self.raw["status"]["createdAt"])
395
+ self.updated_at = parse_datetime(self.raw["status"]["updatedAt"])
396
+ self.type = self.raw["type"]
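
To make the pause / scale-to-zero / delete trade-off above concrete, here is a minimal usage sketch (the endpoint name is hypothetical; `get_inference_endpoint` is the module-level accessor exposed by `huggingface_hub`):

```python
from huggingface_hub import get_inference_endpoint

endpoint = get_inference_endpoint("my-endpoint")  # hypothetical endpoint name

# pause(): stops billing; serving only restarts after an explicit resume().
endpoint.pause()
endpoint.resume()

# scale_to_zero(): stops billing too, but the endpoint restarts automatically
# (with a cold-start delay) on the next request.
endpoint.scale_to_zero()

# delete() is irreversible; prefer pause() or scale_to_zero() to stop charges.
# endpoint.delete()
```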
huggingface_hub/_local_folder.py ADDED
@@ -0,0 +1,425 @@
1
+ # coding=utf-8
2
+ # Copyright 2024-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Contains utilities to handle the `./.cache/huggingface` folder in local directories.
16
+
17
+ First discussed in https://github.com/huggingface/huggingface_hub/issues/1738 to store
18
+ download metadata when downloading files from the hub to a local directory (without
19
+ using the cache).
20
+
21
+ ./.cache/huggingface folder structure:
22
+ [4.0K] data
23
+ ├── [4.0K] .cache
24
+ │ └── [4.0K] huggingface
25
+ │ └── [4.0K] download
26
+ │ ├── [ 16] file.parquet.metadata
27
+ │ ├── [ 16] file.txt.metadata
28
+ │ └── [4.0K] folder
29
+ │ └── [ 16] file.parquet.metadata
30
+
31
+ ├── [6.5G] file.parquet
32
+ ├── [1.5K] file.txt
33
+ └── [4.0K] folder
34
+ └── [ 16] file.parquet
35
+
36
+
37
+ Download metadata file structure:
38
+ ```
39
+ # file.txt.metadata
40
+ 11c5a3d5811f50298f278a704980280950aedb10
41
+ a16a55fda99d2f2e7b69cce5cf93ff4ad3049930
42
+ 1712656091.123
43
+
44
+ # file.parquet.metadata
45
+ 11c5a3d5811f50298f278a704980280950aedb10
46
+ 7c5d3f4b8b76583b422fcb9189ad6c89d5d97a094541ce8932dce3ecabde1421
47
+ 1712656091.123
48
+
49
+ ```
50
+ """
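
As a quick illustration of the three-line layout documented above, a download metadata file can be parsed with a few lines of Python (a minimal sketch, independent of the helpers defined below):

```python
from pathlib import Path


def parse_download_metadata(path: Path) -> tuple[str, str, float]:
    """Parse a `*.metadata` file: commit hash, then etag, then a Unix timestamp."""
    commit_hash, etag, timestamp = path.read_text().splitlines()[:3]
    return commit_hash, etag, float(timestamp)
```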
51
+
52
+ import logging
53
+ import os
54
+ import time
55
+ from dataclasses import dataclass
56
+ from functools import lru_cache
57
+ from pathlib import Path
58
+ from typing import Optional
59
+
60
+ from .utils import WeakFileLock
61
+
62
+
63
+ logger = logging.getLogger(__name__)
64
+
65
+
66
+ @dataclass
67
+ class LocalDownloadFilePaths:
68
+ """
69
+ Paths to the files related to a download process in a local dir.
70
+
71
+ Returned by [`get_local_download_paths`].
72
+
73
+ Attributes:
74
+ file_path (`Path`):
75
+ Path where the file will be saved.
76
+ lock_path (`Path`):
77
+ Path to the lock file used to ensure atomicity when reading/writing metadata.
78
+ metadata_path (`Path`):
79
+ Path to the metadata file.
80
+ """
81
+
82
+ file_path: Path
83
+ lock_path: Path
84
+ metadata_path: Path
85
+
86
+ def incomplete_path(self, etag: str) -> Path:
87
+ """Return the path where a file will be temporarily downloaded before being moved to `file_path`."""
88
+ return self.metadata_path.with_suffix(f".{etag}.incomplete")
89
+
90
+
91
+ @dataclass(frozen=True)
92
+ class LocalUploadFilePaths:
93
+ """
94
+ Paths to the files related to an upload process in a local dir.
95
+
96
+ Returned by [`get_local_upload_paths`].
97
+
98
+ Attributes:
99
+ path_in_repo (`str`):
100
+ Path of the file in the repo.
101
+ file_path (`Path`):
102
+ Path where the file will be saved.
103
+ lock_path (`Path`):
104
+ Path to the lock file used to ensure atomicity when reading/writing metadata.
105
+ metadata_path (`Path`):
106
+ Path to the metadata file.
107
+ """
108
+
109
+ path_in_repo: str
110
+ file_path: Path
111
+ lock_path: Path
112
+ metadata_path: Path
113
+
114
+
115
+ @dataclass
116
+ class LocalDownloadFileMetadata:
117
+ """
118
+ Metadata about a file in the local directory related to a download process.
119
+
120
+ Attributes:
121
+ filename (`str`):
122
+ Path of the file in the repo.
123
+ commit_hash (`str`):
124
+ Commit hash of the file in the repo.
125
+ etag (`str`):
126
+ ETag of the file in the repo. Used to check if the file has changed.
127
+ For LFS files, this is the sha256 of the file. For regular files, it corresponds to the git hash.
128
+ timestamp (`float`):
129
+ Unix timestamp of when the metadata was saved i.e. when the metadata was accurate.
130
+ """
131
+
132
+ filename: str
133
+ commit_hash: str
134
+ etag: str
135
+ timestamp: float
136
+
137
+
138
+ @dataclass
139
+ class LocalUploadFileMetadata:
140
+ """
141
+ Metadata about a file in the local directory related to an upload process.
142
+ """
143
+
144
+ size: int
145
+
146
+ # Default values correspond to "we don't know yet"
147
+ timestamp: Optional[float] = None
148
+ should_ignore: Optional[bool] = None
149
+ sha256: Optional[str] = None
150
+ upload_mode: Optional[str] = None
151
+ is_uploaded: bool = False
152
+ is_committed: bool = False
153
+
154
+ def save(self, paths: LocalUploadFilePaths) -> None:
155
+ """Save the metadata to disk."""
156
+ with WeakFileLock(paths.lock_path):
157
+ with paths.metadata_path.open("w") as f:
158
+ new_timestamp = time.time()
159
+ f.write(str(new_timestamp) + "\n")
160
+
161
+ f.write(str(self.size)) # never None
162
+ f.write("\n")
163
+
164
+ if self.should_ignore is not None:
165
+ f.write(str(int(self.should_ignore)))
166
+ f.write("\n")
167
+
168
+ if self.sha256 is not None:
169
+ f.write(self.sha256)
170
+ f.write("\n")
171
+
172
+ if self.upload_mode is not None:
173
+ f.write(self.upload_mode)
174
+ f.write("\n")
175
+
176
+ f.write(str(int(self.is_uploaded)) + "\n")
177
+ f.write(str(int(self.is_committed)) + "\n")
178
+
179
+ self.timestamp = new_timestamp
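
For concreteness, a fully populated file written by `save` looks like the following (illustrative values; one field per line in the order timestamp, size, should_ignore, sha256, upload_mode, is_uploaded, is_committed):

```
1712656091.123
1048576
0
7c5d3f4b8b76583b422fcb9189ad6c89d5d97a094541ce8932dce3ecabde1421
lfs
1
0
```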
180
+
181
+
182
+ @lru_cache(maxsize=128) # ensure singleton
183
+ def get_local_download_paths(local_dir: Path, filename: str) -> LocalDownloadFilePaths:
184
+ """Compute paths to the files related to a download process.
185
+
186
+ Folders containing the paths are all guaranteed to exist.
187
+
188
+ Args:
189
+ local_dir (`Path`):
190
+ Path to the local directory in which files are downloaded.
191
+ filename (`str`):
192
+ Path of the file in the repo.
193
+
194
+ Return:
195
+ [`LocalDownloadFilePaths`]: the paths to the files (file_path, lock_path, metadata_path, incomplete_path).
196
+ """
197
+ # filename is the path in the Hub repository (separated by '/')
198
+ # make sure to have a cross platform transcription
199
+ sanitized_filename = os.path.join(*filename.split("/"))
200
+ if os.name == "nt":
201
+ if sanitized_filename.startswith("..\\") or "\\..\\" in sanitized_filename:
202
+ raise ValueError(
203
+ f"Invalid filename: cannot handle filename '{sanitized_filename}' on Windows. Please ask the repository"
204
+ " owner to rename this file."
205
+ )
206
+ file_path = local_dir / sanitized_filename
207
+ metadata_path = _huggingface_dir(local_dir) / "download" / f"{sanitized_filename}.metadata"
208
+ lock_path = metadata_path.with_suffix(".lock")
209
+
210
+ # Some Windows versions do not allow for paths longer than 255 characters.
211
+ # In this case, we must specify it as an extended path by using the "\\?\" prefix
212
+ if os.name == "nt":
213
+ if not str(local_dir).startswith("\\\\?\\") and len(os.path.abspath(lock_path)) > 255:
214
+ file_path = Path("\\\\?\\" + os.path.abspath(file_path))
215
+ lock_path = Path("\\\\?\\" + os.path.abspath(lock_path))
216
+ metadata_path = Path("\\\\?\\" + os.path.abspath(metadata_path))
217
+
218
+ file_path.parent.mkdir(parents=True, exist_ok=True)
219
+ metadata_path.parent.mkdir(parents=True, exist_ok=True)
220
+ return LocalDownloadFilePaths(file_path=file_path, lock_path=lock_path, metadata_path=metadata_path)
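
Assuming the folder layout from the module docstring, a hypothetical call resolves to paths like these (a sketch; `data/` is an arbitrary local directory, and the parent directories are created as a side effect):

```python
from pathlib import Path

# Private module shown in this diff; the import path may change between releases.
from huggingface_hub._local_folder import get_local_download_paths

paths = get_local_download_paths(Path("data"), "folder/file.parquet")
print(paths.file_path)      # data/folder/file.parquet
print(paths.metadata_path)  # data/.cache/huggingface/download/folder/file.parquet.metadata
print(paths.lock_path)      # data/.cache/huggingface/download/folder/file.parquet.lock
```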
221
+
222
+
223
+ @lru_cache(maxsize=128) # ensure singleton
224
+ def get_local_upload_paths(local_dir: Path, filename: str) -> LocalUploadFilePaths:
225
+ """Compute paths to the files related to an upload process.
226
+
227
+ Folders containing the paths are all guaranteed to exist.
228
+
229
+ Args:
230
+ local_dir (`Path`):
231
+ Path to the local directory that is uploaded.
232
+ filename (`str`):
233
+ Path of the file in the repo.
234
+
235
+ Return:
236
+ [`LocalUploadFilePaths`]: the paths to the files (file_path, lock_path, metadata_path).
237
+ """
238
+ # filename is the path in the Hub repository (separated by '/')
239
+ # make sure to have a cross platform transcription
240
+ sanitized_filename = os.path.join(*filename.split("/"))
241
+ if os.name == "nt":
242
+ if sanitized_filename.startswith("..\\") or "\\..\\" in sanitized_filename:
243
+ raise ValueError(
244
+ f"Invalid filename: cannot handle filename '{sanitized_filename}' on Windows. Please ask the repository"
245
+ " owner to rename this file."
246
+ )
247
+ file_path = local_dir / sanitized_filename
248
+ metadata_path = _huggingface_dir(local_dir) / "upload" / f"{sanitized_filename}.metadata"
249
+ lock_path = metadata_path.with_suffix(".lock")
250
+
251
+ # Some Windows versions do not allow for paths longer than 255 characters.
252
+ # In this case, we must specify it as an extended path by using the "\\?\" prefix
253
+ if os.name == "nt":
254
+ if not str(local_dir).startswith("\\\\?\\") and len(os.path.abspath(lock_path)) > 255:
255
+ file_path = Path("\\\\?\\" + os.path.abspath(file_path))
256
+ lock_path = Path("\\\\?\\" + os.path.abspath(lock_path))
257
+ metadata_path = Path("\\\\?\\" + os.path.abspath(metadata_path))
258
+
259
+ file_path.parent.mkdir(parents=True, exist_ok=True)
260
+ metadata_path.parent.mkdir(parents=True, exist_ok=True)
261
+ return LocalUploadFilePaths(
262
+ path_in_repo=filename, file_path=file_path, lock_path=lock_path, metadata_path=metadata_path
263
+ )
264
+
265
+
266
+ def read_download_metadata(local_dir: Path, filename: str) -> Optional[LocalDownloadFileMetadata]:
267
+ """Read metadata about a file in the local directory related to a download process.
268
+
269
+ Args:
270
+ local_dir (`Path`):
271
+ Path to the local directory in which files are downloaded.
272
+ filename (`str`):
273
+ Path of the file in the repo.
274
+
275
+ Return:
276
+ `[LocalDownloadFileMetadata]` or `None`: the metadata if it exists, `None` otherwise.
277
+ """
278
+ paths = get_local_download_paths(local_dir, filename)
279
+ with WeakFileLock(paths.lock_path):
280
+ if paths.metadata_path.exists():
281
+ try:
282
+ with paths.metadata_path.open() as f:
283
+ commit_hash = f.readline().strip()
284
+ etag = f.readline().strip()
285
+ timestamp = float(f.readline().strip())
286
+ metadata = LocalDownloadFileMetadata(
287
+ filename=filename,
288
+ commit_hash=commit_hash,
289
+ etag=etag,
290
+ timestamp=timestamp,
291
+ )
292
+ except Exception as e:
293
+ # remove the metadata file if it is corrupted / not the right format
294
+ logger.warning(
295
+ f"Invalid metadata file {paths.metadata_path}: {e}. Removing it from disk and continuing."
296
+ )
297
+ try:
298
+ paths.metadata_path.unlink()
299
+ except Exception as e:
300
+ logger.warning(f"Could not remove corrupted metadata file {paths.metadata_path}: {e}")
301
+ return None  # corrupted metadata => behave as if no metadata exists (avoids a NameError below)
+
302
+ try:
303
+ # check if the file exists and hasn't been modified since the metadata was saved
304
+ stat = paths.file_path.stat()
305
+ if (
306
+ stat.st_mtime - 1 <= metadata.timestamp
307
+ ): # allow 1s difference as stat.st_mtime might not be precise
308
+ return metadata
309
+ logger.info(f"Ignored metadata for '{filename}' (outdated). Will re-compute hash.")
310
+ except FileNotFoundError:
311
+ # file does not exist => metadata is outdated
312
+ return None
313
+ return None
314
+
315
+
316
+ def read_upload_metadata(local_dir: Path, filename: str) -> LocalUploadFileMetadata:
317
+ """Read metadata about a file in the local directory related to an upload process.
318
+
319
+ TODO: factorize logic with `read_download_metadata`.
320
+
321
+ Args:
322
+ local_dir (`Path`):
323
+ Path to the local directory that is uploaded.
324
+ filename (`str`):
325
+ Path of the file in the repo.
326
+
327
+ Return:
328
+ `[LocalUploadFileMetadata]`: the metadata read from disk if it exists and is up-to-date, otherwise a new object that only knows the file size.
329
+ """
330
+ paths = get_local_upload_paths(local_dir, filename)
331
+ with WeakFileLock(paths.lock_path):
332
+ if paths.metadata_path.exists():
333
+ try:
334
+ with paths.metadata_path.open() as f:
335
+ timestamp = float(f.readline().strip())
336
+
337
+ size = int(f.readline().strip()) # never None
338
+
339
+ _should_ignore = f.readline().strip()
340
+ should_ignore = None if _should_ignore == "" else bool(int(_should_ignore))
341
+
342
+ _sha256 = f.readline().strip()
343
+ sha256 = None if _sha256 == "" else _sha256
344
+
345
+ _upload_mode = f.readline().strip()
346
+ upload_mode = None if _upload_mode == "" else _upload_mode
347
+ if upload_mode not in (None, "regular", "lfs"):
348
+ raise ValueError(f"Invalid upload mode in metadata {paths.path_in_repo}: {upload_mode}")
349
+
350
+ is_uploaded = bool(int(f.readline().strip()))
351
+ is_committed = bool(int(f.readline().strip()))
352
+
353
+ metadata = LocalUploadFileMetadata(
354
+ timestamp=timestamp,
355
+ size=size,
356
+ should_ignore=should_ignore,
357
+ sha256=sha256,
358
+ upload_mode=upload_mode,
359
+ is_uploaded=is_uploaded,
360
+ is_committed=is_committed,
361
+ )
362
+ except Exception as e:
363
+ # remove the metadata file if it is corrupted / not the right format
364
+ logger.warning(
365
+ f"Invalid metadata file {paths.metadata_path}: {e}. Removing it from disk and continuing."
366
+ )
367
+ try:
368
+ paths.metadata_path.unlink()
369
+ except Exception as e:
370
+ logger.warning(f"Could not remove corrupted metadata file {paths.metadata_path}: {e}")
371
+ metadata = None  # corrupted metadata => fall back to a fresh object below (avoids a NameError)
+
372
+ # TODO: can we do better?
373
+ if (
374
+ metadata is not None
+ and metadata.timestamp is not None
375
+ and metadata.is_uploaded # file was uploaded
376
+ and not metadata.is_committed # but not committed
377
+ and time.time() - metadata.timestamp > 20 * 3600 # and it's been more than 20 hours
378
+ ): # => we consider it as garbage-collected by S3
379
+ metadata.is_uploaded = False
380
+
381
+ # check if the file exists and hasn't been modified since the metadata was saved
382
+ try:
383
+ if metadata is not None and metadata.timestamp is not None and paths.file_path.stat().st_mtime <= metadata.timestamp:
384
+ return metadata
385
+ logger.info(f"Ignored metadata for '{filename}' (outdated). Will re-compute hash.")
386
+ except FileNotFoundError:
387
+ # file does not exist => metadata is outdated
388
+ pass
389
+
390
+ # empty metadata => we don't know anything except its size
391
+ return LocalUploadFileMetadata(size=paths.file_path.stat().st_size)
392
+
393
+
394
+ def write_download_metadata(local_dir: Path, filename: str, commit_hash: str, etag: str) -> None:
395
+ """Write metadata about a file in the local directory related to a download process.
396
+
397
+ Args:
398
+ local_dir (`Path`):
399
+ Path to the local directory in which files are downloaded.
400
+ """
401
+ paths = get_local_download_paths(local_dir, filename)
402
+ with WeakFileLock(paths.lock_path):
403
+ with paths.metadata_path.open("w") as f:
404
+ f.write(f"{commit_hash}\n{etag}\n{time.time()}\n")
405
+
406
+
407
+ @lru_cache()
408
+ def _huggingface_dir(local_dir: Path) -> Path:
409
+ """Return the path to the `.cache/huggingface` directory in a local directory."""
410
+ # Wrap in lru_cache to avoid overwriting the .gitignore file if called multiple times
411
+ path = local_dir / ".cache" / "huggingface"
412
+ path.mkdir(exist_ok=True, parents=True)
413
+
414
+ # Create a .gitignore file in the .cache/huggingface directory if it doesn't exist
415
+ # Should be thread-safe enough like this.
416
+ gitignore = path / ".gitignore"
417
+ gitignore_lock = path / ".gitignore.lock"
418
+ if not gitignore.exists():
419
+ try:
420
+ with WeakFileLock(gitignore_lock):
421
+ gitignore.write_text("*")
422
+ gitignore_lock.unlink()
423
+ except OSError: # FileNotFoundError, PermissionError, etc.
424
+ pass
425
+ return path
huggingface_hub/_login.py ADDED
@@ -0,0 +1,397 @@
1
+ # Copyright 2020 The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Contains methods to log in to the Hub."""
15
+
16
+ import os
17
+ import subprocess
18
+ from functools import partial
19
+ from getpass import getpass
20
+ from pathlib import Path
21
+ from typing import Optional
22
+
23
+ from . import constants
24
+ from .commands._cli_utils import ANSI
25
+ from .utils import (
26
+ capture_output,
27
+ get_token,
28
+ is_google_colab,
29
+ is_notebook,
30
+ list_credential_helpers,
31
+ logging,
32
+ run_subprocess,
33
+ set_git_credential,
34
+ unset_git_credential,
35
+ )
36
+ from .utils._token import _get_token_from_environment, _get_token_from_google_colab
37
+
38
+
39
+ logger = logging.get_logger(__name__)
40
+
41
+ _HF_LOGO_ASCII = """
42
+ _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_|
43
+ _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|
44
+ _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_|
45
+ _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|
46
+ _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_|
47
+ """
48
+
49
+
50
+ def login(
51
+ token: Optional[str] = None,
52
+ add_to_git_credential: bool = False,
53
+ new_session: bool = True,
54
+ write_permission: bool = False,
55
+ ) -> None:
56
+ """Login the machine to access the Hub.
57
+
58
+ The `token` is persisted in cache and set as a git credential. Once done, the machine
59
+ is logged in and the access token will be available across all `huggingface_hub`
60
+ components. If `token` is not provided, the user will be prompted for it, either with
61
+ a widget (in a notebook) or via the terminal.
62
+
63
+ To log in from outside of a script, one can also use `huggingface-cli login` which is
64
+ a cli command that wraps [`login`].
65
+
66
+ <Tip>
67
+
68
+ [`login`] is a drop-in replacement method for [`notebook_login`] as it wraps and
69
+ extends its capabilities.
70
+
71
+ </Tip>
72
+
73
+ <Tip>
74
+
75
+ When the token is not passed, [`login`] will automatically detect if the script runs
76
+ in a notebook or not. However, this detection might not be accurate due to the
77
+ variety of notebooks that exist nowadays. If that is the case, you can always force
78
+ the UI by using [`notebook_login`] or [`interpreter_login`].
79
+
80
+ </Tip>
81
+
82
+ Args:
83
+ token (`str`, *optional*):
84
+ User access token to generate from https://huggingface.co/settings/token.
85
+ add_to_git_credential (`bool`, defaults to `False`):
86
+ If `True`, token will be set as git credential. If no git credential helper
87
+ is configured, a warning will be displayed to the user. If `token` is `None`,
88
+ the value of `add_to_git_credential` is ignored and will be prompted again
89
+ to the end user.
90
+ new_session (`bool`, defaults to `True`):
91
+ If `True`, will request a token even if one is already saved on the machine.
92
+ write_permission (`bool`, defaults to `False`):
93
+ If `True`, requires a token with write permission.
94
+ Raises:
95
+ [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
96
+ If an organization token is passed. Only personal account tokens are valid
97
+ to log in.
98
+ [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
99
+ If token is invalid.
100
+ [`ImportError`](https://docs.python.org/3/library/exceptions.html#ImportError)
101
+ If running in a notebook but `ipywidgets` is not installed.
102
+ """
103
+ if token is not None:
104
+ if not add_to_git_credential:
105
+ print(
106
+ "The token has not been saved to the git credentials helper. Pass "
107
+ "`add_to_git_credential=True` in this function directly or "
108
+ "`--add-to-git-credential` when using `huggingface-cli`, if "
109
+ "you want to set the git credential as well."
110
+ )
111
+ _login(token, add_to_git_credential=add_to_git_credential, write_permission=write_permission)
112
+ elif is_notebook():
113
+ notebook_login(new_session=new_session, write_permission=write_permission)
114
+ else:
115
+ interpreter_login(new_session=new_session, write_permission=write_permission)
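
A minimal sketch of the non-interactive path, e.g. in CI (the token value is a placeholder):

```python
from huggingface_hub import login, logout

# Log in programmatically; the token string below is a placeholder.
login(token="hf_xxx", add_to_git_credential=True)

# ... authenticated calls to the Hub ...

# Remove the stored token again. Note that tokens set through environment
# variables cannot be removed this way and will raise an EnvironmentError.
logout()
```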
116
+
117
+
118
+ def logout() -> None:
119
+ """Logout the machine from the Hub.
120
+
121
+ Token is deleted from the machine and removed from git credential.
122
+ """
123
+ if get_token() is None:
124
+ print("Not logged in!")
125
+ return
126
+
127
+ # Delete token from git credentials
128
+ unset_git_credential()
129
+
130
+ # Delete token file
131
+ try:
132
+ Path(constants.HF_TOKEN_PATH).unlink()
133
+ except FileNotFoundError:
134
+ pass
135
+
136
+ # Check if still logged in
137
+ if _get_token_from_google_colab() is not None:
138
+ raise EnvironmentError(
139
+ "You are automatically logged in using a Google Colab secret.\n"
140
+ "To log out, you must unset the `HF_TOKEN` secret in your Colab settings."
141
+ )
142
+ if _get_token_from_environment() is not None:
143
+ raise EnvironmentError(
144
+ "Token has been deleted from your machine but you are still logged in.\n"
145
+ "To log out, you must clear out both `HF_TOKEN` and `HUGGING_FACE_HUB_TOKEN` environment variables."
146
+ )
147
+
148
+ print("Successfully logged out.")
149
+
150
+
151
+ ###
152
+ # Interpreter-based login (text)
153
+ ###
154
+
155
+
156
+ def interpreter_login(new_session: bool = True, write_permission: bool = False) -> None:
157
+ """
158
+ Displays a prompt to log in to the HF website and store the token.
159
+
160
+ This is equivalent to [`login`] without passing a token when not run in a notebook.
161
+ [`interpreter_login`] is useful if you want to force the use of the terminal prompt
162
+ instead of a notebook widget.
163
+
164
+ For more details, see [`login`].
165
+
166
+ Args:
167
+ new_session (`bool`, defaults to `True`):
168
+ If `True`, will request a token even if one is already saved on the machine.
169
+ write_permission (`bool`, defaults to `False`):
170
+ If `True`, requires a token with write permission.
171
+
172
+ """
173
+ if not new_session and _current_token_okay(write_permission=write_permission):
174
+ print("User is already logged in.")
175
+ return
176
+
177
+ from .commands.delete_cache import _ask_for_confirmation_no_tui
178
+
179
+ print(_HF_LOGO_ASCII)
180
+ if get_token() is not None:
181
+ print(
182
+ " A token is already saved on your machine. Run `huggingface-cli"
183
+ " whoami` to get more information or `huggingface-cli logout` if you want"
184
+ " to log out."
185
+ )
186
+ print(" Setting a new token will erase the existing one.")
187
+
188
+ print(" To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .")
189
+ if os.name == "nt":
190
+ print("Token can be pasted using 'Right-Click'.")
191
+ token = getpass("Enter your token (input will not be visible): ")
192
+ add_to_git_credential = _ask_for_confirmation_no_tui("Add token as git credential?")
193
+
194
+ _login(token=token, add_to_git_credential=add_to_git_credential, write_permission=write_permission)
195
+
196
+
197
+ ###
198
+ # Notebook-based login (widget)
199
+ ###
200
+
201
+ NOTEBOOK_LOGIN_PASSWORD_HTML = """<center> <img
202
+ src=https://huggingface.co/front/assets/huggingface_logo-noborder.svg
203
+ alt='Hugging Face'> <br> Immediately click login after typing your password or
204
+ it might be stored in plain text in this notebook file. </center>"""
205
+
206
+
207
+ NOTEBOOK_LOGIN_TOKEN_HTML_START = """<center> <img
208
+ src=https://huggingface.co/front/assets/huggingface_logo-noborder.svg
209
+ alt='Hugging Face'> <br> Copy a token from <a
210
+ href="https://huggingface.co/settings/tokens" target="_blank">your Hugging Face
211
+ tokens page</a> and paste it below. <br> Immediately click login after copying
212
+ your token or it might be stored in plain text in this notebook file. </center>"""
213
+
214
+
215
+ NOTEBOOK_LOGIN_TOKEN_HTML_END = """
216
+ <b>Pro Tip:</b> If you don't already have one, you can create a dedicated
217
+ 'notebooks' token with 'write' access, that you can then easily reuse for all
218
+ notebooks. </center>"""
219
+
220
+
221
+ def notebook_login(new_session: bool = True, write_permission: bool = False) -> None:
222
+ """
223
+ Displays a widget to log in to the HF website and store the token.
224
+
225
+ This is equivalent to [`login`] without passing a token when run in a notebook.
226
+ [`notebook_login`] is useful if you want to force the use of the notebook widget
227
+ instead of a prompt in the terminal.
228
+
229
+ For more details, see [`login`].
230
+
231
+ Args:
232
+ new_session (`bool`, defaults to `True`):
233
+ If `True`, will request a token even if one is already saved on the machine.
234
+ write_permission (`bool`, defaults to `False`):
235
+ If `True`, requires a token with write permission.
236
+ """
237
+ try:
238
+ import ipywidgets.widgets as widgets # type: ignore
239
+ from IPython.display import display # type: ignore
240
+ except ImportError:
241
+ raise ImportError(
242
+ "The `notebook_login` function can only be used in a notebook (Jupyter or"
243
+ " Colab) and you need the `ipywidgets` module: `pip install ipywidgets`."
244
+ )
245
+ if not new_session and _current_token_okay(write_permission=write_permission):
246
+ print("User is already logged in.")
247
+ return
248
+
249
+ box_layout = widgets.Layout(display="flex", flex_flow="column", align_items="center", width="50%")
250
+
251
+ token_widget = widgets.Password(description="Token:")
252
+ git_checkbox_widget = widgets.Checkbox(value=True, description="Add token as git credential?")
253
+ token_finish_button = widgets.Button(description="Login")
254
+
255
+ login_token_widget = widgets.VBox(
256
+ [
257
+ widgets.HTML(NOTEBOOK_LOGIN_TOKEN_HTML_START),
258
+ token_widget,
259
+ git_checkbox_widget,
260
+ token_finish_button,
261
+ widgets.HTML(NOTEBOOK_LOGIN_TOKEN_HTML_END),
262
+ ],
263
+ layout=box_layout,
264
+ )
265
+ display(login_token_widget)
266
+
267
+ # On click events
268
+ def login_token_event(t, write_permission: bool = False):
269
+ """
270
+ Event handler for the login button.
271
+
272
+ Args:
273
+ write_permission (`bool`, defaults to `False`):
274
+ If `True`, requires a token with write permission.
275
+ """
276
+ token = token_widget.value
277
+ add_to_git_credential = git_checkbox_widget.value
278
+ # Erase token and clear value to make sure it's not saved in the notebook.
279
+ token_widget.value = ""
280
+ # Hide inputs
281
+ login_token_widget.children = [widgets.Label("Connecting...")]
282
+ try:
283
+ with capture_output() as captured:
284
+ _login(token, add_to_git_credential=add_to_git_credential, write_permission=write_permission)
285
+ message = captured.getvalue()
286
+ except Exception as error:
287
+ message = str(error)
288
+ # Print result (success message or error)
289
+ login_token_widget.children = [widgets.Label(line) for line in message.split("\n") if line.strip()]
290
+
291
+ token_finish_button.on_click(partial(login_token_event, write_permission=write_permission))
292
+
293
+
294
+ ###
295
+ # Login private helpers
296
+ ###
297
+
298
+
299
+ def _login(token: str, add_to_git_credential: bool, write_permission: bool = False) -> None:
300
+ from .hf_api import get_token_permission # avoid circular import
301
+
302
+ if token.startswith("api_org"):
303
+ raise ValueError("You must use your personal account token, not an organization token.")
304
+
305
+ permission = get_token_permission(token)
306
+ if permission is None:
307
+ raise ValueError("Invalid token passed!")
308
+ elif write_permission and permission != "write":
309
+ raise ValueError(
310
+ "Token is valid but is 'read-only' and a 'write' token is required.\nPlease provide a new token with"
311
+ " correct permission."
312
+ )
313
+ print(f"Token is valid (permission: {permission}).")
314
+
315
+ if add_to_git_credential:
316
+ if _is_git_credential_helper_configured():
317
+ set_git_credential(token)
318
+ print(
319
+ "Your token has been saved in your configured git credential helpers"
320
+ + f" ({','.join(list_credential_helpers())})."
321
+ )
322
+ else:
323
+ print("Token has not been saved to git credential helper.")
324
+
325
+ # Save token
326
+ path = Path(constants.HF_TOKEN_PATH)
327
+ path.parent.mkdir(parents=True, exist_ok=True)
328
+ path.write_text(token)
329
+ print(f"Your token has been saved to {constants.HF_TOKEN_PATH}")
330
+ print("Login successful")
331
+
332
+
333
+ def _current_token_okay(write_permission: bool = False):
334
+ """Check if the current token is valid.
335
+
336
+ Args:
337
+ write_permission (`bool`, defaults to `False`):
338
+ If `True`, requires a token with write permission.
339
+
340
+ Returns:
341
+ `bool`: `True` if the current token is valid, `False` otherwise.
342
+ """
343
+ from .hf_api import get_token_permission # avoid circular import
344
+
345
+ permission = get_token_permission()
346
+ if permission is None or (write_permission and permission != "write"):
347
+ return False
348
+ return True
349
+
350
+
351
+ def _is_git_credential_helper_configured() -> bool:
352
+ """Check if a git credential helper is configured.
353
+
354
+ Warns user if not the case (except for Google Colab where "store" is set by default
355
+ by `huggingface_hub`).
356
+ """
357
+ helpers = list_credential_helpers()
358
+ if len(helpers) > 0:
359
+ return True # Do not warn: at least 1 helper is set
360
+
361
+ # Only in Google Colab to avoid the warning message
362
+ # See https://github.com/huggingface/huggingface_hub/issues/1043#issuecomment-1247010710
363
+ if is_google_colab():
364
+ _set_store_as_git_credential_helper_globally()
365
+ return True # Do not warn: "store" is used by default in Google Colab
366
+
367
+ # Otherwise, warn user
368
+ print(
369
+ ANSI.red(
370
+ "Cannot authenticate through git-credential as no helper is defined on your"
371
+ " machine.\nYou might have to re-authenticate when pushing to the Hugging"
372
+ " Face Hub.\nRun the following command in your terminal in case you want to"
373
+ " set the 'store' credential helper as default.\n\ngit config --global"
374
+ " credential.helper store\n\nRead"
375
+ " https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more"
376
+ " details."
377
+ )
378
+ )
379
+ return False
380
+
381
+
382
+ def _set_store_as_git_credential_helper_globally() -> None:
383
+ """Set globally the credential.helper to `store`.
384
+
385
+ To be used only in Google Colab as we assume the user doesn't care about the git
386
+ credential config. It is the only particular case where we don't want to display the
387
+ warning message in [`notebook_login()`].
388
+
389
+ Related:
390
+ - https://github.com/huggingface/huggingface_hub/issues/1043
391
+ - https://github.com/huggingface/huggingface_hub/issues/1051
392
+ - https://git-scm.com/docs/git-credential-store
393
+ """
394
+ try:
395
+ run_subprocess("git config --global credential.helper store")
396
+ except subprocess.CalledProcessError as exc:
397
+ raise EnvironmentError(exc.stderr)
huggingface_hub/_multi_commits.py ADDED
@@ -0,0 +1,306 @@
1
+ # coding=utf-8
2
+ # Copyright 2023-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Contains utilities for multi-commits (i.e. pushing changes iteratively on a PR)."""
16
+
17
+ import re
18
+ from dataclasses import dataclass, field
19
+ from typing import TYPE_CHECKING, Iterable, List, Optional, Set, Tuple, Union
20
+
21
+ from ._commit_api import CommitOperationAdd, CommitOperationDelete
22
+ from .community import DiscussionWithDetails
23
+ from .utils import experimental
24
+ from .utils._cache_manager import _format_size
25
+ from .utils.insecure_hashlib import sha256
26
+
27
+
28
+ if TYPE_CHECKING:
29
+ from .hf_api import HfApi
30
+
31
+
32
+ class MultiCommitException(Exception):
33
+ """Base exception for any exception happening while doing a multi-commit."""
34
+
35
+
36
+ MULTI_COMMIT_PR_DESCRIPTION_TEMPLATE = """
37
+ ## {commit_message}
38
+
39
+ {commit_description}
40
+
41
+ **Multi commit ID:** {multi_commit_id}
42
+
43
+ Scheduled commits:
44
+
45
+ {multi_commit_strategy}
46
+
47
+ _This is a PR opened using the `huggingface_hub` library in the context of a multi-commit. The PR can be commented on as a usual PR. However, please be aware that manually updating the PR description, changing the PR status, or pushing new commits is not recommended as it might corrupt the commit process. Learn more about multi-commits [in this guide](https://huggingface.co/docs/huggingface_hub/main/guides/upload)._
48
+ """
49
+
50
+ MULTI_COMMIT_PR_COMPLETION_COMMENT_TEMPLATE = """
51
+ Multi-commit is now completed! You can ping the repo owner to review the changes. This PR can now be commented on or modified without risk of corrupting it.
52
+
53
+ _This is a comment posted using the `huggingface_hub` library in the context of a multi-commit. Learn more about multi-commits [in this guide](https://huggingface.co/docs/huggingface_hub/main/guides/upload)._
54
+ """
55
+
56
+ MULTI_COMMIT_PR_CLOSING_COMMENT_TEMPLATE = """
57
+ `create_pr=False` has been passed so the PR is automatically merged.
58
+
59
+ _This is a comment posted using the `huggingface_hub` library in the context of a multi-commit. Learn more about multi-commits [in this guide](https://huggingface.co/docs/huggingface_hub/main/guides/upload)._
60
+ """
61
+
62
+ MULTI_COMMIT_PR_CLOSE_COMMENT_FAILURE_NO_CHANGES_TEMPLATE = """
63
+ Cannot merge the Pull Request as no changes are associated with it. This PR will be closed automatically.
64
+
65
+ _This is a comment posted using the `huggingface_hub` library in the context of a multi-commit. Learn more about multi-commits [in this guide](https://huggingface.co/docs/huggingface_hub/main/guides/upload)._
66
+ """
67
+
68
+ MULTI_COMMIT_PR_CLOSE_COMMENT_FAILURE_BAD_REQUEST_TEMPLATE = """
69
+ An error occurred while trying to merge the Pull Request: `{error_message}`.
70
+
71
+ _This is a comment posted using the `huggingface_hub` library in the context of a multi-commit. Learn more about multi-commits [in this guide](https://huggingface.co/docs/huggingface_hub/main/guides/upload)._
72
+ """
73
+
74
+
75
+ STEP_ID_REGEX = re.compile(r"- \[(?P<completed>[ |x])\].*(?P<step_id>[a-fA-F0-9]{64})", flags=re.MULTILINE)
76
+
77
+
78
+ @experimental
79
+ def plan_multi_commits(
80
+ operations: Iterable[Union[CommitOperationAdd, CommitOperationDelete]],
81
+ max_operations_per_commit: int = 50,
82
+ max_upload_size_per_commit: int = 2 * 1024 * 1024 * 1024,
83
+ ) -> Tuple[List[List[CommitOperationAdd]], List[List[CommitOperationDelete]]]:
84
+ """Split a list of operations in a list of commits to perform.
85
+
86
+ Implementation follows a sub-optimal (yet simple) algorithm:
87
+ 1. Delete operations are grouped together by commits of maximum `max_operations_per_commits` operations.
88
+ 2. All additions exceeding `max_upload_size_per_commit` are committed 1 by 1.
89
+ 3. All remaining additions are grouped together and split each time the `max_operations_per_commit` or the
90
+ `max_upload_size_per_commit` limit is reached.
91
+
92
+ We do not try to optimize the splitting to get the lowest number of commits as this is a NP-hard problem (see
93
+ [bin packing problem](https://en.wikipedia.org/wiki/Bin_packing_problem)). For our use case, it is not problematic
94
+ to use a sub-optimal solution so we favored an easy-to-explain implementation.
95
+
96
+ Args:
97
+ operations (`List` of [`~hf_api.CommitOperation`]):
98
+ The list of operations to split into commits.
99
+ max_operations_per_commit (`int`):
100
+ Maximum number of operations in a single commit. Defaults to 50.
101
+ max_upload_size_per_commit (`int`):
102
+ Maximum size to upload (in bytes) in a single commit. Defaults to 2GB. Files bigger than this limit are
103
+ uploaded, 1 per commit.
104
+
105
+ Returns:
106
+ `Tuple[List[List[CommitOperationAdd]], List[List[CommitOperationDelete]]]`: a tuple. First item is a list of
107
+ lists of [`CommitOperationAdd`] representing the addition commits to push. The second item is a list of lists
108
+ of [`CommitOperationDelete`] representing the deletion commits.
109
+
110
+ <Tip warning={true}>
111
+
112
+ `plan_multi_commits` is experimental. Its API and behavior is subject to change in the future without prior notice.
113
+
114
+ </Tip>
115
+
116
+ Example:
117
+ ```python
118
+ >>> from huggingface_hub import HfApi, plan_multi_commits
119
+ >>> addition_commits, deletion_commits = plan_multi_commits(
120
+ ... operations=[
121
+ ... CommitOperationAdd(...),
122
+ ... CommitOperationAdd(...),
123
+ ... CommitOperationDelete(...),
124
+ ... CommitOperationDelete(...),
125
+ ... CommitOperationAdd(...),
126
+ ... ],
127
+ ... )
128
+ >>> HfApi().create_commits_on_pr(
129
+ ... repo_id="my-cool-model",
130
+ ... addition_commits=addition_commits,
131
+ ... deletion_commits=deletion_commits,
132
+ ... (...)
133
+ ... verbose=True,
134
+ ... )
135
+ ```
136
+
137
+ <Tip warning={true}>
138
+
139
+ The initial order of the operations is not guaranteed! All deletions will be performed before additions. If you are
140
+ not updating multiple times the same file, you are fine.
141
+
142
+ </Tip>
143
+ """
144
+ addition_commits: List[List[CommitOperationAdd]] = []
145
+ deletion_commits: List[List[CommitOperationDelete]] = []
146
+
147
+ additions: List[CommitOperationAdd] = []
148
+ additions_size = 0
149
+ deletions: List[CommitOperationDelete] = []
150
+ for op in operations:
151
+ if isinstance(op, CommitOperationDelete):
152
+ # Group delete operations together
153
+ deletions.append(op)
154
+ if len(deletions) >= max_operations_per_commit:
155
+ deletion_commits.append(deletions)
156
+ deletions = []
157
+
158
+ elif op.upload_info.size >= max_upload_size_per_commit:
159
+ # Upload huge files 1 by 1
160
+ addition_commits.append([op])
161
+
162
+ elif additions_size + op.upload_info.size < max_upload_size_per_commit:
163
+ # Group other additions and split if size limit is reached (either max_nb_files or max_upload_size)
164
+ additions.append(op)
165
+ additions_size += op.upload_info.size
166
+
167
+ else:
168
+ addition_commits.append(additions)
169
+ additions = [op]
170
+ additions_size = op.upload_info.size
171
+
172
+ if len(additions) >= max_operations_per_commit:
173
+ addition_commits.append(additions)
174
+ additions = []
175
+ additions_size = 0
176
+
177
+ if len(additions) > 0:
178
+ addition_commits.append(additions)
179
+ if len(deletions) > 0:
180
+ deletion_commits.append(deletions)
181
+
182
+ return addition_commits, deletion_commits
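
A small runnable sketch of the grouping behavior, forcing one operation per commit (file names and contents are made up):

```python
import tempfile
from pathlib import Path

from huggingface_hub import CommitOperationAdd, CommitOperationDelete, plan_multi_commits

tmp = Path(tempfile.mkdtemp())
(tmp / "a.txt").write_text("hello")
(tmp / "b.txt").write_text("world")

addition_commits, deletion_commits = plan_multi_commits(
    operations=[
        CommitOperationAdd("a.txt", str(tmp / "a.txt")),
        CommitOperationAdd("b.txt", str(tmp / "b.txt")),
        CommitOperationDelete("old.txt"),
    ],
    max_operations_per_commit=1,  # force one operation per commit
)
print(len(addition_commits), len(deletion_commits))  # 2 1
```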
183
+
184
+
185
+ @dataclass
186
+ class MultiCommitStep:
187
+ """Dataclass containing a list of CommitOperation to commit at once.
188
+
189
+ A [`MultiCommitStep`] is one atomic part of a [`MultiCommitStrategy`]. Each step is identified by its own
190
+ deterministic ID based on the list of commit operations (hexadecimal sha256). ID is persistent between re-runs if
191
+ the list of commits is kept the same.
192
+ """
193
+
194
+ operations: List[Union[CommitOperationAdd, CommitOperationDelete]]
195
+
196
+ id: str = field(init=False)
197
+ completed: bool = False
198
+
199
+ def __post_init__(self) -> None:
200
+ if len(self.operations) == 0:
201
+ raise ValueError("A MultiCommitStep must have at least 1 commit operation, got 0.")
202
+
203
+ # Generate commit id
204
+ sha = sha256()
205
+ for op in self.operations:
206
+ if isinstance(op, CommitOperationAdd):
207
+ sha.update(b"ADD")
208
+ sha.update(op.path_in_repo.encode())
209
+ sha.update(op.upload_info.sha256)
210
+ elif isinstance(op, CommitOperationDelete):
211
+ sha.update(b"DELETE")
212
+ sha.update(op.path_in_repo.encode())
213
+ sha.update(str(op.is_folder).encode())
214
+ else:
215
+ raise NotImplementedError()
216
+ self.id = sha.hexdigest()
217
+
218
+ def __str__(self) -> str:
219
+ """Format a step for PR description.
220
+
221
+ Formatting can be changed in the future as long as it is single line, starts with `- [ ]`/`- [x]` and contains
222
+ `self.id`. Must be able to match `STEP_ID_REGEX`.
223
+ """
224
+ additions = [op for op in self.operations if isinstance(op, CommitOperationAdd)]
225
+ file_deletions = [op for op in self.operations if isinstance(op, CommitOperationDelete) and not op.is_folder]
226
+ folder_deletions = [op for op in self.operations if isinstance(op, CommitOperationDelete) and op.is_folder]
227
+ if len(additions) > 0:
228
+ return (
229
+ f"- [{'x' if self.completed else ' '}] Upload {len(additions)} file(s) "
230
+ f"totalling {_format_size(sum(add.upload_info.size for add in additions))}"
231
+ f" ({self.id})"
232
+ )
233
+ else:
234
+ return (
235
+ f"- [{'x' if self.completed else ' '}] Delete {len(file_deletions)} file(s) and"
236
+ f" {len(folder_deletions)} folder(s) ({self.id})"
237
+ )
238
+
239
+
240
+ @dataclass
241
+ class MultiCommitStrategy:
242
+ """Dataclass containing a list of [`MultiCommitStep`] to commit iteratively.
243
+
244
+ A strategy is identified by its own deterministic ID based on the list of its steps (hexadecimal sha256). ID is
245
+ persistent between re-runs if the list of commits is kept the same.
246
+ """
247
+
248
+ addition_commits: List[MultiCommitStep]
249
+ deletion_commits: List[MultiCommitStep]
250
+
251
+ id: str = field(init=False)
252
+ all_steps: Set[str] = field(init=False)
253
+
254
+ def __post_init__(self) -> None:
255
+ self.all_steps = {step.id for step in self.addition_commits + self.deletion_commits}
256
+ if len(self.all_steps) < len(self.addition_commits) + len(self.deletion_commits):
257
+ raise ValueError("Got duplicate commits in MultiCommitStrategy. All commits must be unique.")
258
+
259
+ if len(self.all_steps) == 0:
260
+ raise ValueError("A MultiCommitStrategy must have at least 1 commit, got 0.")
261
+
262
+ # Generate strategy id
263
+ sha = sha256()
264
+ for step in self.addition_commits + self.deletion_commits:
265
+ sha.update("new step".encode())
266
+ sha.update(step.id.encode())
267
+ self.id = sha.hexdigest()
268
+
269
+
270
+ def multi_commit_create_pull_request(
271
+ api: "HfApi",
272
+ repo_id: str,
273
+ commit_message: str,
274
+ commit_description: Optional[str],
275
+ strategy: MultiCommitStrategy,
276
+ repo_type: Optional[str],
277
+ token: Union[str, bool, None] = None,
278
+ ) -> DiscussionWithDetails:
279
+ return api.create_pull_request(
280
+ repo_id=repo_id,
281
+ title=f"[WIP] {commit_message} (multi-commit {strategy.id})",
282
+ description=multi_commit_generate_comment(
283
+ commit_message=commit_message, commit_description=commit_description, strategy=strategy
284
+ ),
285
+ token=token,
286
+ repo_type=repo_type,
287
+ )
288
+
289
+
290
+ def multi_commit_generate_comment(
291
+ commit_message: str,
292
+ commit_description: Optional[str],
293
+ strategy: MultiCommitStrategy,
294
+ ) -> str:
295
+ return MULTI_COMMIT_PR_DESCRIPTION_TEMPLATE.format(
296
+ commit_message=commit_message,
297
+ commit_description=commit_description or "",
298
+ multi_commit_id=strategy.id,
299
+ multi_commit_strategy="\n".join(
300
+ str(commit) for commit in strategy.deletion_commits + strategy.addition_commits
301
+ ),
302
+ )
303
+
304
+
305
+ def multi_commit_parse_pr_description(description: str) -> Set[str]:
306
+ return {match[1] for match in STEP_ID_REGEX.findall(description)}
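
A short sketch of the round-trip between the step formatting above and `STEP_ID_REGEX` (step ids are made up; the parser is a private helper, imported here only for illustration):

```python
from huggingface_hub._multi_commits import multi_commit_parse_pr_description

step_a, step_b = "a" * 64, "b" * 64  # made-up 64-hex step ids
description = f"""
Scheduled commits:

- [x] Upload 3 file(s) totalling 1.2G ({step_a})
- [ ] Delete 2 file(s) and 0 folder(s) ({step_b})
"""

# Both step ids are recovered, whether the step is completed or not.
print(multi_commit_parse_pr_description(description) == {step_a, step_b})  # True
```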
huggingface_hub/_snapshot_download.py ADDED
@@ -0,0 +1,304 @@
1
+ import os
2
+ from pathlib import Path
3
+ from typing import Dict, List, Literal, Optional, Union
4
+
5
+ import requests
6
+ from tqdm.auto import tqdm as base_tqdm
7
+ from tqdm.contrib.concurrent import thread_map
8
+
9
+ from . import constants
10
+ from .errors import GatedRepoError, LocalEntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError
11
+ from .file_download import REGEX_COMMIT_HASH, hf_hub_download, repo_folder_name
12
+ from .hf_api import DatasetInfo, HfApi, ModelInfo, SpaceInfo
13
+ from .utils import OfflineModeIsEnabled, filter_repo_objects, logging, validate_hf_hub_args
14
+ from .utils import tqdm as hf_tqdm
15
+
16
+
17
+ logger = logging.get_logger(__name__)
18
+
19
+
20
+ @validate_hf_hub_args
21
+ def snapshot_download(
22
+ repo_id: str,
23
+ *,
24
+ repo_type: Optional[str] = None,
25
+ revision: Optional[str] = None,
26
+ cache_dir: Union[str, Path, None] = None,
27
+ local_dir: Union[str, Path, None] = None,
28
+ library_name: Optional[str] = None,
29
+ library_version: Optional[str] = None,
30
+ user_agent: Optional[Union[Dict, str]] = None,
31
+ proxies: Optional[Dict] = None,
32
+ etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
33
+ force_download: bool = False,
34
+ token: Optional[Union[bool, str]] = None,
35
+ local_files_only: bool = False,
36
+ allow_patterns: Optional[Union[List[str], str]] = None,
37
+ ignore_patterns: Optional[Union[List[str], str]] = None,
38
+ max_workers: int = 8,
39
+ tqdm_class: Optional[base_tqdm] = None,
40
+ headers: Optional[Dict[str, str]] = None,
41
+ endpoint: Optional[str] = None,
42
+ # Deprecated args
43
+ local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto",
44
+ resume_download: Optional[bool] = None,
45
+ ) -> str:
46
+ """Download repo files.
47
+
48
+ Download a whole snapshot of a repo's files at the specified revision. This is useful when you want all files from
49
+ a repo, because you don't know which ones you will need a priori. All files are nested inside a folder in order
50
+ to keep their actual filename relative to that folder. You can also filter which files to download using
51
+ `allow_patterns` and `ignore_patterns`.
52
+
53
+ If `local_dir` is provided, the file structure from the repo will be replicated in this location. When using this
54
+ option, the `cache_dir` will not be used and a `.cache/huggingface/` folder will be created at the root of `local_dir`
55
+ to store some metadata related to the downloaded files. While this mechanism is not as robust as the main
56
+ cache-system, it's optimized for regularly pulling the latest version of a repository.
57
+
58
+ An alternative would be to clone the repo but this requires git and git-lfs to be installed and properly
59
+ configured. It is also not possible to filter which files to download when cloning a repository using git.
60
+
61
+ Args:
62
+ repo_id (`str`):
63
+ A user or an organization name and a repo name separated by a `/`.
64
+ repo_type (`str`, *optional*):
65
+ Set to `"dataset"` or `"space"` if downloading from a dataset or space,
66
+ `None` or `"model"` if downloading from a model. Default is `None`.
67
+ revision (`str`, *optional*):
68
+ An optional Git revision id which can be a branch name, a tag, or a
69
+ commit hash.
70
+ cache_dir (`str`, `Path`, *optional*):
71
+ Path to the folder where cached files are stored.
72
+ local_dir (`str` or `Path`, *optional*):
73
+ If provided, the downloaded files will be placed under this directory.
74
+ library_name (`str`, *optional*):
75
+ The name of the library to which the object corresponds.
76
+ library_version (`str`, *optional*):
77
+ The version of the library.
78
+ user_agent (`str`, `dict`, *optional*):
79
+ The user-agent info in the form of a dictionary or a string.
80
+ proxies (`dict`, *optional*):
81
+ Dictionary mapping protocol to the URL of the proxy passed to
82
+ `requests.request`.
83
+ etag_timeout (`float`, *optional*, defaults to `10`):
84
+ When fetching ETag, how many seconds to wait for the server to send
85
+ data before giving up which is passed to `requests.request`.
86
+ force_download (`bool`, *optional*, defaults to `False`):
87
+ Whether the file should be downloaded even if it already exists in the local cache.
88
+ token (`str`, `bool`, *optional*):
89
+ A token to be used for the download.
90
+ - If `True`, the token is read from the HuggingFace config
91
+ folder.
92
+ - If a string, it's used as the authentication token.
93
+ headers (`dict`, *optional*):
94
+ Additional headers to include in the request. Those headers take precedence over the others.
95
+ local_files_only (`bool`, *optional*, defaults to `False`):
96
+ If `True`, avoid downloading the file and return the path to the
97
+ local cached file if it exists.
98
+ allow_patterns (`List[str]` or `str`, *optional*):
99
+ If provided, only files matching at least one pattern are downloaded.
100
+ ignore_patterns (`List[str]` or `str`, *optional*):
101
+ If provided, files matching any of the patterns are not downloaded.
102
+ max_workers (`int`, *optional*):
103
+ Number of concurrent threads to download files (1 thread = 1 file download).
104
+ Defaults to 8.
105
+ tqdm_class (`tqdm`, *optional*):
106
+ If provided, overwrites the default behavior for the progress bar. Passed
107
+ argument must inherit from `tqdm.auto.tqdm` or at least mimic its behavior.
108
+ Note that the `tqdm_class` is not passed to each individual download.
109
+ Defaults to the custom HF progress bar that can be disabled by setting
110
+ `HF_HUB_DISABLE_PROGRESS_BARS` environment variable.
111
+
112
+ Returns:
113
+ `str`: folder path of the repo snapshot.
114
+
115
+ Raises:
116
+ [`~utils.RepositoryNotFoundError`]
117
+ If the repository to download from cannot be found. This may be because it doesn't exist,
118
+ or because it is set to `private` and you do not have access.
119
+ [`~utils.RevisionNotFoundError`]
120
+ If the revision to download from cannot be found.
121
+ [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
122
+ If `token=True` and the token cannot be found.
123
+ [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) if
124
+ ETag cannot be determined.
125
+ [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
126
+ if some parameter value is invalid.
127
+ """
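
A minimal usage sketch (the repo id and patterns are illustrative):

```python
from huggingface_hub import snapshot_download

# Fetch only config and weight files of a repo into ./model.
path = snapshot_download(
    repo_id="user/my-model",  # illustrative repo id
    allow_patterns=["*.json", "*.safetensors"],
    local_dir="model",
)
print(path)  # local folder containing the filtered snapshot
```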
128
+ if cache_dir is None:
129
+ cache_dir = constants.HF_HUB_CACHE
130
+ if revision is None:
131
+ revision = constants.DEFAULT_REVISION
132
+ if isinstance(cache_dir, Path):
133
+ cache_dir = str(cache_dir)
134
+
135
+ if repo_type is None:
136
+ repo_type = "model"
137
+ if repo_type not in constants.REPO_TYPES:
138
+ raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES)}")
139
+
140
+ storage_folder = os.path.join(cache_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type))
141
+
142
+ repo_info: Union[ModelInfo, DatasetInfo, SpaceInfo, None] = None
143
+ api_call_error: Optional[Exception] = None
144
+ if not local_files_only:
145
+ # try/except logic to handle different errors => taken from `hf_hub_download`
146
+ try:
147
+ # if we have internet connection we want to list files to download
148
+ api = HfApi(
149
+ library_name=library_name,
150
+ library_version=library_version,
151
+ user_agent=user_agent,
152
+ endpoint=endpoint,
153
+ headers=headers,
154
+ )
155
+ repo_info = api.repo_info(repo_id=repo_id, repo_type=repo_type, revision=revision, token=token)
156
+ except (requests.exceptions.SSLError, requests.exceptions.ProxyError):
157
+ # Actually raise for those subclasses of ConnectionError
158
+ raise
159
+ except (
160
+ requests.exceptions.ConnectionError,
161
+ requests.exceptions.Timeout,
162
+ OfflineModeIsEnabled,
163
+ ) as error:
164
+ # Internet connection is down
165
+ # => will try to use local files only
166
+ api_call_error = error
167
+ pass
168
+ except RevisionNotFoundError:
169
+ # The repo was found but the revision doesn't exist on the Hub (never existed or got deleted)
170
+ raise
171
+ except requests.HTTPError as error:
172
+ # Multiple reasons for an http error:
173
+ # - Repository is private and invalid/missing token sent
174
+ # - Repository is gated and invalid/missing token sent
175
+ # - Hub is down (error 500 or 504)
176
+ # => let's switch to 'local_files_only=True' to check if the files are already cached.
177
+ # (if it's not the case, the error will be re-raised)
178
+ api_call_error = error
179
+ pass
180
+
181
+ # At this stage, if `repo_info` is None it means either:
182
+ # - internet connection is down
183
+ # - internet connection is deactivated (local_files_only=True or HF_HUB_OFFLINE=True)
184
+ # - repo is private/gated and invalid/missing token sent
185
+ # - Hub is down
186
+ # => let's look if we can find the appropriate folder in the cache:
187
+ # - if the specified revision is a commit hash, look inside "snapshots".
188
+ # - if the specified revision is a branch or tag, look inside "refs".
+     # => if local_dir is not None, we will return the path to the local folder if it exists.
+     if repo_info is None:
+         # Try to get which commit hash corresponds to the specified revision
+         commit_hash = None
+         if REGEX_COMMIT_HASH.match(revision):
+             commit_hash = revision
+         else:
+             ref_path = os.path.join(storage_folder, "refs", revision)
+             if os.path.exists(ref_path):
+                 # retrieve commit_hash from refs file
+                 with open(ref_path) as f:
+                     commit_hash = f.read()
+
+         # Try to locate snapshot folder for this commit hash
+         if commit_hash is not None:
+             snapshot_folder = os.path.join(storage_folder, "snapshots", commit_hash)
+             if os.path.exists(snapshot_folder):
+                 # Snapshot folder exists => let's return it
+                 # (but we can't check if all the files are actually there)
+                 return snapshot_folder
+         # If local_dir is not None, return it if it exists and is not empty
+         if local_dir is not None:
+             local_dir = Path(local_dir)
+             if local_dir.is_dir() and any(local_dir.iterdir()):
+                 logger.warning(
+                     f"Returning existing local_dir `{local_dir}` as remote repo cannot be accessed in `snapshot_download` ({api_call_error})."
+                 )
+                 return str(local_dir.resolve())
+         # If we couldn't find the appropriate folder on disk, raise an error.
+         if local_files_only:
+             raise LocalEntryNotFoundError(
+                 "Cannot find an appropriate cached snapshot folder for the specified revision on the local disk and "
+                 "outgoing traffic has been disabled. To enable repo look-ups and downloads online, pass "
+                 "'local_files_only=False' as input."
+             )
+         elif isinstance(api_call_error, OfflineModeIsEnabled):
+             raise LocalEntryNotFoundError(
+                 "Cannot find an appropriate cached snapshot folder for the specified revision on the local disk and "
+                 "outgoing traffic has been disabled. To enable repo look-ups and downloads online, set "
+                 "'HF_HUB_OFFLINE=0' as environment variable."
+             ) from api_call_error
+         elif isinstance(api_call_error, RepositoryNotFoundError) or isinstance(api_call_error, GatedRepoError):
+             # Repo not found => let's raise the actual error
+             raise api_call_error
+         else:
+             # Otherwise: most likely a connection issue or Hub downtime => let's warn the user
+             raise LocalEntryNotFoundError(
+                 "An error happened while trying to locate the files on the Hub and we cannot find the appropriate"
+                 " snapshot folder for the specified revision on the local disk. Please check your internet connection"
+                 " and try again."
+             ) from api_call_error
+
+     # At this stage, internet connection is up and running
+     # => let's download the files!
+     assert repo_info.sha is not None, "Repo info returned from server must have a revision sha."
+     assert repo_info.siblings is not None, "Repo info returned from server must have a siblings list."
+     filtered_repo_files = list(
+         filter_repo_objects(
+             items=[f.rfilename for f in repo_info.siblings],
+             allow_patterns=allow_patterns,
+             ignore_patterns=ignore_patterns,
+         )
+     )
+     commit_hash = repo_info.sha
+     snapshot_folder = os.path.join(storage_folder, "snapshots", commit_hash)
+     # if passed revision is not identical to commit_hash
+     # then revision has to be a branch name or tag name.
+     # In that case store a ref.
+     if revision != commit_hash:
+         ref_path = os.path.join(storage_folder, "refs", revision)
+         os.makedirs(os.path.dirname(ref_path), exist_ok=True)
+         with open(ref_path, "w") as f:
+             f.write(commit_hash)
+
+     # we pass the commit_hash to hf_hub_download
+     # so no network call happens if we already
+     # have the file locally.
+     def _inner_hf_hub_download(repo_file: str):
+         return hf_hub_download(
+             repo_id,
+             filename=repo_file,
+             repo_type=repo_type,
+             revision=commit_hash,
+             endpoint=endpoint,
+             cache_dir=cache_dir,
+             local_dir=local_dir,
+             local_dir_use_symlinks=local_dir_use_symlinks,
+             library_name=library_name,
+             library_version=library_version,
+             user_agent=user_agent,
+             proxies=proxies,
+             etag_timeout=etag_timeout,
+             resume_download=resume_download,
+             force_download=force_download,
+             token=token,
+             headers=headers,
+         )
+
+     if constants.HF_HUB_ENABLE_HF_TRANSFER:
+         # when using hf_transfer we don't want extra parallelism
+         # from the one hf_transfer provides
+         for file in filtered_repo_files:
+             _inner_hf_hub_download(file)
+     else:
+         thread_map(
+             _inner_hf_hub_download,
+             filtered_repo_files,
+             desc=f"Fetching {len(filtered_repo_files)} files",
+             max_workers=max_workers,
+             # User can use its own tqdm class or the default one from `huggingface_hub.utils`
+             tqdm_class=tqdm_class or hf_tqdm,
+         )
+
+     if local_dir is not None:
+         return str(os.path.realpath(local_dir))
+     return snapshot_folder
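
For context, a minimal usage sketch of the function above; the repo id and patterns are illustrative placeholders, not values from this commit:

```python
from huggingface_hub import snapshot_download

# Resolves the revision to a commit hash, reuses the cached snapshot when
# possible, and otherwise downloads the matching files in parallel threads.
local_folder = snapshot_download(
    repo_id="gpt2",                     # placeholder repo id
    revision="main",
    allow_patterns=["*.json", "*.txt"],  # only fetch matching files
)
print(local_folder)  # .../snapshots/<commit_hash>
```

Because the resolved commit hash is passed down to `hf_hub_download`, a re-run with a warm cache makes no per-file network calls, per the comment in the code above.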
huggingface_hub/_space_api.py ADDED
@@ -0,0 +1,160 @@
+ # coding=utf-8
+ # Copyright 2019-present, the HuggingFace Inc. team.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ from dataclasses import dataclass
+ from datetime import datetime
+ from enum import Enum
+ from typing import Dict, Optional
+
+ from huggingface_hub.utils import parse_datetime
+
+
+ class SpaceStage(str, Enum):
+     """
+     Enumeration of possible stages of a Space on the Hub.
+
+     Value can be compared to a string:
+     ```py
+     assert SpaceStage.BUILDING == "BUILDING"
+     ```
+
+     Taken from https://github.com/huggingface/moon-landing/blob/main/server/repo_types/SpaceInfo.ts#L61 (private url).
+     """
+
+     # Copied from moon-landing > server > repo_types > SpaceInfo.ts (private repo)
+     NO_APP_FILE = "NO_APP_FILE"
+     CONFIG_ERROR = "CONFIG_ERROR"
+     BUILDING = "BUILDING"
+     BUILD_ERROR = "BUILD_ERROR"
+     RUNNING = "RUNNING"
+     RUNNING_BUILDING = "RUNNING_BUILDING"
+     RUNTIME_ERROR = "RUNTIME_ERROR"
+     DELETING = "DELETING"
+     STOPPED = "STOPPED"
+     PAUSED = "PAUSED"
+
+
+ class SpaceHardware(str, Enum):
+     """
+     Enumeration of hardware options available to run your Space on the Hub.
+
+     Value can be compared to a string:
+     ```py
+     assert SpaceHardware.CPU_BASIC == "cpu-basic"
+     ```
+
+     Taken from https://github.com/huggingface/moon-landing/blob/main/server/repo_types/SpaceInfo.ts#L73 (private url).
+     """
+
+     CPU_BASIC = "cpu-basic"
+     CPU_UPGRADE = "cpu-upgrade"
+     T4_SMALL = "t4-small"
+     T4_MEDIUM = "t4-medium"
+     L4X1 = "l4x1"
+     L4X4 = "l4x4"
+     ZERO_A10G = "zero-a10g"
+     A10G_SMALL = "a10g-small"
+     A10G_LARGE = "a10g-large"
+     A10G_LARGEX2 = "a10g-largex2"
+     A10G_LARGEX4 = "a10g-largex4"
+     A100_LARGE = "a100-large"
+     V5E_1X1 = "v5e-1x1"
+     V5E_2X2 = "v5e-2x2"
+     V5E_2X4 = "v5e-2x4"
+
+
+ class SpaceStorage(str, Enum):
+     """
+     Enumeration of persistent storage available for your Space on the Hub.
+
+     Value can be compared to a string:
+     ```py
+     assert SpaceStorage.SMALL == "small"
+     ```
+
+     Taken from https://github.com/huggingface/moon-landing/blob/main/server/repo_types/SpaceHardwareFlavor.ts#L24 (private url).
+     """
+
+     SMALL = "small"
+     MEDIUM = "medium"
+     LARGE = "large"
+
+
+ @dataclass
+ class SpaceRuntime:
+     """
+     Contains information about the current runtime of a Space.
+
+     Args:
+         stage (`str`):
+             Current stage of the space. Example: RUNNING.
+         hardware (`str` or `None`):
+             Current hardware of the space. Example: "cpu-basic". Can be `None` if Space
+             is `BUILDING` for the first time.
+         requested_hardware (`str` or `None`):
+             Requested hardware. Can be different than `hardware` especially if the request
+             has just been made. Example: "t4-medium". Can be `None` if no hardware has
+             been requested yet.
+         sleep_time (`int` or `None`):
+             Number of seconds the Space will be kept alive after the last request. By default (if value is `None`), the
+             Space will never go to sleep if it's running on an upgraded hardware, while it will go to sleep after 48
+             hours on a free 'cpu-basic' hardware. For more details, see https://huggingface.co/docs/hub/spaces-gpus#sleep-time.
+         raw (`dict`):
+             Raw response from the server. Contains more information about the Space
+             runtime like number of replicas, number of cpu, memory size,...
+     """
+
+     stage: SpaceStage
+     hardware: Optional[SpaceHardware]
+     requested_hardware: Optional[SpaceHardware]
+     sleep_time: Optional[int]
+     storage: Optional[SpaceStorage]
+     raw: Dict
+
+     def __init__(self, data: Dict) -> None:
+         self.stage = data["stage"]
+         self.hardware = data.get("hardware", {}).get("current")
+         self.requested_hardware = data.get("hardware", {}).get("requested")
+         self.sleep_time = data.get("gcTimeout")
+         self.storage = data.get("storage")
+         self.raw = data
+
+
+ @dataclass
+ class SpaceVariable:
+     """
+     Contains information about the current variables of a Space.
+
+     Args:
+         key (`str`):
+             Variable key. Example: `"MODEL_REPO_ID"`
+         value (`str`):
+             Variable value. Example: `"the_model_repo_id"`.
+         description (`str` or None):
+             Description of the variable. Example: `"Model Repo ID of the implemented model"`.
+         updated_at (`datetime` or None):
+             datetime of the last update of the variable (if the variable has been updated at least once).
+     """
+
+     key: str
+     value: str
+     description: Optional[str]
+     updated_at: Optional[datetime]
+
+     def __init__(self, key: str, values: Dict) -> None:
+         self.key = key
+         self.value = values["value"]
+         self.description = values.get("description")
+         updated_at = values.get("updatedAt")
+         self.updated_at = parse_datetime(updated_at) if updated_at is not None else None
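
To illustrate how `SpaceRuntime` consumes the raw server payload above, a small hand-written sketch (the payload values are invented):

```python
raw = {
    "stage": "RUNNING",
    "hardware": {"current": "t4-small", "requested": "t4-medium"},
    "gcTimeout": 3600,
    "storage": "small",
}
runtime = SpaceRuntime(raw)

# The str-based enums compare equal to their plain string values.
assert runtime.stage == SpaceStage.RUNNING
assert runtime.hardware == SpaceHardware.T4_SMALL
assert runtime.sleep_time == 3600
```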
huggingface_hub/_tensorboard_logger.py ADDED
@@ -0,0 +1,195 @@
+ # Copyright 2023 The HuggingFace Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains a logger to push training logs to the Hub, using Tensorboard."""
+
+ from pathlib import Path
+ from typing import TYPE_CHECKING, List, Optional, Union
+
+ from ._commit_scheduler import CommitScheduler
+ from .errors import EntryNotFoundError
+ from .repocard import ModelCard
+ from .utils import experimental
+
+
+ # Depending on user's setup, SummaryWriter can come either from 'tensorboardX'
+ # or from 'torch.utils.tensorboard'. Both are compatible so let's try to load
+ # from either of them.
+ try:
+     from tensorboardX import SummaryWriter
+
+     is_summary_writer_available = True
+
+ except ImportError:
+     try:
+         from torch.utils.tensorboard import SummaryWriter
+
+         is_summary_writer_available = True
+     except ImportError:
+         # Dummy class to avoid failing at import. Will raise on instance creation.
+         SummaryWriter = object
+         is_summary_writer_available = False
+
+ if TYPE_CHECKING:
+     from tensorboardX import SummaryWriter
+
+
+ class HFSummaryWriter(SummaryWriter):
+     """
+     Wrapper around the tensorboard's `SummaryWriter` to push training logs to the Hub.
+
+     Data is logged locally and then pushed to the Hub asynchronously. Pushing data to the Hub is done in a separate
+     thread to avoid blocking the training script. In particular, if the upload fails for any reason (e.g. a connection
+     issue), the main script will not be interrupted. Data is automatically pushed to the Hub every `commit_every`
+     minutes (defaults to every 5 minutes).
+
+     <Tip warning={true}>
+
+     `HFSummaryWriter` is experimental. Its API is subject to change in the future without prior notice.
+
+     </Tip>
+
+     Args:
+         repo_id (`str`):
+             The id of the repo to which the logs will be pushed.
+         logdir (`str`, *optional*):
+             The directory where the logs will be written. If not specified, a local directory will be created by the
+             underlying `SummaryWriter` object.
+         commit_every (`int` or `float`, *optional*):
+             The frequency (in minutes) at which the logs will be pushed to the Hub. Defaults to 5 minutes.
+         squash_history (`bool`, *optional*):
+             Whether to squash the history of the repo after each commit. Defaults to `False`. Squashing commits is
+             useful to avoid degraded performance on the repo when it grows too large.
+         repo_type (`str`, *optional*):
+             The type of the repo to which the logs will be pushed. Defaults to "model".
+         repo_revision (`str`, *optional*):
+             The revision of the repo to which the logs will be pushed. Defaults to "main".
+         repo_private (`bool`, *optional*):
+             Whether to create a private repo or not. Defaults to False. This argument is ignored if the repo already
+             exists.
+         path_in_repo (`str`, *optional*):
+             The path to the folder in the repo where the logs will be pushed. Defaults to "tensorboard/".
+         repo_allow_patterns (`List[str]` or `str`, *optional*):
+             A list of patterns to include in the upload. Defaults to `"*.tfevents.*"`. Check out the
+             [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder) for more details.
+         repo_ignore_patterns (`List[str]` or `str`, *optional*):
+             A list of patterns to exclude in the upload. Check out the
+             [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder) for more details.
+         token (`str`, *optional*):
+             Authentication token. Will default to the stored token. See https://huggingface.co/settings/token for more
+             details
+         kwargs:
+             Additional keyword arguments passed to `SummaryWriter`.
+
+     Examples:
+     ```diff
+     # Taken from https://pytorch.org/docs/stable/tensorboard.html
+     - from torch.utils.tensorboard import SummaryWriter
+     + from huggingface_hub import HFSummaryWriter
+
+     import numpy as np
+
+     - writer = SummaryWriter()
+     + writer = HFSummaryWriter(repo_id="username/my-trained-model")
+
+     for n_iter in range(100):
+         writer.add_scalar('Loss/train', np.random.random(), n_iter)
+         writer.add_scalar('Loss/test', np.random.random(), n_iter)
+         writer.add_scalar('Accuracy/train', np.random.random(), n_iter)
+         writer.add_scalar('Accuracy/test', np.random.random(), n_iter)
+     ```
+
+     ```py
+     >>> from huggingface_hub import HFSummaryWriter
+
+     # Logs are automatically pushed every 15 minutes (5 by default) + when exiting the context manager
+     >>> with HFSummaryWriter(repo_id="test_hf_logger", commit_every=15) as logger:
+     ...     logger.add_scalar("a", 1)
+     ...     logger.add_scalar("b", 2)
+     ```
+     """
+
+     @experimental
+     def __new__(cls, *args, **kwargs) -> "HFSummaryWriter":
+         if not is_summary_writer_available:
+             raise ImportError(
+                 "You must have `tensorboard` installed to use `HFSummaryWriter`. Please run `pip install --upgrade"
+                 " tensorboardX` first."
+             )
+         return super().__new__(cls)
+
+     def __init__(
+         self,
+         repo_id: str,
+         *,
+         logdir: Optional[str] = None,
+         commit_every: Union[int, float] = 5,
+         squash_history: bool = False,
+         repo_type: Optional[str] = None,
+         repo_revision: Optional[str] = None,
+         repo_private: bool = False,
+         path_in_repo: Optional[str] = "tensorboard",
+         repo_allow_patterns: Optional[Union[List[str], str]] = "*.tfevents.*",
+         repo_ignore_patterns: Optional[Union[List[str], str]] = None,
+         token: Optional[str] = None,
+         **kwargs,
+     ):
+         # Initialize SummaryWriter
+         super().__init__(logdir=logdir, **kwargs)
+
+         # Check logdir has been correctly initialized and fail early otherwise. In practice, SummaryWriter takes care of it.
+         if not isinstance(self.logdir, str):
+             raise ValueError(f"`self.logdir` must be a string. Got '{self.logdir}' of type {type(self.logdir)}.")
+
+         # Append logdir name to `path_in_repo`
+         if path_in_repo is None or path_in_repo == "":
+             path_in_repo = Path(self.logdir).name
+         else:
+             path_in_repo = path_in_repo.strip("/") + "/" + Path(self.logdir).name
+
+         # Initialize scheduler
+         self.scheduler = CommitScheduler(
+             folder_path=self.logdir,
+             path_in_repo=path_in_repo,
+             repo_id=repo_id,
+             repo_type=repo_type,
+             revision=repo_revision,
+             private=repo_private,
+             token=token,
+             allow_patterns=repo_allow_patterns,
+             ignore_patterns=repo_ignore_patterns,
+             every=commit_every,
+             squash_history=squash_history,
+         )
+
+         # Exposing some high-level info at root level
+         self.repo_id = self.scheduler.repo_id
+         self.repo_type = self.scheduler.repo_type
+         self.repo_revision = self.scheduler.revision
+
+         # Add `hf-summary-writer` tag to the model card metadata
+         try:
+             card = ModelCard.load(repo_id_or_path=self.repo_id, repo_type=self.repo_type)
+         except EntryNotFoundError:
+             card = ModelCard("")
+         tags = card.data.get("tags", [])
+         if "hf-summary-writer" not in tags:
+             tags.append("hf-summary-writer")
+         card.data["tags"] = tags
+         card.push_to_hub(repo_id=self.repo_id, repo_type=self.repo_type)
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         """Push to hub in a non-blocking way when exiting the logger's context manager."""
+         super().__exit__(exc_type, exc_val, exc_tb)
+         future = self.scheduler.trigger()
+         future.result()
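
A short sketch of the context-manager behaviour implemented by `__exit__` above (the repo id is a placeholder): because `trigger()` returns a future and `future.result()` blocks, everything logged inside the `with` block is pushed before the script moves on.

```python
from huggingface_hub import HFSummaryWriter

with HFSummaryWriter(repo_id="username/test-logs", commit_every=5) as writer:
    for step in range(10):
        writer.add_scalar("loss", 1.0 / (step + 1), step)
# Here the final commit has completed (or raised), thanks to future.result().
```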
huggingface_hub/_upload_large_folder.py ADDED
@@ -0,0 +1,621 @@
+ # coding=utf-8
+ # Copyright 2024-present, the HuggingFace Inc. team.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ import enum
+ import logging
+ import os
+ import queue
+ import shutil
+ import sys
+ import threading
+ import time
+ import traceback
+ from datetime import datetime
+ from pathlib import Path
+ from threading import Lock
+ from typing import TYPE_CHECKING, List, Optional, Tuple, Union
+
+ from . import constants
+ from ._commit_api import CommitOperationAdd, UploadInfo, _fetch_upload_modes
+ from ._local_folder import LocalUploadFileMetadata, LocalUploadFilePaths, get_local_upload_paths, read_upload_metadata
+ from .constants import DEFAULT_REVISION, REPO_TYPES
+ from .utils import DEFAULT_IGNORE_PATTERNS, filter_repo_objects, tqdm
+ from .utils._cache_manager import _format_size
+ from .utils.sha import sha_fileobj
+
+
+ if TYPE_CHECKING:
+     from .hf_api import HfApi
+
+ logger = logging.getLogger(__name__)
+
+ WAITING_TIME_IF_NO_TASKS = 10  # seconds
+ MAX_NB_REGULAR_FILES_PER_COMMIT = 75
+ MAX_NB_LFS_FILES_PER_COMMIT = 150
+
+
+ def upload_large_folder_internal(
+     api: "HfApi",
+     repo_id: str,
+     folder_path: Union[str, Path],
+     *,
+     repo_type: str,  # Repo type is required!
+     revision: Optional[str] = None,
+     private: bool = False,
+     allow_patterns: Optional[Union[List[str], str]] = None,
+     ignore_patterns: Optional[Union[List[str], str]] = None,
+     num_workers: Optional[int] = None,
+     print_report: bool = True,
+     print_report_every: int = 60,
+ ):
+     """Upload a large folder to the Hub in the most resilient way possible.
+
+     See [`HfApi.upload_large_folder`] for the full documentation.
+     """
+     # 1. Check args and setup
+     if repo_type is None:
+         raise ValueError(
+             "For large uploads, `repo_type` is explicitly required. Please set it to `model`, `dataset` or `space`."
+             " If you are using the CLI, pass it as `--repo-type=model`."
+         )
+     if repo_type not in REPO_TYPES:
+         raise ValueError(f"Invalid repo type, must be one of {REPO_TYPES}")
+     if revision is None:
+         revision = DEFAULT_REVISION
+
+     folder_path = Path(folder_path).expanduser().resolve()
+     if not folder_path.is_dir():
+         raise ValueError(f"Provided path: '{folder_path}' is not a directory")
+
+     if ignore_patterns is None:
+         ignore_patterns = []
+     elif isinstance(ignore_patterns, str):
+         ignore_patterns = [ignore_patterns]
+     ignore_patterns += DEFAULT_IGNORE_PATTERNS
+
+     if num_workers is None:
+         nb_cores = os.cpu_count() or 1
+         num_workers = max(nb_cores - 2, 2)  # Use all but 2 cores, or at least 2 cores
+
+     # 2. Create repo if missing
+     repo_url = api.create_repo(repo_id=repo_id, repo_type=repo_type, private=private, exist_ok=True)
+     logger.info(f"Repo created: {repo_url}")
+     repo_id = repo_url.repo_id
+
+     # 3. List files to upload
+     filtered_paths_list = filter_repo_objects(
+         (path.relative_to(folder_path).as_posix() for path in folder_path.glob("**/*") if path.is_file()),
+         allow_patterns=allow_patterns,
+         ignore_patterns=ignore_patterns,
+     )
+     paths_list = [get_local_upload_paths(folder_path, relpath) for relpath in filtered_paths_list]
+     logger.info(f"Found {len(paths_list)} candidate files to upload")
+
+     # Read metadata for each file
+     items = [
+         (paths, read_upload_metadata(folder_path, paths.path_in_repo))
+         for paths in tqdm(paths_list, desc="Recovering from metadata files")
+     ]
+
+     # 4. Start workers
+     status = LargeUploadStatus(items)
+     threads = [
+         threading.Thread(
+             target=_worker_job,
+             kwargs={
+                 "status": status,
+                 "api": api,
+                 "repo_id": repo_id,
+                 "repo_type": repo_type,
+                 "revision": revision,
+             },
+         )
+         for _ in range(num_workers)
+     ]
+
+     for thread in threads:
+         thread.start()
+
+     # 5. Print regular reports
+     if print_report:
+         print("\n\n" + status.current_report())
+     last_report_ts = time.time()
+     while True:
+         time.sleep(1)
+         if time.time() - last_report_ts >= print_report_every:
+             if print_report:
+                 _print_overwrite(status.current_report())
+             last_report_ts = time.time()
+         if status.is_done():
+             logger.info("Is done: exiting main loop")
+             break
+
+     for thread in threads:
+         thread.join()
+
+     logger.info(status.current_report())
+     logger.info("Upload is complete!")
+
+
+ ####################
+ # Logic to manage workers and synchronize tasks
+ ####################
+
+
+ class WorkerJob(enum.Enum):
+     SHA256 = enum.auto()
+     GET_UPLOAD_MODE = enum.auto()
+     PREUPLOAD_LFS = enum.auto()
+     COMMIT = enum.auto()
+     WAIT = enum.auto()  # if no tasks are available but we don't want to exit
+
+
+ JOB_ITEM_T = Tuple[LocalUploadFilePaths, LocalUploadFileMetadata]
+
+
+ class LargeUploadStatus:
+     """Contains information, queues and tasks for a large upload process."""
+
+     def __init__(self, items: List[JOB_ITEM_T]):
+         self.items = items
+         self.queue_sha256: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
+         self.queue_get_upload_mode: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
+         self.queue_preupload_lfs: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
+         self.queue_commit: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
+         self.lock = Lock()
+
+         self.nb_workers_sha256: int = 0
+         self.nb_workers_get_upload_mode: int = 0
+         self.nb_workers_preupload_lfs: int = 0
+         self.nb_workers_commit: int = 0
+         self.nb_workers_waiting: int = 0
+         self.last_commit_attempt: Optional[float] = None
+
+         self._started_at = datetime.now()
+
+         # Setup queues
+         for item in self.items:
+             paths, metadata = item
+             if metadata.sha256 is None:
+                 self.queue_sha256.put(item)
+             elif metadata.upload_mode is None:
+                 self.queue_get_upload_mode.put(item)
+             elif metadata.upload_mode == "lfs" and not metadata.is_uploaded:
+                 self.queue_preupload_lfs.put(item)
+             elif not metadata.is_committed:
+                 self.queue_commit.put(item)
+             else:
+                 logger.debug(f"Skipping file {paths.path_in_repo} (already uploaded and committed)")
+
+     def current_report(self) -> str:
+         """Generate a report of the current status of the large upload."""
+         nb_hashed = 0
+         size_hashed = 0
+         nb_preuploaded = 0
+         nb_lfs = 0
+         nb_lfs_unsure = 0
+         size_preuploaded = 0
+         nb_committed = 0
+         size_committed = 0
+         total_size = 0
+         ignored_files = 0
+         total_files = 0
+
+         with self.lock:
+             for _, metadata in self.items:
+                 if metadata.should_ignore:
+                     ignored_files += 1
+                     continue
+                 total_size += metadata.size
+                 total_files += 1
+                 if metadata.sha256 is not None:
+                     nb_hashed += 1
+                     size_hashed += metadata.size
+                 if metadata.upload_mode == "lfs":
+                     nb_lfs += 1
+                 if metadata.upload_mode is None:
+                     nb_lfs_unsure += 1
+                 if metadata.is_uploaded:
+                     nb_preuploaded += 1
+                     size_preuploaded += metadata.size
+                 if metadata.is_committed:
+                     nb_committed += 1
+                     size_committed += metadata.size
+         total_size_str = _format_size(total_size)
+
+         now = datetime.now()
+         now_str = now.strftime("%Y-%m-%d %H:%M:%S")
+         elapsed = now - self._started_at
+         elapsed_str = str(elapsed).split(".")[0]  # remove milliseconds
+
+         message = "\n" + "-" * 10
+         message += f" {now_str} ({elapsed_str}) "
+         message += "-" * 10 + "\n"
+
+         message += "Files: "
+         message += f"hashed {nb_hashed}/{total_files} ({_format_size(size_hashed)}/{total_size_str}) | "
+         message += f"pre-uploaded: {nb_preuploaded}/{nb_lfs} ({_format_size(size_preuploaded)}/{total_size_str})"
+         if nb_lfs_unsure > 0:
+             message += f" (+{nb_lfs_unsure} unsure)"
+         message += f" | committed: {nb_committed}/{total_files} ({_format_size(size_committed)}/{total_size_str})"
+         message += f" | ignored: {ignored_files}\n"
+
+         message += "Workers: "
+         message += f"hashing: {self.nb_workers_sha256} | "
+         message += f"get upload mode: {self.nb_workers_get_upload_mode} | "
+         message += f"pre-uploading: {self.nb_workers_preupload_lfs} | "
+         message += f"committing: {self.nb_workers_commit} | "
+         message += f"waiting: {self.nb_workers_waiting}\n"
+         message += "-" * 51
+
+         return message
+
+     def is_done(self) -> bool:
+         with self.lock:
+             return all(metadata.is_committed or metadata.should_ignore for _, metadata in self.items)
+
+
+ def _worker_job(
+     status: LargeUploadStatus,
+     api: "HfApi",
+     repo_id: str,
+     repo_type: str,
+     revision: str,
+ ):
+     """
+     Main process for a worker. The worker will perform tasks based on the priority list until all files are uploaded
+     and committed. If no tasks are available, the worker will wait for 10 seconds before checking again.
+
+     If a task fails for any reason, the item(s) are put back in the queue for another worker to pick up.
+
+     Read `upload_large_folder` docstring for more information on how tasks are prioritized.
+     """
+     while True:
+         next_job: Optional[Tuple[WorkerJob, List[JOB_ITEM_T]]] = None
+
+         # Determine next task
+         next_job = _determine_next_job(status)
+         if next_job is None:
+             return
+         job, items = next_job
+
+         # Perform task
+         if job == WorkerJob.SHA256:
+             item = items[0]  # single item
+             try:
+                 _compute_sha256(item)
+                 status.queue_get_upload_mode.put(item)
+             except KeyboardInterrupt:
+                 raise
+             except Exception as e:
+                 logger.error(f"Failed to compute sha256: {e}")
+                 logger.error(traceback.format_exc())
+                 status.queue_sha256.put(item)
+
+             with status.lock:
+                 status.nb_workers_sha256 -= 1
+
+         elif job == WorkerJob.GET_UPLOAD_MODE:
+             try:
+                 _get_upload_mode(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
+             except KeyboardInterrupt:
+                 raise
+             except Exception as e:
+                 logger.error(f"Failed to get upload mode: {e}")
+                 logger.error(traceback.format_exc())
+
+             # Items are either:
+             # - dropped (if should_ignore)
+             # - put in LFS queue (if LFS)
+             # - put in commit queue (if regular)
+             # - or put back (if error occurred).
+             for item in items:
+                 _, metadata = item
+                 if metadata.should_ignore:
+                     continue
+                 if metadata.upload_mode == "lfs":
+                     status.queue_preupload_lfs.put(item)
+                 elif metadata.upload_mode == "regular":
+                     status.queue_commit.put(item)
+                 else:
+                     status.queue_get_upload_mode.put(item)
+
+             with status.lock:
+                 status.nb_workers_get_upload_mode -= 1
+
+         elif job == WorkerJob.PREUPLOAD_LFS:
+             item = items[0]  # single item
+             try:
+                 _preupload_lfs(item, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
+                 status.queue_commit.put(item)
+             except KeyboardInterrupt:
+                 raise
+             except Exception as e:
+                 logger.error(f"Failed to preupload LFS: {e}")
+                 logger.error(traceback.format_exc())
+                 status.queue_preupload_lfs.put(item)
+
+             with status.lock:
+                 status.nb_workers_preupload_lfs -= 1
+
+         elif job == WorkerJob.COMMIT:
+             try:
+                 _commit(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
+             except KeyboardInterrupt:
+                 raise
+             except Exception as e:
+                 logger.error(f"Failed to commit: {e}")
+                 logger.error(traceback.format_exc())
+                 for item in items:
+                     status.queue_commit.put(item)
+             with status.lock:
+                 status.last_commit_attempt = time.time()
+                 status.nb_workers_commit -= 1
+
+         elif job == WorkerJob.WAIT:
+             time.sleep(WAITING_TIME_IF_NO_TASKS)
+             with status.lock:
+                 status.nb_workers_waiting -= 1
+
+
+ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob, List[JOB_ITEM_T]]]:
+     with status.lock:
+         # 1. Commit if more than 5 minutes since last commit attempt (and at least 1 file)
+         if (
+             status.nb_workers_commit == 0
+             and status.queue_commit.qsize() > 0
+             and status.last_commit_attempt is not None
+             and time.time() - status.last_commit_attempt > 5 * 60
+         ):
+             status.nb_workers_commit += 1
+             logger.debug("Job: commit (more than 5 minutes since last commit attempt)")
+             return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+
+         # 2. Commit if at least 150 files are ready to commit
+         elif status.nb_workers_commit == 0 and status.queue_commit.qsize() >= 150:
+             status.nb_workers_commit += 1
+             logger.debug("Job: commit (>=150 files ready)")
+             return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+
+         # 3. Get upload mode if at least 10 files
+         elif status.queue_get_upload_mode.qsize() >= 10:
+             status.nb_workers_get_upload_mode += 1
+             logger.debug("Job: get upload mode (>10 files ready)")
+             return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+
+         # 4. Preupload LFS file if at least 1 file and no worker is preuploading LFS
+         elif status.queue_preupload_lfs.qsize() > 0 and status.nb_workers_preupload_lfs == 0:
+             status.nb_workers_preupload_lfs += 1
+             logger.debug("Job: preupload LFS (no other worker preuploading LFS)")
+             return (WorkerJob.PREUPLOAD_LFS, _get_one(status.queue_preupload_lfs))
+
+         # 5. Compute sha256 if at least 1 file and no worker is computing sha256
+         elif status.queue_sha256.qsize() > 0 and status.nb_workers_sha256 == 0:
+             status.nb_workers_sha256 += 1
+             logger.debug("Job: sha256 (no other worker computing sha256)")
+             return (WorkerJob.SHA256, _get_one(status.queue_sha256))
+
+         # 6. Get upload mode if at least 1 file and no worker is getting upload mode
+         elif status.queue_get_upload_mode.qsize() > 0 and status.nb_workers_get_upload_mode == 0:
+             status.nb_workers_get_upload_mode += 1
+             logger.debug("Job: get upload mode (no other worker getting upload mode)")
+             return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+
+         # 7. Preupload LFS file if at least 1 file
+         #    Skip if hf_transfer is enabled and there is already a worker preuploading LFS
+         elif status.queue_preupload_lfs.qsize() > 0 and (
+             status.nb_workers_preupload_lfs == 0 or not constants.HF_HUB_ENABLE_HF_TRANSFER
+         ):
+             status.nb_workers_preupload_lfs += 1
+             logger.debug("Job: preupload LFS")
+             return (WorkerJob.PREUPLOAD_LFS, _get_one(status.queue_preupload_lfs))
+
+         # 8. Compute sha256 if at least 1 file
+         elif status.queue_sha256.qsize() > 0:
+             status.nb_workers_sha256 += 1
+             logger.debug("Job: sha256")
+             return (WorkerJob.SHA256, _get_one(status.queue_sha256))
+
+         # 9. Get upload mode if at least 1 file
+         elif status.queue_get_upload_mode.qsize() > 0:
+             status.nb_workers_get_upload_mode += 1
+             logger.debug("Job: get upload mode")
+             return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+
+         # 10. Commit if at least 1 file and 1 min since last commit attempt
+         elif (
+             status.nb_workers_commit == 0
+             and status.queue_commit.qsize() > 0
+             and status.last_commit_attempt is not None
+             and time.time() - status.last_commit_attempt > 1 * 60
+         ):
+             status.nb_workers_commit += 1
+             logger.debug("Job: commit (1 min since last commit attempt)")
+             return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+
+         # 11. Commit if at least 1 file, all other queues are empty, and all workers are waiting
+         #     e.g. when it's the last commit
+         elif (
+             status.nb_workers_commit == 0
+             and status.queue_commit.qsize() > 0
+             and status.queue_sha256.qsize() == 0
+             and status.queue_get_upload_mode.qsize() == 0
+             and status.queue_preupload_lfs.qsize() == 0
+             and status.nb_workers_sha256 == 0
+             and status.nb_workers_get_upload_mode == 0
+             and status.nb_workers_preupload_lfs == 0
+         ):
+             status.nb_workers_commit += 1
+             logger.debug("Job: commit")
+             return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+
+         # 12. If all queues are empty, exit
+         elif all(metadata.is_committed or metadata.should_ignore for _, metadata in status.items):
+             logger.info("All files have been processed! Exiting worker.")
+             return None
+
+         # 13. If no task is available, wait
+         else:
+             status.nb_workers_waiting += 1
+             logger.debug(f"No task available, waiting... ({WAITING_TIME_IF_NO_TASKS}s)")
+             return (WorkerJob.WAIT, [])
+
+
+ ####################
+ # Atomic jobs (sha256, get_upload_mode, preupload_lfs, commit)
+ ####################
+
+
+ def _compute_sha256(item: JOB_ITEM_T) -> None:
+     """Compute sha256 of a file and save it in metadata."""
+     paths, metadata = item
+     if metadata.sha256 is None:
+         with paths.file_path.open("rb") as f:
+             metadata.sha256 = sha_fileobj(f).hex()
+     metadata.save(paths)
+
+
+ def _get_upload_mode(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+     """Get upload mode for each file and update metadata.
+
+     Also receive info if the file should be ignored.
+     """
+     additions = [_build_hacky_operation(item) for item in items]
+     _fetch_upload_modes(
+         additions=additions,
+         repo_type=repo_type,
+         repo_id=repo_id,
+         headers=api._build_hf_headers(),
+         revision=revision,
+     )
+     for item, addition in zip(items, additions):
+         paths, metadata = item
+         metadata.upload_mode = addition._upload_mode
+         metadata.should_ignore = addition._should_ignore
+         metadata.save(paths)
+
+
+ def _preupload_lfs(item: JOB_ITEM_T, api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+     """Preupload LFS file and update metadata."""
+     paths, metadata = item
+     addition = _build_hacky_operation(item)
+     api.preupload_lfs_files(
+         repo_id=repo_id,
+         repo_type=repo_type,
+         revision=revision,
+         additions=[addition],
+     )
+
+     metadata.is_uploaded = True
+     metadata.save(paths)
+
+
+ def _commit(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+     """Commit files to the repo."""
+     additions = [_build_hacky_operation(item) for item in items]
+     api.create_commit(
+         repo_id=repo_id,
+         repo_type=repo_type,
+         revision=revision,
+         operations=additions,
+         commit_message="Add files using upload-large-folder tool",
+     )
+     for paths, metadata in items:
+         metadata.is_committed = True
+         metadata.save(paths)
+
+
+ ####################
+ # Hacks with CommitOperationAdd to bypass checks/sha256 calculation
+ ####################
+
+
+ class HackyCommitOperationAdd(CommitOperationAdd):
+     def __post_init__(self) -> None:
+         if isinstance(self.path_or_fileobj, Path):
+             self.path_or_fileobj = str(self.path_or_fileobj)
+
+
+ def _build_hacky_operation(item: JOB_ITEM_T) -> HackyCommitOperationAdd:
+     paths, metadata = item
+     operation = HackyCommitOperationAdd(path_in_repo=paths.path_in_repo, path_or_fileobj=paths.file_path)
+     with paths.file_path.open("rb") as file:
+         sample = file.peek(512)[:512]
+     if metadata.sha256 is None:
+         raise ValueError("sha256 must have been computed by now!")
+     operation.upload_info = UploadInfo(sha256=bytes.fromhex(metadata.sha256), size=metadata.size, sample=sample)
+     return operation
+
+
+ ####################
+ # Misc helpers
+ ####################
+
+
+ def _get_one(queue: "queue.Queue[JOB_ITEM_T]") -> List[JOB_ITEM_T]:
+     return [queue.get()]
+
+
+ def _get_n(queue: "queue.Queue[JOB_ITEM_T]", n: int) -> List[JOB_ITEM_T]:
+     return [queue.get() for _ in range(min(queue.qsize(), n))]
+
+
+ def _get_items_to_commit(queue: "queue.Queue[JOB_ITEM_T]") -> List[JOB_ITEM_T]:
+     """Special case for commit job: the number of items to commit depends on the type of files."""
+     # Can take at most 75 regular files and/or 150 LFS files in a single commit
+     items: List[JOB_ITEM_T] = []
+     nb_lfs, nb_regular = 0, 0
+     while True:
+         # If empty queue => commit everything
+         if queue.qsize() == 0:
+             return items
+
+         # If we have enough items => commit them
+         if nb_lfs >= MAX_NB_LFS_FILES_PER_COMMIT or nb_regular >= MAX_NB_REGULAR_FILES_PER_COMMIT:
+             return items
+
+         # Else, get a new item and increase counter
+         item = queue.get()
+         items.append(item)
+         _, metadata = item
+         if metadata.upload_mode == "lfs":
+             nb_lfs += 1
+         else:
+             nb_regular += 1
+
+
+ def _print_overwrite(report: str) -> None:
+     """Print a report, overwriting the previous lines.
+
+     Since tqdm is using `sys.stderr` to (re-)write progress bars, we need to use `sys.stdout`
+     to print the report.
+
+     Note: works well only if no other process is writing to `sys.stdout`!
+     """
+     report += "\n"
+     # Get terminal width
+     terminal_width = shutil.get_terminal_size().columns
+
+     # Count number of lines that should be cleared
+     nb_lines = sum(len(line) // terminal_width + 1 for line in report.splitlines())
+
+     # Clear previous lines based on the number of lines in the report
+     for _ in range(nb_lines):
+         sys.stdout.write("\r\033[K")  # Clear line
+         sys.stdout.write("\033[F")  # Move cursor up one line
+
+     # Print the new report, filling remaining space with whitespace
+     sys.stdout.write(report)
+     sys.stdout.write(" " * (terminal_width - len(report.splitlines()[-1])))
+     sys.stdout.flush()
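
The internal entry point above is normally reached through `HfApi.upload_large_folder`; a hedged sketch (the repo id and folder path are placeholders):

```python
from huggingface_hub import HfApi

api = HfApi()
# repo_type is mandatory here: upload_large_folder_internal raises a
# ValueError if it is not set explicitly.
api.upload_large_folder(
    repo_id="username/my-large-dataset",  # placeholder
    folder_path="./data",                 # placeholder
    repo_type="dataset",
)
```

The call is resumable by design: per-file metadata (sha256, upload mode, uploaded/committed flags) is persisted via `metadata.save(paths)`, so an interrupted run picks up where it left off.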
huggingface_hub/_webhooks_payload.py ADDED
@@ -0,0 +1,137 @@
+ # coding=utf-8
+ # Copyright 2023-present, the HuggingFace Inc. team.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains data structures to parse the webhooks payload."""
+
+ from typing import List, Literal, Optional
+
+ from .utils import is_pydantic_available
+
+
+ if is_pydantic_available():
+     from pydantic import BaseModel
+ else:
+     # Define a dummy BaseModel to avoid import errors when pydantic is not installed
+     # Import error will be raised when trying to use the class
+
+     class BaseModel:  # type: ignore [no-redef]
+         def __init__(self, *args, **kwargs) -> None:
+             raise ImportError(
+                 "You must have `pydantic` installed to use `WebhookPayload`. This is an optional dependency that"
+                 " should be installed separately. Please run `pip install --upgrade pydantic` and retry."
+             )
+
+
+ # This is an adaptation of the ReportV3 interface implemented in moon-landing. V0, V1 and V2 have been ignored as they
+ # are not in use anymore. To keep in sync when the format is updated in
+ # https://github.com/huggingface/moon-landing/blob/main/server/lib/HFWebhooks.ts (internal link).
+
+
+ WebhookEvent_T = Literal[
+     "create",
+     "delete",
+     "move",
+     "update",
+ ]
+ RepoChangeEvent_T = Literal[
+     "add",
+     "move",
+     "remove",
+     "update",
+ ]
+ RepoType_T = Literal[
+     "dataset",
+     "model",
+     "space",
+ ]
+ DiscussionStatus_T = Literal[
+     "closed",
+     "draft",
+     "open",
+     "merged",
+ ]
+ SupportedWebhookVersion = Literal[3]
+
+
+ class ObjectId(BaseModel):
+     id: str
+
+
+ class WebhookPayloadUrl(BaseModel):
+     web: str
+     api: Optional[str] = None
+
+
+ class WebhookPayloadMovedTo(BaseModel):
+     name: str
+     owner: ObjectId
+
+
+ class WebhookPayloadWebhook(ObjectId):
+     version: SupportedWebhookVersion
+
+
+ class WebhookPayloadEvent(BaseModel):
+     action: WebhookEvent_T
+     scope: str
+
+
+ class WebhookPayloadDiscussionChanges(BaseModel):
+     base: str
+     mergeCommitId: Optional[str] = None
+
+
+ class WebhookPayloadComment(ObjectId):
+     author: ObjectId
+     hidden: bool
+     content: Optional[str] = None
+     url: WebhookPayloadUrl
+
+
+ class WebhookPayloadDiscussion(ObjectId):
+     num: int
+     author: ObjectId
+     url: WebhookPayloadUrl
+     title: str
+     isPullRequest: bool
+     status: DiscussionStatus_T
+     changes: Optional[WebhookPayloadDiscussionChanges] = None
+     pinned: Optional[bool] = None
+
+
+ class WebhookPayloadRepo(ObjectId):
+     owner: ObjectId
+     head_sha: Optional[str] = None
+     name: str
+     private: bool
+     subdomain: Optional[str] = None
+     tags: Optional[List[str]] = None
+     type: Literal["dataset", "model", "space"]
+     url: WebhookPayloadUrl
+
+
+ class WebhookPayloadUpdatedRef(BaseModel):
+     ref: str
+     oldSha: Optional[str] = None
+     newSha: Optional[str] = None
+
+
+ class WebhookPayload(BaseModel):
+     event: WebhookPayloadEvent
+     repo: WebhookPayloadRepo
+     discussion: Optional[WebhookPayloadDiscussion] = None
+     comment: Optional[WebhookPayloadComment] = None
+     webhook: WebhookPayloadWebhook
+     movedTo: Optional[WebhookPayloadMovedTo] = None
+     updatedRefs: Optional[List[WebhookPayloadUpdatedRef]] = None
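
A minimal sketch of validating an incoming webhook body with the models above; the payload dict is hand-written and deliberately minimal, with field names taken from the classes in this file:

```python
payload_dict = {
    "event": {"action": "update", "scope": "repo.content"},
    "repo": {
        "id": "abc123",
        "owner": {"id": "def456"},
        "name": "username/my-model",
        "private": False,
        "type": "model",
        "url": {"web": "https://huggingface.co/username/my-model"},
    },
    "webhook": {"id": "wh-1", "version": 3},
}

# pydantic v1 API; on pydantic v2 the equivalent is WebhookPayload.model_validate(...)
payload = WebhookPayload.parse_obj(payload_dict)
assert payload.event.action == "update"
```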
huggingface_hub/_webhooks_server.py ADDED
@@ -0,0 +1,386 @@
+ # coding=utf-8
+ # Copyright 2023-present, the HuggingFace Inc. team.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains `WebhooksServer` and `webhook_endpoint` to create a webhook server easily."""
+
+ import atexit
+ import inspect
+ import os
+ from functools import wraps
+ from typing import TYPE_CHECKING, Any, Callable, Dict, Optional
+
+ from .utils import experimental, is_fastapi_available, is_gradio_available
+
+
+ if TYPE_CHECKING:
+     import gradio as gr
+     from fastapi import Request
+
+ if is_fastapi_available():
+     from fastapi import FastAPI, Request
+     from fastapi.responses import JSONResponse
+ else:
+     # Will fail at runtime if FastAPI is not available
+     FastAPI = Request = JSONResponse = None  # type: ignore [misc, assignment]
+
+
+ _global_app: Optional["WebhooksServer"] = None
+ _is_local = os.environ.get("SPACE_ID") is None
+
+
+ @experimental
+ class WebhooksServer:
+     """
+     The [`WebhooksServer`] class lets you create an instance of a Gradio app that can receive Huggingface webhooks.
+     These webhooks can be registered using the [`~WebhooksServer.add_webhook`] decorator. Webhook endpoints are added to
+     the app as a POST endpoint to the FastAPI router. Once all the webhooks are registered, the `launch` method has to be
+     called to start the app.
+
+     It is recommended to accept [`WebhookPayload`] as the first argument of the webhook function. It is a Pydantic
+     model that contains all the information about the webhook event. The data will be parsed automatically for you.
+
+     Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to setup your
+     WebhooksServer and deploy it on a Space.
+
+     <Tip warning={true}>
+
+     `WebhooksServer` is experimental. Its API is subject to change in the future.
+
+     </Tip>
+
+     <Tip warning={true}>
+
+     You must have `gradio` installed to use `WebhooksServer` (`pip install --upgrade gradio`).
+
+     </Tip>
+
+     Args:
+         ui (`gradio.Blocks`, optional):
+             A Gradio UI instance to be used as the Space landing page. If `None`, a UI displaying instructions
+             about the configured webhooks is created.
+         webhook_secret (`str`, optional):
+             A secret key to verify incoming webhook requests. You can set this value to any secret you want as long as
+             you also configure it in your [webhooks settings panel](https://huggingface.co/settings/webhooks). You
+             can also set this value as the `WEBHOOK_SECRET` environment variable. If no secret is provided, the
+             webhook endpoints are opened without any security.
+
+     Example:
+
+     ```python
+     import gradio as gr
+     from huggingface_hub import WebhooksServer, WebhookPayload
+
+     with gr.Blocks() as ui:
+         ...
+
+     app = WebhooksServer(ui=ui, webhook_secret="my_secret_key")
+
+     @app.add_webhook("/say_hello")
+     async def hello(payload: WebhookPayload):
+         return {"message": "hello"}
+
+     app.launch()
+     ```
+     """
+
+     def __new__(cls, *args, **kwargs) -> "WebhooksServer":
+         if not is_gradio_available():
+             raise ImportError(
+                 "You must have `gradio` installed to use `WebhooksServer`. Please run `pip install --upgrade gradio`"
+                 " first."
+             )
+         if not is_fastapi_available():
+             raise ImportError(
+                 "You must have `fastapi` installed to use `WebhooksServer`. Please run `pip install --upgrade fastapi`"
+                 " first."
+             )
+         return super().__new__(cls)
+
+     def __init__(
+         self,
+         ui: Optional["gr.Blocks"] = None,
+         webhook_secret: Optional[str] = None,
+     ) -> None:
+         self._ui = ui
+
+         self.webhook_secret = webhook_secret or os.getenv("WEBHOOK_SECRET")
+         self.registered_webhooks: Dict[str, Callable] = {}
+         _warn_on_empty_secret(self.webhook_secret)
+
+     def add_webhook(self, path: Optional[str] = None) -> Callable:
+         """
+         Decorator to add a webhook to the [`WebhooksServer`] server.
+
+         Args:
+             path (`str`, optional):
+                 The URL path to register the webhook function. If not provided, the function name will be used as the
+                 path. In any case, all webhooks are registered under `/webhooks`.
+
+         Raises:
+             ValueError: If the provided path is already registered as a webhook.
+
+         Example:
+             ```python
+             from huggingface_hub import WebhooksServer, WebhookPayload
+
+             app = WebhooksServer()
+
+             @app.add_webhook
+             async def trigger_training(payload: WebhookPayload):
+                 if payload.repo.type == "dataset" and payload.event.action == "update":
+                     # Trigger a training job if a dataset is updated
+                     ...
+
+             app.launch()
+             ```
+         """
+         # Usage: directly as decorator. Example: `@app.add_webhook`
+         if callable(path):
+             # If path is a function, it means it was used as a decorator without arguments
+             return self.add_webhook()(path)
+
+         # Usage: provide a path. Example: `@app.add_webhook(...)`
+         @wraps(FastAPI.post)
+         def _inner_post(*args, **kwargs):
+             func = args[0]
+             abs_path = f"/webhooks/{(path or func.__name__).strip('/')}"
+             if abs_path in self.registered_webhooks:
+                 raise ValueError(f"Webhook {abs_path} already exists.")
+             self.registered_webhooks[abs_path] = func
+
+         return _inner_post
+
+     def launch(self, prevent_thread_lock: bool = False, **launch_kwargs: Any) -> None:
+         """Launch the Gradio app and register webhooks to the underlying FastAPI server.
+
+         Input parameters are forwarded to Gradio when launching the app.
+         """
+         ui = self._ui or self._get_default_ui()
+
+         # Start Gradio App
+         #   - as non-blocking so that webhooks can be added afterwards
+         #   - as shared if launch locally (to debug webhooks)
+         launch_kwargs.setdefault("share", _is_local)
+         self.fastapi_app, _, _ = ui.launch(prevent_thread_lock=True, **launch_kwargs)
+
+         # Register webhooks to FastAPI app
+         for path, func in self.registered_webhooks.items():
+             # Add secret check if required
+             if self.webhook_secret is not None:
+                 func = _wrap_webhook_to_check_secret(func, webhook_secret=self.webhook_secret)
+
+             # Add route to FastAPI app
+             self.fastapi_app.post(path)(func)
+
+         # Print instructions and block main thread
+         space_host = os.environ.get("SPACE_HOST")
+         url = "https://" + space_host if space_host is not None else (ui.share_url or ui.local_url)
+         url = url.strip("/")
+         message = "\nWebhooks are correctly setup and ready to use:"
+         message += "\n" + "\n".join(f" - POST {url}{webhook}" for webhook in self.registered_webhooks)
+         message += "\nGo to https://huggingface.co/settings/webhooks to setup your webhooks."
+         print(message)
+
+         if not prevent_thread_lock:
+             ui.block_thread()
+
+     def _get_default_ui(self) -> "gr.Blocks":
+         """Default UI if not provided (lists webhooks and provides basic instructions)."""
+         import gradio as gr
+
+         with gr.Blocks() as ui:
+             gr.Markdown("# This is an app to process 🤗 Webhooks")
+             gr.Markdown(
+                 "Webhooks are a foundation for MLOps-related features. They allow you to listen for new changes on"
+                 " specific repos or to all repos belonging to a particular set of users/organizations (not just your"
+                 " repos, but any repo). Check out this [guide](https://huggingface.co/docs/hub/webhooks) to get to"
+                 " know more about webhooks on the Huggingface Hub."
+             )
+             gr.Markdown(
+                 f"{len(self.registered_webhooks)} webhook(s) are registered:"
+                 + "\n\n"
+                 + "\n ".join(
+                     f"- [{webhook_path}]({_get_webhook_doc_url(webhook.__name__, webhook_path)})"
+                     for webhook_path, webhook in self.registered_webhooks.items()
+                 )
+             )
+             gr.Markdown(
+                 "Go to https://huggingface.co/settings/webhooks to setup your webhooks."
+                 + "\nYour app is running locally. Please look at the logs to check the full URL you need to set."
221
+ if _is_local
222
+ else (
223
+ "\nThis app is running on a Space. You can find the corresponding URL in the options menu"
224
+ " (top-right) > 'Embed the Space'. The URL looks like 'https://{username}-{repo_name}.hf.space'."
225
+ )
226
+ )
227
+ return ui
228
+
229
+
230
+ @experimental
+ def webhook_endpoint(path: Optional[str] = None) -> Callable:
+ """Decorator to start a [`WebhooksServer`] and register the decorated function as a webhook endpoint.
+
+ This is a helper to get started quickly. If you need more flexibility (custom landing page or webhook secret),
+ you can use [`WebhooksServer`] directly. You can register multiple webhook endpoints (to the same server) by using
+ this decorator multiple times.
+
+ Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to set up your
+ server and deploy it on a Space.
+
+ <Tip warning={true}>
+
+ `webhook_endpoint` is experimental. Its API is subject to change in the future.
+
+ </Tip>
+
+ <Tip warning={true}>
+
+ You must have `gradio` installed to use `webhook_endpoint` (`pip install --upgrade gradio`).
+
+ </Tip>
+
+ Args:
+ path (`str`, optional):
+ The URL path to register the webhook function. If not provided, the function name will be used as the path.
+ In any case, all webhooks are registered under `/webhooks`.
+
+ Examples:
+ The default usage is to register a function as a webhook endpoint. The function name will be used as the path.
+ The server will be started automatically at exit (i.e. at the end of the script).
+
+ ```python
+ from huggingface_hub import webhook_endpoint, WebhookPayload
+
+ @webhook_endpoint
+ async def trigger_training(payload: WebhookPayload):
+ if payload.repo.type == "dataset" and payload.event.action == "update":
+ # Trigger a training job if a dataset is updated
+ ...
+
+ # Server is automatically started at the end of the script.
+ ```
+
+ Advanced usage: register a function as a webhook endpoint and start the server manually. This is useful if you
+ are running it in a notebook.
+
+ ```python
+ from huggingface_hub import webhook_endpoint, WebhookPayload
+
+ @webhook_endpoint
+ async def trigger_training(payload: WebhookPayload):
+ if payload.repo.type == "dataset" and payload.event.action == "update":
+ # Trigger a training job if a dataset is updated
+ ...
+
+ # Start the server manually
+ trigger_training.launch()
+ ```
+ """
+ if callable(path):
+ # If path is a function, it means it was used as a decorator without arguments
+ return webhook_endpoint()(path)
+
+ @wraps(WebhooksServer.add_webhook)
+ def _inner(func: Callable) -> Callable:
+ app = _get_global_app()
+ app.add_webhook(path)(func)
+ if len(app.registered_webhooks) == 1:
+ # Register `app.launch` to run at exit (only once)
+ atexit.register(app.launch)
+
+ @wraps(app.launch)
+ def _launch_now():
+ # Run the app directly (without waiting for atexit)
+ atexit.unregister(app.launch)
+ app.launch()
+
+ func.launch = _launch_now # type: ignore
+ return func
+
+ return _inner
+
+
+ def _get_global_app() -> WebhooksServer:
+ global _global_app
+ if _global_app is None:
+ _global_app = WebhooksServer()
+ return _global_app
+
+
+ def _warn_on_empty_secret(webhook_secret: Optional[str]) -> None:
+ if webhook_secret is None:
+ print("Webhook secret is not defined. This means your webhook endpoints will be open to everyone.")
+ print(
+ "To add a secret, set `WEBHOOK_SECRET` as an environment variable or pass it at initialization: "
+ "\n\t`app = WebhooksServer(webhook_secret='my_secret', ...)`"
+ )
+ print(
+ "For more details about webhook secrets, please refer to"
+ " https://huggingface.co/docs/hub/webhooks#webhook-secret."
+ )
+ else:
+ print("Webhook secret is correctly defined.")
+
+
+ def _get_webhook_doc_url(webhook_name: str, webhook_path: str) -> str:
+ """Returns the anchor to a given webhook in the docs (experimental)"""
+ return "/docs#/default/" + webhook_name + webhook_path.replace("/", "_") + "_post"
+
+
+ def _wrap_webhook_to_check_secret(func: Callable, webhook_secret: str) -> Callable:
+ """Wraps a webhook function to check the webhook secret before calling the function.
+
+ This is a hacky way to add the `request` parameter to the function signature. Since FastAPI bases itself on route
+ parameters to inject values into the function, we need to hack the function signature to retrieve the `Request`
+ object (and hence the headers). A far cleaner solution would be to use a middleware. However, since
+ `fastapi==0.90.1`, a middleware cannot be added once the app has started. And since the FastAPI app is started by
+ Gradio internals (and not by us), we cannot add a middleware.
+
+ This method is called only when a secret has been defined by the user. If a request is sent without the
+ "x-webhook-secret", the function will return a 401 error (unauthorized). If the header is sent but is incorrect,
+ the function will return a 403 error (forbidden).
+
+ Inspired by https://stackoverflow.com/a/33112180.
+ """
+ initial_sig = inspect.signature(func)
+
+ @wraps(func)
+ async def _protected_func(request: Request, **kwargs):
+ request_secret = request.headers.get("x-webhook-secret")
+ if request_secret is None:
+ return JSONResponse({"error": "x-webhook-secret header not set."}, status_code=401)
+ if request_secret != webhook_secret:
+ return JSONResponse({"error": "Invalid webhook secret."}, status_code=403)
+
+ # Inject `request` in kwargs if required
+ if "request" in initial_sig.parameters:
+ kwargs["request"] = request
+
+ # Handle both sync and async routes
+ if inspect.iscoroutinefunction(func):
+ return await func(**kwargs)
+ else:
+ return func(**kwargs)
+
+ # Update signature to include request
+ if "request" not in initial_sig.parameters:
+ _protected_func.__signature__ = initial_sig.replace( # type: ignore
+ parameters=(
+ inspect.Parameter(name="request", kind=inspect.Parameter.POSITIONAL_OR_KEYWORD, annotation=Request),
+ )
+ + tuple(initial_sig.parameters.values())
+ )
+
+ # Return protected route
+ return _protected_func
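Note that FastAPI validates the request body before this wrapper runs, so the 401/403 behaviour assumes a well-formed payload. A hedged client-side sketch (URL, secret, and body are placeholders):

```python
# Sketch: calling a secret-protected endpoint. The URL, secret, and body are assumptions.
import requests

url = "http://127.0.0.1:7860/webhooks/trigger_training"
body = {}  # placeholder: a real call carries the Hub's WebhookPayload JSON

# Missing `x-webhook-secret` header -> 401 (unauthorized).
print(requests.post(url, json=body).status_code)

# Wrong secret -> 403 (forbidden).
print(requests.post(url, json=body, headers={"x-webhook-secret": "wrong"}).status_code)

# Correct secret -> the wrapped webhook function finally runs.
print(requests.post(url, json=body, headers={"x-webhook-secret": "my_secret"}).status_code)
```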
huggingface_hub/commands/__init__.py ADDED
@@ -0,0 +1,27 @@
+ # Copyright 2020 The HuggingFace Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from abc import ABC, abstractmethod
+ from argparse import _SubParsersAction
+
+
+ class BaseHuggingfaceCLICommand(ABC):
+ @staticmethod
+ @abstractmethod
+ def register_subcommand(parser: _SubParsersAction):
+ raise NotImplementedError()
+
+ @abstractmethod
+ def run(self):
+ raise NotImplementedError()
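The two abstract methods above define the whole CLI extension protocol: `register_subcommand` wires an argparse subparser and points its `func` default at the command class, and `run` executes it (see `main()` further down in this diff). A hypothetical minimal subclass:

```python
# Sketch: a hypothetical "hello" subcommand following the BaseHuggingfaceCLICommand protocol.
from argparse import Namespace, _SubParsersAction

from huggingface_hub.commands import BaseHuggingfaceCLICommand


class HelloCommand(BaseHuggingfaceCLICommand):
    @staticmethod
    def register_subcommand(parser: _SubParsersAction):
        hello_parser = parser.add_parser("hello", help="Print a greeting.")
        hello_parser.add_argument("--name", type=str, default="world")
        # The CLI entry point dispatches on `args.func`, so point it at this class.
        hello_parser.set_defaults(func=HelloCommand)

    def __init__(self, args: Namespace) -> None:
        self.name = args.name

    def run(self):
        print(f"Hello, {self.name}!")
```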
huggingface_hub/commands/_cli_utils.py ADDED
@@ -0,0 +1,69 @@
+ # Copyright 2022 The HuggingFace Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains a utility for good-looking prints."""
+
+ import os
+ from typing import List, Union
+
+
+ class ANSI:
+ """
+ Helper for en.wikipedia.org/wiki/ANSI_escape_code
+ """
+
+ _bold = "\u001b[1m"
+ _gray = "\u001b[90m"
+ _red = "\u001b[31m"
+ _reset = "\u001b[0m"
+ _yellow = "\u001b[33m"
+
+ @classmethod
+ def bold(cls, s: str) -> str:
+ return cls._format(s, cls._bold)
+
+ @classmethod
+ def gray(cls, s: str) -> str:
+ return cls._format(s, cls._gray)
+
+ @classmethod
+ def red(cls, s: str) -> str:
+ return cls._format(s, cls._bold + cls._red)
+
+ @classmethod
+ def yellow(cls, s: str) -> str:
+ return cls._format(s, cls._yellow)
+
+ @classmethod
+ def _format(cls, s: str, code: str) -> str:
+ if os.environ.get("NO_COLOR"):
+ # See https://no-color.org/
+ return s
+ return f"{code}{s}{cls._reset}"
+
+
+ def tabulate(rows: List[List[Union[str, int]]], headers: List[str]) -> str:
+ """
+ Inspired by:
+
+ - stackoverflow.com/a/8356620/593036
+ - stackoverflow.com/questions/9535954/printing-lists-as-tabular-data
+ """
+ col_widths = [max(len(str(x)) for x in col) for col in zip(*rows, headers)]
+ row_format = ("{{:{}}} " * len(headers)).format(*col_widths)
+ lines = []
+ lines.append(row_format.format(*headers))
+ lines.append(row_format.format(*["-" * w for w in col_widths]))
+ for row in rows:
+ lines.append(row_format.format(*row))
+ return "\n".join(lines)
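A quick usage sketch of these two helpers; the example rows are made up, and column widths come from the widest cell in each column (headers included):

```python
# Sketch: using ANSI and tabulate from huggingface_hub.commands._cli_utils.
from huggingface_hub.commands._cli_utils import ANSI, tabulate

print(ANSI.bold("repos"))  # bold, unless the NO_COLOR environment variable is set

print(tabulate(
    rows=[["gpt2", "model", 12], ["squad", "dataset", 3]],
    headers=["REPO", "TYPE", "FILES"],
))
# REPO  TYPE    FILES
# ----- ------- -----
# gpt2  model      12
# squad dataset     3
```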
huggingface_hub/commands/delete_cache.py ADDED
@@ -0,0 +1,428 @@
+ # coding=utf-8
+ # Copyright 2022-present, the HuggingFace Inc. team.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains a command to delete some revisions from the HF cache directory.
+
+ Usage:
+ huggingface-cli delete-cache
+ huggingface-cli delete-cache --disable-tui
+ huggingface-cli delete-cache --dir ~/.cache/huggingface/hub
+
+ NOTE:
+ This command is based on `InquirerPy` to build the multiselect menu in the terminal.
+ This dependency has to be installed with `pip install huggingface_hub[cli]`. Since
+ we want to avoid cross-platform issues as much as possible, I chose a library that
+ is built on top of `python-prompt-toolkit`, which seems to be a reference for terminal
+ GUIs (actively maintained on both Unix and Windows, 7.9k stars).
+
+ For the moment, the TUI feature is in beta.
+
+ See:
+ - https://github.com/kazhala/InquirerPy
+ - https://inquirerpy.readthedocs.io/en/latest/
+ - https://github.com/prompt-toolkit/python-prompt-toolkit
+
+ Other solutions could have been:
+ - `simple_term_menu`: would be good as well for our use case but some issues suggest
+ that Windows is less supported.
+ See: https://github.com/IngoMeyer441/simple-term-menu
+ - `PyInquirer`: very similar to `InquirerPy` but older and not maintained anymore.
+ In particular, no support for Python 3.10.
+ See: https://github.com/CITGuru/PyInquirer
+ - `pick` (or `pickpack`): easy to use and flexible but built on top of Python's
+ standard library `curses`, which is specific to Unix (not implemented on Windows).
+ See https://github.com/wong2/pick and https://github.com/anafvana/pickpack.
+ - `inquirer`: a lot of traction (700 stars) but explicitly states "experimental
+ support of Windows". Not built on top of `python-prompt-toolkit`.
+ See https://github.com/magmax/python-inquirer
+
+ TODO: add support for `huggingface-cli delete-cache aaaaaa bbbbbb cccccc (...)`?
+ TODO: add "--keep-last" arg to delete revisions that are not on the `main` ref
+ TODO: add "--filter" arg to filter repositories by name?
+ TODO: add "--sort" arg to sort by size?
+ TODO: add "--limit" arg to limit to X repos?
+ TODO: add "-y" arg for immediate deletion?
+ See discussions in https://github.com/huggingface/huggingface_hub/issues/1025.
+ """
+
+ import os
+ from argparse import Namespace, _SubParsersAction
+ from functools import wraps
+ from tempfile import mkstemp
+ from typing import Any, Callable, Iterable, List, Optional, Union
+
+ from ..utils import CachedRepoInfo, CachedRevisionInfo, HFCacheInfo, scan_cache_dir
+ from . import BaseHuggingfaceCLICommand
+ from ._cli_utils import ANSI
+
+
+ try:
+ from InquirerPy import inquirer
+ from InquirerPy.base.control import Choice
+ from InquirerPy.separator import Separator
+
+ _inquirer_py_available = True
+ except ImportError:
+ _inquirer_py_available = False
+
+
+ def require_inquirer_py(fn: Callable) -> Callable:
+ """Decorator to flag methods that require `InquirerPy`."""
+
+ # TODO: refactor this + imports in a unified pattern across codebase
+ @wraps(fn)
+ def _inner(*args, **kwargs):
+ if not _inquirer_py_available:
+ raise ImportError(
+ "The `delete-cache` command requires extra dependencies to work with"
+ " the TUI.\nPlease run `pip install huggingface_hub[cli]` to install"
+ " them.\nOtherwise, disable TUI using the `--disable-tui` flag."
+ )
+
+ return fn(*args, **kwargs)
+
+ return _inner
+
+
+ # Possibility for the user to cancel deletion
+ _CANCEL_DELETION_STR = "CANCEL_DELETION"
+
+
+ class DeleteCacheCommand(BaseHuggingfaceCLICommand):
+ @staticmethod
+ def register_subcommand(parser: _SubParsersAction):
+ delete_cache_parser = parser.add_parser("delete-cache", help="Delete revisions from the cache directory.")
+
+ delete_cache_parser.add_argument(
+ "--dir",
+ type=str,
+ default=None,
+ help="Cache directory (optional). Defaults to the default Hugging Face cache.",
+ )
+
+ delete_cache_parser.add_argument(
+ "--disable-tui",
+ action="store_true",
+ help=(
+ "Disable Terminal User Interface (TUI) mode. Useful if your"
+ " platform/terminal doesn't support the multiselect menu."
+ ),
+ )
+
+ delete_cache_parser.set_defaults(func=DeleteCacheCommand)
+
+ def __init__(self, args: Namespace) -> None:
+ self.cache_dir: Optional[str] = args.dir
+ self.disable_tui: bool = args.disable_tui
+
+ def run(self):
+ """Run `delete-cache` command with or without TUI."""
+ # Scan cache directory
+ hf_cache_info = scan_cache_dir(self.cache_dir)
+
+ # Manual review from the user
+ if self.disable_tui:
+ selected_hashes = _manual_review_no_tui(hf_cache_info, preselected=[])
+ else:
+ selected_hashes = _manual_review_tui(hf_cache_info, preselected=[])
+
+ # If deletion is not cancelled
+ if len(selected_hashes) > 0 and _CANCEL_DELETION_STR not in selected_hashes:
+ confirm_message = _get_expectations_str(hf_cache_info, selected_hashes) + " Confirm deletion?"
+
+ # Confirm deletion
+ if self.disable_tui:
+ confirmed = _ask_for_confirmation_no_tui(confirm_message)
+ else:
+ confirmed = _ask_for_confirmation_tui(confirm_message)
+
+ # Deletion is confirmed
+ if confirmed:
+ strategy = hf_cache_info.delete_revisions(*selected_hashes)
+ print("Start deletion.")
+ strategy.execute()
+ print(
+ f"Done. Deleted {len(strategy.repos)} repo(s) and"
+ f" {len(strategy.snapshots)} revision(s) for a total of"
+ f" {strategy.expected_freed_size_str}."
+ )
+ return
+
+ # Deletion is cancelled
+ print("Deletion is cancelled. Do nothing.")
+
+
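The TUI path of `run()` hands control to `InquirerPy`'s checkbox prompt; a stripped-down sketch of that interaction pattern, with placeholder hashes (requires `pip install huggingface_hub[cli]`):

```python
# Sketch: the InquirerPy multiselect pattern used by _manual_review_tui below.
from InquirerPy import inquirer
from InquirerPy.base.control import Choice

selected = inquirer.checkbox(
    message="Select revisions to delete:",
    choices=[
        Choice("hash_aaaa", name="hash_aaaa: main # modified 2 days ago"),
        Choice("hash_bbbb", name="hash_bbbb: (detached) # modified 3 weeks ago", enabled=True),
    ],
).execute()
print(selected)  # values of the selected choices, e.g. ["hash_bbbb"]
```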
+ @require_inquirer_py
+ def _manual_review_tui(hf_cache_info: HFCacheInfo, preselected: List[str]) -> List[str]:
+ """Ask the user for a manual review of the revisions to delete.
+
+ Displays a multi-select menu in the terminal (TUI).
+ """
+ # Define multiselect list
+ choices = _get_tui_choices_from_scan(repos=hf_cache_info.repos, preselected=preselected)
+ checkbox = inquirer.checkbox(
+ message="Select revisions to delete:",
+ choices=choices, # List of revisions with some pre-selection
+ cycle=False, # No loop between top and bottom
+ height=100, # Large list if possible
+ # We use the instruction to display to the user the expected effect of the
+ # deletion.
+ instruction=_get_expectations_str(
+ hf_cache_info,
+ selected_hashes=[c.value for c in choices if isinstance(c, Choice) and c.enabled],
+ ),
+ # We use the long instruction to show keybinding instructions to the user
+ long_instruction="Press <space> to select, <enter> to validate and <ctrl+c> to quit without modification.",
+ # Message that is displayed once the user validates their selection.
+ transformer=lambda result: f"{len(result)} revision(s) selected.",
+ )
+
+ # Add a callback to update the information line when a revision is
+ # selected/unselected
+ def _update_expectations(_) -> None:
+ # Hacky way to dynamically set an instruction message to the checkbox when
+ # a revision hash is selected/unselected.
+ checkbox._instruction = _get_expectations_str(
+ hf_cache_info,
+ selected_hashes=[choice["value"] for choice in checkbox.content_control.choices if choice["enabled"]],
+ )
+
+ checkbox.kb_func_lookup["toggle"].append({"func": _update_expectations})
+
+ # Finally display the form to the user.
+ try:
+ return checkbox.execute()
+ except KeyboardInterrupt:
+ return [] # Quit without deletion
+
+
+ @require_inquirer_py
+ def _ask_for_confirmation_tui(message: str, default: bool = True) -> bool:
+ """Ask for confirmation using Inquirer."""
+ return inquirer.confirm(message, default=default).execute()
+
+
+ def _get_tui_choices_from_scan(repos: Iterable[CachedRepoInfo], preselected: List[str]) -> List:
+ """Build a list of choices from the scanned repos.
+
+ Args:
+ repos (*Iterable[`CachedRepoInfo`]*):
+ List of scanned repos on which we want to delete revisions.
+ preselected (*List[`str`]*):
+ List of revision hashes that will be preselected.
+
+ Return:
+ The list of choices to pass to `inquirer.checkbox`.
+ """
+ choices: List[Union[Choice, Separator]] = []
+
+ # First choice is to cancel the deletion. If selected, nothing will be deleted,
+ # no matter the other selected items.
+ choices.append(
+ Choice(
+ _CANCEL_DELETION_STR,
+ name="None of the following (if selected, nothing will be deleted).",
+ enabled=False,
+ )
+ )
+
+ # Display a separator per repo and a Choice for each revision of the repo
+ for repo in sorted(repos, key=_repo_sorting_order):
+ # Repo as separator
+ choices.append(
+ Separator(
+ f"\n{repo.repo_type.capitalize()} {repo.repo_id} ({repo.size_on_disk_str},"
+ f" used {repo.last_accessed_str})"
+ )
+ )
+ for revision in sorted(repo.revisions, key=_revision_sorting_order):
+ # Revision as choice
+ choices.append(
+ Choice(
+ revision.commit_hash,
+ name=(
+ f"{revision.commit_hash[:8]}:"
+ f" {', '.join(sorted(revision.refs)) or '(detached)'} #"
+ f" modified {revision.last_modified_str}"
+ ),
+ enabled=revision.commit_hash in preselected,
+ )
+ )
+
+ # Return choices
+ return choices
+
+
+ def _manual_review_no_tui(hf_cache_info: HFCacheInfo, preselected: List[str]) -> List[str]:
+ """Ask the user for a manual review of the revisions to delete.
+
+ Used when TUI is disabled. Manual review happens in a separate tmp file that the
+ user can manually edit.
+ """
+ # 1. Generate temporary file with delete commands.
+ fd, tmp_path = mkstemp(suffix=".txt") # suffix to make it easier to find by editors
+ os.close(fd)
+
+ lines = []
+ for repo in sorted(hf_cache_info.repos, key=_repo_sorting_order):
+ lines.append(
+ f"\n# {repo.repo_type.capitalize()} {repo.repo_id} ({repo.size_on_disk_str},"
+ f" used {repo.last_accessed_str})"
+ )
+ for revision in sorted(repo.revisions, key=_revision_sorting_order):
+ lines.append(
+ # Deselect by prepending a '#'
+ f"{'' if revision.commit_hash in preselected else '#'} "
+ f" {revision.commit_hash} # Refs:"
+ # Print `refs` as comment on same line
+ f" {', '.join(sorted(revision.refs)) or '(detached)'} # modified"
+ # Print `last_modified` as comment on same line
+ f" {revision.last_modified_str}"
+ )
+
+ with open(tmp_path, "w") as f:
+ f.write(_MANUAL_REVIEW_NO_TUI_INSTRUCTIONS)
+ f.write("\n".join(lines))
+
+ # 2. Prompt instructions to user.
+ instructions = f"""
+ TUI is disabled. In order to select which revisions you want to delete, please edit
+ the following file using the text editor of your choice. Instructions for manual
+ editing are located at the beginning of the file. Edit the file, save it and confirm
+ to continue.
+ File to edit: {ANSI.bold(tmp_path)}
+ """
+ print("\n".join(line.strip() for line in instructions.strip().split("\n")))
+
+ # 3. Wait for user confirmation.
+ while True:
+ selected_hashes = _read_manual_review_tmp_file(tmp_path)
+ if _ask_for_confirmation_no_tui(
+ _get_expectations_str(hf_cache_info, selected_hashes) + " Continue?",
+ default=False,
+ ):
+ break
+
+ # 4. Return selected_hashes
+ os.remove(tmp_path)
+ return selected_hashes
+
+
+ def _ask_for_confirmation_no_tui(message: str, default: bool = True) -> bool:
+ """Ask for confirmation using pure Python."""
+ YES = ("y", "yes", "1")
+ NO = ("n", "no", "0")
+ DEFAULT = ""
+ ALL = YES + NO + (DEFAULT,)
+ full_message = message + (" (Y/n) " if default else " (y/N) ")
+ while True:
+ answer = input(full_message).lower()
+ if answer == DEFAULT:
+ return default
+ if answer in YES:
+ return True
+ if answer in NO:
+ return False
+ print(f"Invalid input. Must be one of {ALL}")
+
+
+ def _get_expectations_str(hf_cache_info: HFCacheInfo, selected_hashes: List[str]) -> str:
+ """Format a string to display to the user how much space would be saved.
+
+ Example:
+ ```
+ >>> _get_expectations_str(hf_cache_info, selected_hashes)
+ '7 revisions selected counting for 4.3G.'
+ ```
+ """
+ if _CANCEL_DELETION_STR in selected_hashes:
+ return "Nothing will be deleted."
+ strategy = hf_cache_info.delete_revisions(*selected_hashes)
+ return f"{len(selected_hashes)} revisions selected counting for {strategy.expected_freed_size_str}."
+
+
+ def _read_manual_review_tmp_file(tmp_path: str) -> List[str]:
+ """Read the manually reviewed instruction file and return a list of revision hashes.
+
+ Example:
+ ```txt
+ # This is the tmp file content
+ ###
+
+ # Commented out line
+ 123456789 # revision hash
+
+ # Something else
+ # a_newer_hash # 2 days ago
+ an_older_hash # 3 days ago
+ ```
+
+ ```py
+ >>> _read_manual_review_tmp_file(tmp_path)
+ ['123456789', 'an_older_hash']
+ ```
+ """
+ with open(tmp_path) as f:
+ content = f.read()
+
+ # Split lines
+ lines = [line.strip() for line in content.split("\n")]
+
+ # Filter commented lines
+ selected_lines = [line for line in lines if not line.startswith("#")]
+
+ # Select only before comment
+ selected_hashes = [line.split("#")[0].strip() for line in selected_lines]
+
+ # Return revision hashes
+ return [hash for hash in selected_hashes if len(hash) > 0]
+
+
+ _MANUAL_REVIEW_NO_TUI_INSTRUCTIONS = f"""
+ # INSTRUCTIONS
+ # ------------
+ # This is a temporary file created by running `huggingface-cli delete-cache` with the
+ # `--disable-tui` option. It contains a set of revisions that can be deleted from your
+ # local cache directory.
+ #
+ # Please manually review the revisions you want to delete:
+ # - Revision hashes can be commented out with '#'.
+ # - Only non-commented revisions in this file will be deleted.
+ # - Revision hashes that are removed from this file are ignored as well.
+ # - If the `{_CANCEL_DELETION_STR}` line is uncommented, the whole cache deletion is cancelled and
+ # no changes will be applied.
+ #
+ # Once you've manually reviewed this file, please confirm deletion in the terminal. This
+ # file will be automatically removed once done.
+ # ------------
+
+ # KILL SWITCH
+ # ------------
+ # Un-comment the following line to completely cancel the deletion process
+ # {_CANCEL_DELETION_STR}
+ # ------------
+
+ # REVISIONS
+ # ------------
+ """.strip()
+
+
+ def _repo_sorting_order(repo: CachedRepoInfo) -> Any:
+ # First split by Dataset/Model, then sort by last accessed (oldest first)
+ return (repo.repo_type, repo.last_accessed)
+
+
+ def _revision_sorting_order(revision: CachedRevisionInfo) -> Any:
+ # Sort by last modified (oldest first)
+ return revision.last_modified
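The command is a thin interactive layer over the cache utilities it imports; a hedged sketch of the same scan-and-delete flow done programmatically (the selection rule is a placeholder):

```python
# Sketch: programmatic equivalent of `delete-cache`, using the same utilities.
from huggingface_hub import scan_cache_dir

hf_cache_info = scan_cache_dir()  # scan the default cache directory

# Placeholder selection rule: every revision not reachable from the `main` ref.
selected_hashes = [
    revision.commit_hash
    for repo in hf_cache_info.repos
    for revision in repo.revisions
    if "main" not in revision.refs
]

strategy = hf_cache_info.delete_revisions(*selected_hashes)
print(f"Would free {strategy.expected_freed_size_str}.")
strategy.execute()  # actually delete the selected snapshots
```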
huggingface_hub/commands/download.py ADDED
@@ -0,0 +1,200 @@
+ # coding=utf-8
+ # Copyright 2023-present, the HuggingFace Inc. team.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains a command to download files from the Hub with the CLI.
+
+ Usage:
+ huggingface-cli download --help
+
+ # Download file
+ huggingface-cli download gpt2 config.json
+
+ # Download entire repo
+ huggingface-cli download fffiloni/zeroscope --repo-type=space --revision=refs/pr/78
+
+ # Download repo with filters
+ huggingface-cli download gpt2 --include="*.safetensors"
+
+ # Download with token
+ huggingface-cli download Wauplin/private-model --token=hf_***
+
+ # Download quietly (no progress bar, no warnings, only the returned path)
+ huggingface-cli download gpt2 config.json --quiet
+
+ # Download to local dir
+ huggingface-cli download gpt2 --local-dir=./models/gpt2
+ """
+
+ import warnings
+ from argparse import Namespace, _SubParsersAction
+ from typing import List, Optional
+
+ from huggingface_hub import logging
+ from huggingface_hub._snapshot_download import snapshot_download
+ from huggingface_hub.commands import BaseHuggingfaceCLICommand
+ from huggingface_hub.file_download import hf_hub_download
+ from huggingface_hub.utils import disable_progress_bars, enable_progress_bars
+
+
+ logger = logging.get_logger(__name__)
+
+
+ class DownloadCommand(BaseHuggingfaceCLICommand):
+ @staticmethod
+ def register_subcommand(parser: _SubParsersAction):
+ download_parser = parser.add_parser("download", help="Download files from the Hub")
+ download_parser.add_argument(
+ "repo_id", type=str, help="ID of the repo to download from (e.g. `username/repo-name`)."
+ )
+ download_parser.add_argument(
+ "filenames", type=str, nargs="*", help="Files to download (e.g. `config.json`, `data/metadata.jsonl`)."
+ )
+ download_parser.add_argument(
+ "--repo-type",
+ choices=["model", "dataset", "space"],
+ default="model",
+ help="Type of repo to download from (defaults to 'model').",
+ )
+ download_parser.add_argument(
+ "--revision",
+ type=str,
+ help="An optional Git revision id which can be a branch name, a tag, or a commit hash.",
+ )
+ download_parser.add_argument(
+ "--include", nargs="*", type=str, help="Glob patterns to match files to download."
+ )
+ download_parser.add_argument(
+ "--exclude", nargs="*", type=str, help="Glob patterns to exclude from files to download."
+ )
+ download_parser.add_argument(
+ "--cache-dir", type=str, help="Path to the directory where downloaded files are saved."
+ )
+ download_parser.add_argument(
+ "--local-dir",
+ type=str,
+ help=(
+ "If set, the downloaded file will be placed under this directory. Check out"
+ " https://huggingface.co/docs/huggingface_hub/guides/download#download-files-to-local-folder for more"
+ " details."
+ ),
+ )
+ download_parser.add_argument(
+ "--local-dir-use-symlinks",
+ choices=["auto", "True", "False"],
+ help=("Deprecated and ignored. Downloading to a local directory does not use symlinks anymore."),
+ )
+ download_parser.add_argument(
+ "--force-download",
+ action="store_true",
+ help="If True, the files will be downloaded even if they are already cached.",
+ )
+ download_parser.add_argument(
+ "--resume-download",
+ action="store_true",
+ help="Deprecated and ignored. Downloading a file to local dir always attempts to resume previously interrupted downloads (unless hf-transfer is enabled).",
+ )
+ download_parser.add_argument(
+ "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens"
+ )
+ download_parser.add_argument(
+ "--quiet",
+ action="store_true",
+ help="If True, progress bars are disabled and only the path to the downloaded files is printed.",
+ )
+ download_parser.add_argument(
+ "--max-workers",
+ type=int,
+ default=8,
+ help="Maximum number of workers to use for downloading files. Default is 8.",
+ )
+ download_parser.set_defaults(func=DownloadCommand)
+
+ def __init__(self, args: Namespace) -> None:
+ self.token = args.token
+ self.repo_id: str = args.repo_id
+ self.filenames: List[str] = args.filenames
+ self.repo_type: str = args.repo_type
+ self.revision: Optional[str] = args.revision
+ self.include: Optional[List[str]] = args.include
+ self.exclude: Optional[List[str]] = args.exclude
+ self.cache_dir: Optional[str] = args.cache_dir
+ self.local_dir: Optional[str] = args.local_dir
+ self.force_download: bool = args.force_download
+ self.resume_download: Optional[bool] = args.resume_download or None
+ self.quiet: bool = args.quiet
+ self.max_workers: int = args.max_workers
+
+ if args.local_dir_use_symlinks is not None:
+ warnings.warn(
+ "Ignoring --local-dir-use-symlinks. Downloading to a local directory does not use symlinks anymore.",
+ FutureWarning,
+ )
+
+ def run(self) -> None:
+ if self.quiet:
+ disable_progress_bars()
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore")
+ print(self._download()) # Print path to downloaded files
+ enable_progress_bars()
+ else:
+ logging.set_verbosity_info()
+ print(self._download()) # Print path to downloaded files
+ logging.set_verbosity_warning()
+
+ def _download(self) -> str:
+ # Warn user if patterns are ignored
+ if len(self.filenames) > 0:
+ if self.include is not None and len(self.include) > 0:
+ warnings.warn("Ignoring `--include` since filenames have been explicitly set.")
+ if self.exclude is not None and len(self.exclude) > 0:
+ warnings.warn("Ignoring `--exclude` since filenames have been explicitly set.")
+
+ # Single file to download: use `hf_hub_download`
+ if len(self.filenames) == 1:
+ return hf_hub_download(
+ repo_id=self.repo_id,
+ repo_type=self.repo_type,
+ revision=self.revision,
+ filename=self.filenames[0],
+ cache_dir=self.cache_dir,
+ resume_download=self.resume_download,
+ force_download=self.force_download,
+ token=self.token,
+ local_dir=self.local_dir,
+ library_name="huggingface-cli",
+ )
+
+ # Otherwise: use `snapshot_download` to ensure all files come from the same revision
+ elif len(self.filenames) == 0:
+ allow_patterns = self.include
+ ignore_patterns = self.exclude
+ else:
+ allow_patterns = self.filenames
+ ignore_patterns = None
+
+ return snapshot_download(
+ repo_id=self.repo_id,
+ repo_type=self.repo_type,
+ revision=self.revision,
+ allow_patterns=allow_patterns,
+ ignore_patterns=ignore_patterns,
+ resume_download=self.resume_download,
+ force_download=self.force_download,
+ cache_dir=self.cache_dir,
+ token=self.token,
+ local_dir=self.local_dir,
+ library_name="huggingface-cli",
+ max_workers=self.max_workers,
+ )
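The dispatch above maps directly onto the two library functions it imports; a short sketch of the equivalent direct calls (the repo and patterns are just examples):

```python
# Sketch: the library calls behind `huggingface-cli download`.
from huggingface_hub import hf_hub_download, snapshot_download

# One explicit filename -> hf_hub_download returns the path to that single file.
path = hf_hub_download(repo_id="gpt2", filename="config.json")
print(path)

# No filenames -> snapshot_download fetches the whole repo (optionally filtered),
# guaranteeing that every file comes from the same revision.
folder = snapshot_download(repo_id="gpt2", allow_patterns=["*.json"])
print(folder)
```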
huggingface_hub/commands/env.py ADDED
@@ -0,0 +1,36 @@
+ # Copyright 2022 The HuggingFace Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains a command to print information about the environment.
+
+ Usage:
+ huggingface-cli env
+ """
+
+ from argparse import _SubParsersAction
+
+ from ..utils import dump_environment_info
+ from . import BaseHuggingfaceCLICommand
+
+
+ class EnvironmentCommand(BaseHuggingfaceCLICommand):
+ def __init__(self, args):
+ self.args = args
+
+ @staticmethod
+ def register_subcommand(parser: _SubParsersAction):
+ env_parser = parser.add_parser("env", help="Print information about the environment.")
+ env_parser.set_defaults(func=EnvironmentCommand)
+
+ def run(self) -> None:
+ dump_environment_info()
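The command is a one-line wrapper, so the same dump can be triggered from Python:

```python
# Sketch: same output as `huggingface-cli env`, called directly.
from huggingface_hub.utils import dump_environment_info

dump_environment_info()  # prints huggingface_hub version, platform, token status, etc.
```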
huggingface_hub/commands/huggingface_cli.py ADDED
@@ -0,0 +1,61 @@
+ # Copyright 2020 The HuggingFace Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from argparse import ArgumentParser
+
+ from huggingface_hub.commands.delete_cache import DeleteCacheCommand
+ from huggingface_hub.commands.download import DownloadCommand
+ from huggingface_hub.commands.env import EnvironmentCommand
+ from huggingface_hub.commands.lfs import LfsCommands
+ from huggingface_hub.commands.repo_files import RepoFilesCommand
+ from huggingface_hub.commands.scan_cache import ScanCacheCommand
+ from huggingface_hub.commands.tag import TagCommands
+ from huggingface_hub.commands.upload import UploadCommand
+ from huggingface_hub.commands.upload_large_folder import UploadLargeFolderCommand
+ from huggingface_hub.commands.user import UserCommands
+ from huggingface_hub.commands.version import VersionCommand
+
+
+ def main():
+ parser = ArgumentParser("huggingface-cli", usage="huggingface-cli <command> [<args>]")
+ commands_parser = parser.add_subparsers(help="huggingface-cli command helpers")
+
+ # Register commands
+ DownloadCommand.register_subcommand(commands_parser)
+ UploadCommand.register_subcommand(commands_parser)
+ RepoFilesCommand.register_subcommand(commands_parser)
+ EnvironmentCommand.register_subcommand(commands_parser)
+ UserCommands.register_subcommand(commands_parser)
+ LfsCommands.register_subcommand(commands_parser)
+ ScanCacheCommand.register_subcommand(commands_parser)
+ DeleteCacheCommand.register_subcommand(commands_parser)
+ TagCommands.register_subcommand(commands_parser)
+ VersionCommand.register_subcommand(commands_parser)
+
+ # Experimental
+ UploadLargeFolderCommand.register_subcommand(commands_parser)
+
+ # Let's go
+ args = parser.parse_args()
+ if not hasattr(args, "func"):
+ parser.print_help()
+ exit(1)
+
+ # Run
+ service = args.func(args)
+ service.run()
+
+
+ if __name__ == "__main__":
+ main()
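The `set_defaults(func=...)` trick is what lets `main()` stay generic: `args.func` is the command class itself, so `args.func(args)` builds the command and `.run()` executes it. The pattern in isolation, with a stand-in command:

```python
# Sketch: the argparse `set_defaults(func=...)` dispatch pattern in isolation.
from argparse import ArgumentParser


class EchoCommand:  # stand-in for a BaseHuggingfaceCLICommand subclass
    def __init__(self, args):
        self.text = args.text

    def run(self):
        print(self.text)


parser = ArgumentParser("demo")
sub = parser.add_subparsers()
echo = sub.add_parser("echo")
echo.add_argument("text")
echo.set_defaults(func=EchoCommand)  # `args.func` becomes the class itself

args = parser.parse_args(["echo", "hi"])
args.func(args).run()  # prints "hi"
```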