yangheng committed
Commit fca804e · 1 Parent(s): c68f812
This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. .gitattributes +5 -3
  2. .gitignore +1 -0
  3. .gradio/certificate.pem +31 -0
  4. SuperResolutionAnimeDiffusion.zip → 1boy.png +2 -2
  5. 1girl.png +3 -0
  6. README.md +1 -12
  7. Waifu2x/model_check_points/CRAN_V2/CARN_adam_checkpoint.pt +3 -0
  8. Waifu2x/model_check_points/CRAN_V2/CARN_scheduler_last_iter.pt +3 -0
  9. Waifu2x/model_check_points/CRAN_V2/CRAN_V2_02_28_2019.pt +3 -0
  10. Waifu2x/model_check_points/CRAN_V2/ReadME.md +34 -27
  11. Waifu2x/model_check_points/CRAN_V2/test_loss.pt +3 -0
  12. Waifu2x/model_check_points/CRAN_V2/test_psnr.pt +3 -0
  13. Waifu2x/model_check_points/CRAN_V2/test_ssim.pt +3 -0
  14. Waifu2x/model_check_points/CRAN_V2/train_loss.pt +3 -0
  15. Waifu2x/model_check_points/CRAN_V2/train_psnr.pt +3 -0
  16. Waifu2x/model_check_points/CRAN_V2/train_ssim.pt +3 -0
  17. Waifu2x/model_check_points/DCSCN/DCSCN_model_387epos_L12_noise_1.pt +3 -0
  18. Waifu2x/model_check_points/DCSCN/DCSCN_weights_387epos_L12_noise_1.pt +3 -0
  19. Waifu2x/model_check_points/DCSCN/DCSCN_weights_45epos_L8_noise_1.pt +3 -0
  20. Waifu2x/model_check_points/DCSCN/ReadME.md +13 -0
  21. Waifu2x/model_check_points/ESPCN/ESPCN_7_weights_14epos.pk +3 -0
  22. Waifu2x/model_check_points/Upconv_7/anime.7z +3 -0
  23. Waifu2x/model_check_points/Upconv_7/photo.7z +3 -0
  24. Waifu2x/model_check_points/vgg_7/art.7z +3 -0
  25. Waifu2x/model_check_points/vgg_7/art_y.7z +3 -0
  26. Waifu2x/model_check_points/vgg_7/photo.7z +3 -0
  27. Waifu2x/model_check_points/vgg_7/ukbench.7z +3 -0
  28. app.py +641 -435
  29. gfpgan/weights/detection_Resnet50_Final.pth +3 -0
  30. gfpgan/weights/parsing_parsenet.pth +3 -0
  31. huggingface_hub/README.md +358 -0
  32. huggingface_hub/__init__.py +968 -0
  33. huggingface_hub/_commit_api.py +729 -0
  34. huggingface_hub/_commit_scheduler.py +327 -0
  35. huggingface_hub/_inference_endpoints.py +396 -0
  36. huggingface_hub/_local_folder.py +425 -0
  37. huggingface_hub/_login.py +397 -0
  38. huggingface_hub/_multi_commits.py +306 -0
  39. huggingface_hub/_snapshot_download.py +304 -0
  40. huggingface_hub/_space_api.py +160 -0
  41. huggingface_hub/_tensorboard_logger.py +195 -0
  42. huggingface_hub/_upload_large_folder.py +621 -0
  43. huggingface_hub/_webhooks_payload.py +137 -0
  44. huggingface_hub/_webhooks_server.py +386 -0
  45. huggingface_hub/commands/__init__.py +27 -0
  46. huggingface_hub/commands/_cli_utils.py +69 -0
  47. huggingface_hub/commands/delete_cache.py +428 -0
  48. huggingface_hub/commands/download.py +200 -0
  49. huggingface_hub/commands/env.py +36 -0
  50. huggingface_hub/commands/huggingface_cli.py +61 -0
.gitattributes CHANGED
@@ -29,8 +29,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.tgz filter=lfs diff=lfs merge=lfs -text
  *.wasm filter=lfs diff=lfs merge=lfs -text
  *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
- *zip filter=lfs diff=lfs merge=lfs -text
- SuperResolutionAnimeDiffusion.zip filter=lfs diff=lfs merge=lfs -text
- random_examples.zip filter=lfs diff=lfs merge=lfs -text
+ scenery.png filter=lfs diff=lfs merge=lfs -text
+ 1boy.png filter=lfs diff=lfs merge=lfs -text
+ 1girl.png filter=lfs diff=lfs merge=lfs -text
+ *.pk filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -10,6 +10,7 @@ integrated_datasets/
  *.state_dict
  *.config
  *.args
+ *.zip
  *.gz
  *.bin
  *.result.txt
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
+ -----BEGIN CERTIFICATE-----
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+ -----END CERTIFICATE-----
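For context: the base64 above decodes to the widely distributed ISRG Root X1 (Let's Encrypt) root certificate, which Gradio appears to drop into `.gradio/`. A quick inspection sketch — the `cryptography` package is an assumption, not a dependency of this repo:

```python
# Hedged inspection snippet; `cryptography` is not part of this repository.
from cryptography import x509

with open(".gradio/certificate.pem", "rb") as f:
    cert = x509.load_pem_x509_certificate(f.read())

print(cert.subject.rfc4514_string())  # CN=ISRG Root X1,O=Internet Security Research Group,C=US
print(cert.not_valid_after)           # validity ending in 2035, per the encoded dates
```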
SuperResolutionAnimeDiffusion.zip → 1boy.png RENAMED
File without changes
1girl.png ADDED

Git LFS Details
  • SHA256: c7d13eec13f7f7a98c225c9f2340461ad1bbfbc6ef7b44ecb96eb0ca73d2723d
  • Pointer size: 132 Bytes
  • Size of remote file: 2.11 MB
README.md CHANGED
@@ -1,15 +1,4 @@
- ---
- title: Anything V3.0
- emoji: 🏃
- colorFrom: gray
- colorTo: yellow
- sdk: gradio
- sdk_version: 3.10.1
- app_file: app.py
- pinned: false
- ---
- 
- # If you have a GPU, try the [Stable Diffusion WebUI](https://github.com/yangheng95/stable-diffusion-webui)
+ # Super Resolution Anime Diffusion
  
  
  # [Online Web Demo](https://huggingface.co/spaces/yangheng/Super-Resolution-Anime-Diffusion)
Waifu2x/model_check_points/CRAN_V2/CARN_adam_checkpoint.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:292f2be9ea173861e4a7f6cf580f04fe9a1fc6c78fdac6f182cbc051ea50791e
+ size 31734614
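All of the `.pt`, `.pk`, `.7z`, and `.pth` files added in this commit are Git LFS pointers with the same three-line layout shown above. A minimal sketch of reading one — the parsing helper is illustrative, not from the repo:

```python
# Illustrative only: split a Git LFS pointer file into its key/value fields.
def read_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

ptr = read_lfs_pointer("Waifu2x/model_check_points/CRAN_V2/CARN_adam_checkpoint.pt")
print(ptr["oid"], ptr["size"])  # sha256:292f2be9... 31734614
```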
Waifu2x/model_check_points/CRAN_V2/CARN_scheduler_last_iter.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ba2302e523d32bfeb9b542a9dc6aa5ecdb45babc793892153245d6c69ae23433
+ size 151
Waifu2x/model_check_points/CRAN_V2/CRAN_V2_02_28_2019.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b74e163d829f6f587e3fdb0b645342e494416accb1962cf0973354de5ec157ea
+ size 49895595
Waifu2x/model_check_points/CRAN_V2/ReadME.md CHANGED
@@ -1,34 +1,41 @@
- # Resume & Use Model Check Points
- 
- This folder contains check points for models and their weights. They are generated from [PyTorch's pickle](https://pytorch.org/docs/master/notes/serialization.html).
- 
- Model specifications are in each folder's ReadME.
- 
- Pickle names with "model" contain the entire models, and they can be used as an freeze module by calling the "forward_checkpoint" function to generate images.
- 
- Example:
- ```python
- import torch
- # No need to reconstruct the model
- model = torch.load("./DCSCN/DCSCN_model_387epos_L12_noise_1.pt")
- x = torch.randn((1,3,10,10)), torch.randn((1,3,20,20))
- out = model.forward_checkpoint(a)
- ```
- 
- Pickle names with "weights" are model weights, and they are named dictionaries.
- 
- Example:
- ```python
- model = DCSCN(*) # the setting must be the same to load check points weights.
- model.load_state_dict(torch.load("./DCSCN/DCSCN_weights_387epos_L12_noise_1.pt"))
- # then you can resume the model training
- ```
- 
- Model check poins in Upconv_7 and vgg_7 are from [waifu2x's repo](https://github.com/nagadomi/waifu2x/tree/master/models). To load weights into a model, please use ```load_pre_train_weights``` function.
- 
- Example:
- ```python
- model = UpConv_7()
- model.load_pre_train_weights(json_file=...)
- # then the model is ready to use
- ```
+ # Model Specifications
+ 
+ 
+ ```python
+ model_cran_v2 = CARN_V2(color_channels=3, mid_channels=64, conv=nn.Conv2d,
+                         single_conv_size=3, single_conv_group=1,
+                         scale=2, activation=nn.LeakyReLU(0.1),
+                         SEBlock=True, repeat_blocks=3, atrous=(1, 1, 1))
+ 
+ model_cran_v2 = network_to_half(model_cran_v2)
+ checkpoint = "CARN_model_checkpoint.pt"
+ model_cran_v2.load_state_dict(torch.load(checkpoint, 'cpu'))
+ model_cran_v2 = model_cran_v2.float()  # if using CPU
+ 
+ ```
+ 
+ To use the pre-trained model for training:
+ 
+ ```python
+ 
+ model = CARN_V2(color_channels=3, mid_channels=64, conv=nn.Conv2d,
+                 single_conv_size=3, single_conv_group=1,
+                 scale=2, activation=nn.LeakyReLU(0.1),
+                 SEBlock=True, repeat_blocks=3, atrous=(1, 1, 1))
+ 
+ model = network_to_half(model)
+ model = model.cuda()
+ model.load_state_dict(torch.load("CARN_model_checkpoint.pt"))
+ 
+ learning_rate = 1e-4
+ weight_decay = 1e-6
+ optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay, amsgrad=True)
+ optimizer = FP16_Optimizer(optimizer, static_loss_scale=128.0, verbose=False)
+ optimizer.load_state_dict(torch.load("CARN_adam_checkpoint.pt"))
+ 
+ last_iter = torch.load("CARN_scheduler_last_iter")  # -1 if start from new
+ scheduler = CyclicLR(optimizer.optimizer, base_lr=1e-4, max_lr=4e-4,
+                      step_size=3 * total_batch, mode="triangular",
+                      last_batch_iteration=last_iter)
+ 
+ ```
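Once the checkpoint is loaded as the new ReadME shows, inference is a plain forward pass. A hedged sketch — the tensor shape is illustrative, and `model_cran_v2` is the instance configured above:

```python
import torch

# Assuming model_cran_v2 was built and loaded as in the README (CPU/float32 path).
model_cran_v2.eval()
with torch.no_grad():
    low_res = torch.rand(1, 3, 64, 64)   # dummy RGB input tensor
    upscaled = model_cran_v2(low_res)    # scale=2 should yield (1, 3, 128, 128)
print(upscaled.shape)
```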
Waifu2x/model_check_points/CRAN_V2/test_loss.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:93f644a6a3f6636035980855f56ef3dbc8784679371b06b81e0e4d06067c142d
+ size 43507
Waifu2x/model_check_points/CRAN_V2/test_psnr.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ae8f8d1a3d175e76dcbcdcf0cede898e8f2cf169f3eec14eeb75a4e19d8e2d6b
+ size 42563
Waifu2x/model_check_points/CRAN_V2/test_ssim.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:763ff936f536b12b37b351c09f3c1290fb2188399aea3d9ce3cf069bd0d135e7
+ size 43515
Waifu2x/model_check_points/CRAN_V2/train_loss.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:85a86e94cd689adff04c4b22bf2534d17aa52af5e7309a82bc2a4f5c6c144900
+ size 15564175
Waifu2x/model_check_points/CRAN_V2/train_psnr.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2d1e88646b74a054ddf20ba41368a01162e35d9c88ac72f392a6ba08a5c7ef3b
+ size 15564175
Waifu2x/model_check_points/CRAN_V2/train_ssim.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6b8da8bc73f64997c5b2d15d6161b11dbd172258a62c88572c032feb73bd022b
+ size 15564175
Waifu2x/model_check_points/DCSCN/DCSCN_model_387epos_L12_noise_1.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7aaf293584618b446868910a173de4eed2e054f33e325f9c93cabacb0937e6d5
+ size 7585347
Waifu2x/model_check_points/DCSCN/DCSCN_weights_387epos_L12_noise_1.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8faddf6e3bf6acf688642a99da23d5626a6173c1eb92d2cdd26a5d3dd6a73da4
+ size 7568033
Waifu2x/model_check_points/DCSCN/DCSCN_weights_45epos_L8_noise_1.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9b8c7b3c6c4bc1b8d48186352f9d74b685210ca8a372a06bd8718c2d20e0769e
+ size 9746842
Waifu2x/model_check_points/DCSCN/ReadME.md ADDED
@@ -0,0 +1,13 @@
+ # Model Specifications
+ 
+ ## 12 Layers Model
+ 
+ ```python
+ model = DCSCN(color_channel=3,
+               up_scale=2,
+               feature_layers=12,
+               first_feature_filters=196,
+               last_feature_filters=48,
+               reconstruction_filters=64,
+               up_sampler_filters=32)
+ ```
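The removed top-level ReadME explained that files named "weights" are plain state dicts; a sketch of pairing this configuration with the matching checkpoint added above (the path is assumed relative to the checkpoint folder):

```python
import torch

# The constructor arguments must match the ones the checkpoint was trained with.
model = DCSCN(color_channel=3, up_scale=2, feature_layers=12,
              first_feature_filters=196, last_feature_filters=48,
              reconstruction_filters=64, up_sampler_filters=32)
state = torch.load("DCSCN_weights_387epos_L12_noise_1.pt", map_location="cpu")
model.load_state_dict(state)
model.eval()  # ready for inference, or keep training to resume
```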
Waifu2x/model_check_points/ESPCN/ESPCN_7_weights_14epos.pk ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:60088b9b7865535dae982af5f6ca2e361ecb6ce9ee1cc43c8ce4f6b1e1a4abe7
+ size 5388762
Waifu2x/model_check_points/Upconv_7/anime.7z ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0b4514f546498bf8966dd74e806d2f4034573809f91ca02659710d666235266d
+ size 19867323
Waifu2x/model_check_points/Upconv_7/photo.7z ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c7a173165da9b2b101f8964c55ce2472b3ce15a7a6f742804037e5c7a5a321ae
+ size 19872894
Waifu2x/model_check_points/vgg_7/art.7z ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ae5e88101e4b5591e795ffa8661b36c4986bf9ce9e762a9e21d9f268a2a8effe
+ size 10456728
Waifu2x/model_check_points/vgg_7/art_y.7z ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7f24fcbf0e0d2a9d9242e3188fe8fb3de82d77da82a5228664be4dc2a69aef7a
+ size 8281792
Waifu2x/model_check_points/vgg_7/photo.7z ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a96d475054665d050c370f3786097690920523c71231ef276ab2c7d011d305b1
+ size 10459233
Waifu2x/model_check_points/vgg_7/ukbench.7z ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:05f6e10f467b10ab66a9a4d41443a7f280e67925eb50c96fc8e43287ce56e205
+ size 2088088
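The Upconv_7 and vgg_7 archives above ship waifu2x's original JSON weights in 7z form; the removed CRAN_V2 ReadME said to load them with `load_pre_train_weights(json_file=...)`. A sketch of unpacking first — `py7zr` is an assumption here, any 7z extractor works:

```python
import py7zr

# Unpack the committed archive next to itself; the member names inside vary by archive.
with py7zr.SevenZipFile("Waifu2x/model_check_points/vgg_7/art.7z", mode="r") as archive:
    archive.extractall(path="Waifu2x/model_check_points/vgg_7")

# Then, per the removed ReadME (JSON filename is hypothetical):
# model = UpConv_7()
# model.load_pre_train_weights(json_file="Waifu2x/model_check_points/vgg_7/art/<weights>.json")
```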
app.py CHANGED
@@ -1,20 +1,43 @@
  import os
- import random
  import zipfile
- import findfile
  import PIL.Image
  import autocuda
- from pyabsa.utils.pyabsa_utils import fprint
- 
- try:
-     for z_file in findfile.find_cwd_files(and_key=['.zip'],
-                                           exclude_key=['.ignore', 'git', 'SuperResolutionAnimeDiffusion'],
-                                           recursive=10):
-         fprint(f"Extracting {z_file}...")
-         with zipfile.ZipFile(z_file, 'r') as zip_ref:
-             zip_ref.extractall(os.path.dirname(z_file))
- except Exception as e:
-     os.system('unzip random_examples.zip')
  
  from diffusers import (
      AutoencoderKL,
@@ -27,59 +50,95 @@ import gradio as gr
  import torch
  from PIL import Image
  import utils
- import datetime
- import time
- import psutil
  from Waifu2x.magnify import ImageMagnifier
  from RealESRGANv030.interface import realEsrgan
  
- magnifier = ImageMagnifier()
  
  start_time = time.time()
  is_colab = utils.is_google_colab()
- 
- CUDA_VISIBLE_DEVICES = ""
  device = autocuda.auto_cuda()
- 
  dtype = torch.float16 if device != "cpu" else torch.float32
  
- 
  
  class Model:
-     def __init__(self, name, path="", prefix=""):
          self.name = name
          self.path = path
          self.prefix = prefix
          self.pipe_t2i = None
          self.pipe_i2i = None
  
- 
  models = [
-     # Model("anything v3", "Linaqruf/anything-v3.0", "anything v3 style"),
-     Model("anything v5", "stablediffusionapi/anything-v5", "anything v5 style"),
  ]
- # Model("Spider-Verse", "nitrosocke/spider-verse-diffusion", "spiderverse style "),
- # Model("Balloon Art", "Fictiverse/Stable_Diffusion_BalloonArt_Model", "BalloonArt "),
- # Model("Elden Ring", "nitrosocke/elden-ring-diffusion", "elden ring style "),
- # Model("Tron Legacy", "dallinmackay/Tron-Legacy-diffusion", "trnlgcy ")
- # Model("Pokémon", "lambdalabs/sd-pokemon-diffusers", ""),
- # Model("Pony Diffusion", "AstraliteHeart/pony-diffusion", ""),
- # Model("Robo Diffusion", "nousr/robo-diffusion", ""),
- 
- scheduler = DPMSolverMultistepScheduler(
-     beta_start=0.00085,
-     beta_end=0.012,
-     beta_schedule="scaled_linear",
-     num_train_timesteps=1000,
-     trained_betas=None,
-     predict_epsilon=True,
-     thresholding=False,
-     algorithm_type="dpmsolver++",
-     solver_type="midpoint",
-     solver_order=2,
-     # lower_order_final=True,
- )
  
  custom_model = None
  if is_colab:
      models.insert(0, Model("Custom model"))
@@ -88,177 +147,198 @@ if is_colab:
  last_mode = "txt2img"
  current_model = models[1] if is_colab else models[0]
  current_model_path = current_model.path
  
- if is_colab:
-     pipe = StableDiffusionPipeline.from_pretrained(
-         current_model.path,
-         torch_dtype=dtype,
-         scheduler=scheduler,
-         safety_checker=lambda images, clip_input: (images, False),
-     )
  
- else:  # download all models
-     print(f"{datetime.datetime.now()} Downloading vae...")
-     vae = AutoencoderKL.from_pretrained(
-         current_model.path, subfolder="vae", torch_dtype=dtype
-     )
-     for model in models:
          try:
-             print(f"{datetime.datetime.now()} Downloading {model.name} model...")
-             unet = UNet2DConditionModel.from_pretrained(
-                 model.path, subfolder="unet", torch_dtype=dtype
-             )
-             model.pipe_t2i = StableDiffusionPipeline.from_pretrained(
-                 model.path,
-                 unet=unet,
-                 vae=vae,
-                 torch_dtype=dtype,
-                 scheduler=scheduler,
-                 safety_checker=None,
-             )
-             model.pipe_i2i = StableDiffusionImg2ImgPipeline.from_pretrained(
-                 model.path,
-                 unet=unet,
-                 vae=vae,
                  torch_dtype=dtype,
                  scheduler=scheduler,
                  safety_checker=None,
              )
          except Exception as e:
-             print(
-                 f"{datetime.datetime.now()} Failed to load model "
-                 + model.name
-                 + ": "
-                 + str(e)
              )
-             models.remove(model)
-     pipe = models[0].pipe_t2i
- 
-     # model.pipe_i2i = torch.compile(model.pipe_i2i)
-     # model.pipe_t2i = torch.compile(model.pipe_t2i)
- if torch.cuda.is_available():
-     pipe = pipe.to(device)
  
- # device = "GPU 🔥" if torch.cuda.is_available() else "CPU 🥶"
- 
  
- def error_str(error, title="Error"):
      return (
-         f"""#### {title}
-             {error}"""
-         if error
-         else ""
      )
  
- 
- def custom_model_changed(path):
-     models[0].path = path
      global current_model
-     current_model = models[0]
- 
  
- def on_model_change(model_name):
-     prefix = (
-         'Enter prompt. "'
-         + next((m.prefix for m in models if m.name == model_name), None)
-         + '" is prefixed automatically'
-         if model_name != models[0].name
-         else "Don't forget to use the custom model prefix in the prompt!"
-     )
  
-     return (
-         gr.update(visible=model_name == models[0].name),
-         gr.update(placeholder=prefix),
-     )
  
- 
- def inference(
-     model_name,
-     prompt,
-     guidance,
-     steps,
-     width=512,
-     height=512,
-     seed=0,
-     img=None,
-     strength=0.5,
-     neg_prompt="",
-     scale="ESRGAN4x",
-     scale_factor=2,
- ):
-     fprint(psutil.virtual_memory())  # print memory usage
- 
-     fprint(f"Prompt: {prompt}")
-     global current_model
-     for model in models:
-         if model.name == model_name:
-             current_model = model
-             model_path = current_model.path
- 
-     generator = torch.Generator(device).manual_seed(seed) if seed != 0 else None
  
      try:
-         if img is not None:
-             return (
-                 img_to_img(
-                     model_path,
-                     prompt,
-                     neg_prompt,
-                     img,
-                     strength,
-                     guidance,
-                     steps,
-                     width,
-                     height,
-                     generator,
-                     scale,
-                     scale_factor,
-                 ),
-                 None,
              )
          else:
-             return (
-                 txt_to_img(
-                     model_path,
-                     prompt,
-                     neg_prompt,
-                     guidance,
-                     steps,
-                     width,
-                     height,
-                     generator,
-                     scale,
-                     scale_factor,
-                 ),
-                 None,
              )
-     except Exception as e:
-         return None, error_str(e)
-     # if img is not None:
-     #     return img_to_img(model_path, prompt, neg_prompt, img, strength, guidance, steps, width, height,
-     #                       generator, scale, scale_factor), None
-     # else:
-     #     return txt_to_img(model_path, prompt, neg_prompt, guidance, steps, width, height, generator, scale, scale_factor), None
  
  
  def txt_to_img(
-     model_path,
-     prompt,
-     neg_prompt,
-     guidance,
-     steps,
-     width,
-     height,
-     generator,
-     scale,
-     scale_factor,
- ):
-     print(f"{datetime.datetime.now()} txt_to_img, model: {current_model.name}")
- 
-     global last_mode
-     global pipe
-     global current_model_path
      if model_path != current_model_path or last_mode != "txt2img":
          current_model_path = model_path
  
@@ -267,70 +347,63 @@ def txt_to_img(
              current_model_path,
              torch_dtype=dtype,
              scheduler=scheduler,
-             safety_checker=lambda images, clip_input: (images, False),
          )
      else:
-         # pipe = pipe.to("cpu")
          pipe = current_model.pipe_t2i
  
      if torch.cuda.is_available():
          pipe = pipe.to(device)
      last_mode = "txt2img"
  
-     prompt = current_model.prefix + prompt
      result = pipe(
-         prompt,
          negative_prompt=neg_prompt,
-         # num_images_per_prompt=n_images,
          num_inference_steps=int(steps),
          guidance_scale=guidance,
          width=width,
          height=height,
          generator=generator,
-     )
  
-     # result.images[0] = magnifier.magnify(result.images[0], scale_factor=scale_factor)
-     # enhance resolution
      if scale_factor > 1:
-         if scale == "ESRGAN4x":
-             fp32 = True if device == "cpu" else False
-             result.images[0] = realEsrgan(
-                 input_dir=result.images[0],
                  suffix="",
                  output_dir="imgs",
                  fp32=fp32,
                  outscale=scale_factor,
              )[0]
-         else:
-             result.images[0] = magnifier.magnify(
-                 result.images[0], scale_factor=scale_factor
-             )
-     # save image
-     result.images[0].save(
-         "imgs/result-{}.png".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
-     )
-     return replace_nsfw_images(result)
  
  
  def img_to_img(
-     model_path,
-     prompt,
-     neg_prompt,
-     img,
-     strength,
-     guidance,
-     steps,
-     width,
-     height,
-     generator,
-     scale,
-     scale_factor,
- ):
-     fprint(f"{datetime.datetime.now()} img_to_img, model: {model_path}")
- 
-     global last_mode
-     global pipe
-     global current_model_path
      if model_path != current_model_path or last_mode != "img2img":
          current_model_path = model_path
  
@@ -339,263 +412,396 @@ def img_to_img(
              current_model_path,
              torch_dtype=dtype,
              scheduler=scheduler,
-             safety_checker=lambda images, clip_input: (images, False),
          )
      else:
-         # pipe = pipe.to("cpu")
          pipe = current_model.pipe_i2i
  
      if torch.cuda.is_available():
          pipe = pipe.to(device)
      last_mode = "img2img"
  
-     prompt = current_model.prefix + prompt
      ratio = min(height / img.height, width / img.width)
      img = img.resize((int(img.width * ratio), int(img.height * ratio)), Image.LANCZOS)
      result = pipe(
-         prompt,
          negative_prompt=neg_prompt,
-         # num_images_per_prompt=n_images,
          image=img,
          num_inference_steps=int(steps),
          strength=strength,
          guidance_scale=guidance,
-         # width=width,
-         # height=height,
          generator=generator,
-     )
      if scale_factor > 1:
-         if scale == "ESRGAN4x":
-             fp32 = True if device == "cpu" else False
-             result.images[0] = realEsrgan(
-                 input_dir=result.images[0],
                  suffix="",
                  output_dir="imgs",
                  fp32=fp32,
                  outscale=scale_factor,
              )[0]
-         else:
-             result.images[0] = magnifier.magnify(
-                 result.images[0], scale_factor=scale_factor
-             )
-     # save image
-     result.images[0].save(
-         "imgs/result-{}.png".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
-     )
-     return replace_nsfw_images(result)
  
  
- def replace_nsfw_images(results):
-     if is_colab:
-         return results.images[0]
-     if hasattr(results, "nsfw_content_detected") and results.nsfw_content_detected:
-         for i in range(len(results.images)):
-             if results.nsfw_content_detected[i]:
-                 results.images[i] = Image.open("nsfw.png")
-     return results.images[0]
  
  
- css = """.finetuned-diffusion-div div{display:inline-flex;align-items:center;gap:.8rem;font-size:1.75rem}.finetuned-diffusion-div div h1{font-weight:900;margin-bottom:7px}.finetuned-diffusion-div p{margin-bottom:10px;font-size:94%}a{text-decoration:underline}.tabs{margin-top:0;margin-bottom:0}#gallery{min-height:20rem}
- """
- with gr.Blocks(css=css) as demo:
-     if not os.path.exists("imgs"):
-         os.mkdir("imgs")
  
-     gr.Markdown("# Super Resolution Anime Diffusion")
-     gr.Markdown(
-         "## Author: [yangheng95](https://github.com/yangheng95) Github:[Github](https://github.com/yangheng95/stable-diffusion-webui)"
-     )
-     gr.Markdown(
-         "### This demo is running on a CPU, so it will take at least 20 minutes. "
-         "If you have a GPU, you can clone from [Github](https://github.com/yangheng95/SuperResolutionAnimeDiffusion) and run it locally."
-     )
-     gr.Markdown(
-         "### FYI: to generate a 512*512 image and magnify 4x, it only takes 5~8 seconds on a RTX 2080 GPU"
-     )
-     gr.Markdown(
-         "### You can duplicate this demo on HuggingFace Spaces, click [here](https://huggingface.co/spaces/yangheng/Super-Resolution-Anime-Diffusion?duplicate=true)"
-     )
  
-     with gr.Row():
-         with gr.Column(scale=55):
-             with gr.Group():
-                 gr.Markdown("Text to image")
  
-                 model_name = gr.Dropdown(
-                     label="Model",
-                     choices=[m.name for m in models],
-                     value=current_model.name,
-                 )
  
-                 with gr.Box(visible=False) as custom_model_group:
-                     custom_model_path = gr.Textbox(
-                         label="Custom model path",
-                         placeholder="Path to model, e.g. nitrosocke/Arcane-Diffusion",
-                         interactive=True,
-                     )
-                     gr.HTML(
-                         "<div><font size='2'>Custom models have to be downloaded first, so give it some time.</font></div>"
-                     )
  
-                 with gr.Row():
-                     prompt = gr.Textbox(
-                         label="Prompt",
-                         show_label=False,
-                         max_lines=2,
-                         placeholder="Enter prompt. Style applied automatically",
-                     ).style(container=False)
-                 with gr.Row():
-                     generate = gr.Button(value="Generate")
  
-             with gr.Row():
-                 with gr.Group():
-                     neg_prompt = gr.Textbox(
-                         label="Negative prompt",
-                         value="bad result, worst, random, invalid, inaccurate, imperfect, blurry, deformed,"
-                         " disfigured, mutation, mutated, ugly, out of focus, bad anatomy, text, error,"
-                         " extra digit, fewer digits, worst quality, low quality, normal quality, noise, "
-                         "jpeg artifact, compression artifact, signature, watermark, username, logo, "
-                         "low resolution, worst resolution, bad resolution, normal resolution, bad detail,"
-                         " bad details, bad lighting, bad shadow, bad shading, bad background,"
-                         " worst background.",
                      )
  
-             image_out = gr.Image(height="auto", width="auto")
-             error_output = gr.Markdown()
  
-             with gr.Row():
-                 gr.Markdown(
-                     "# Random Image Generation Preview (512*768)x4 magnified"
-                 )
-             for f_img in findfile.find_cwd_files(".png", recursive=2):
-                 with gr.Row():
-                     image = gr.Image(height=512, value=PIL.Image.open(f_img))
-             # gallery = gr.Gallery(
-             #     label="Generated images", show_label=False, elem_id="gallery"
-             # ).style(grid=[1], height="auto")
  
-         with gr.Column(scale=45):
-             with gr.Group():
-                 gr.Markdown("Image to Image")
  
                  with gr.Row():
-                     with gr.Group():
-                         image = gr.Image(
-                             label="Image", height=256, tool="editor", type="pil"
                          )
-                         strength = gr.Slider(
-                             label="Transformation strength",
-                             minimum=0,
-                             maximum=1,
-                             step=0.01,
-                             value=0.5,
                          )
  
-                 with gr.Row():
-                     with gr.Group():
-                         # n_images = gr.Slider(label="Images", value=1, minimum=1, maximum=4, step=1)
  
-                         with gr.Row():
-                             guidance = gr.Slider(
-                                 label="Guidance scale", value=7.5, maximum=15
-                             )
-                             steps = gr.Slider(
-                                 label="Steps", value=15, minimum=2, maximum=75, step=1
-                             )
  
-                         with gr.Row():
-                             width = gr.Slider(
-                                 label="Width",
-                                 value=512,
-                                 minimum=64,
-                                 maximum=1024,
-                                 step=8,
-                             )
-                             height = gr.Slider(
-                                 label="Height",
-                                 value=768,
-                                 minimum=64,
-                                 maximum=1024,
-                                 step=8,
-                             )
-                         with gr.Row():
-                             scale = gr.Radio(
-                                 label="Scale",
-                                 choices=["Waifu2x", "ESRGAN4x"],
-                                 value="Waifu2x",
-                             )
-                         with gr.Row():
-                             scale_factor = gr.Slider(
-                                 1,
-                                 8,
-                                 label="Scale factor (to magnify image) (1, 2, 4, 8)",
-                                 value=1,
-                                 step=1,
                              )
  
-                         seed = gr.Slider(
-                             0, 2147483647, label="Seed (0 = random)", value=0, step=1
                          )
  
-     if is_colab:
-         model_name.change(
-             on_model_change,
-             inputs=model_name,
-             outputs=[custom_model_group, prompt],
-             queue=False,
          )
-         custom_model_path.change(
-             custom_model_changed, inputs=custom_model_path, outputs=None
          )
-     # n_images.change(lambda n: gr.Gallery().style(grid=[2 if n > 1 else 1], height="auto"), inputs=n_images, outputs=gallery)
  
-     gr.Markdown(
-         "### based on [Anything V5]"
-     )
  
-     inputs = [
-         model_name,
-         prompt,
-         guidance,
-         steps,
-         width,
-         height,
-         seed,
-         image,
-         strength,
-         neg_prompt,
-         scale,
-         scale_factor,
-     ]
-     outputs = [image_out, error_output]
-     prompt.submit(inference, inputs=inputs, outputs=outputs)
-     generate.click(inference, inputs=inputs, outputs=outputs, api_name="generate")
- 
-     prompt_keys = [
-         "girl",
-         "lovely",
-         "cute",
-         "beautiful eyes",
-         "cumulonimbus clouds",
-         random.choice(["dress"]),
-         random.choice(["white hair"]),
-         random.choice(["blue eyes"]),
-         random.choice(["flower meadow"]),
-         random.choice(["Elif", "Angel"]),
-     ]
-     prompt.value = ",".join(prompt_keys)
-     ex = gr.Examples(
-         [
-             [models[0].name, prompt.value, 7.5, 15],
-         ],
-         inputs=[model_name, prompt, guidance, steps, seed],
-         outputs=outputs,
-         fn=inference,
-         cache_examples=False,
-     )
  
- print(f"Space built in {time.time() - start_time:.2f} seconds")
  
- if not is_colab:
-     demo.queue(concurrency_count=2)
- demo.launch(debug=is_colab, enable_queue=True, share=is_colab)
1
+ """
2
+ Super Resolution Anime Diffusion - Enhanced WebUI
3
+
4
+ This is an enhanced version of the original Super Resolution Anime Diffusion project by yangheng95.
5
+ The WebUI has been improved with modern Gradio API implementation, better user experience,
6
+ and comprehensive documentation.
7
+
8
+ Key Contributions:
9
+ - Updated to use modern Gradio Blocks API for better interface organization
10
+ - Added tabbed interface for Text-to-Image, Image-to-Image, and Gallery views
11
+ - Improved error handling and user feedback with progress indicators
12
+ - Enhanced UI styling with custom CSS and responsive design
13
+ - Better parameter organization with collapsible accordions
14
+ - Real-time system information display
15
+
16
+ Instructions:
17
+ 1. Choose between Text-to-Image or Image-to-Image tabs
18
+ 2. Select a model from the dropdown (or provide custom model path)
19
+ 3. Enter your prompt and adjust parameters as needed
20
+ 4. For Image-to-Image: upload a base image to transform
21
+ 5. Configure super-resolution settings (method and scale factor)
22
+ 6. Click Generate to create high-quality anime images with automatic upscaling
23
+
24
+ Original Author: yangheng95
25
+ Original Repository: https://github.com/yangheng95/SuperResolutionAnimeDiffusion
26
+ License: Creative ML Open RAIL-M
27
+ Enhanced WebUI by AI Assistant
28
+ """
29
+
30
  import os
31
+ import sys
32
  import zipfile
33
+ from typing import Optional, List, Tuple
34
+ from datetime import datetime
35
+ import time
36
+ import psutil
37
+
38
  import PIL.Image
39
  import autocuda
40
+ import findfile
 
 
 
 
 
 
 
 
 
 
41
 
42
  from diffusers import (
43
  AutoencoderKL,
 
50
  import torch
51
  from PIL import Image
52
  import utils
 
 
 
53
  from Waifu2x.magnify import ImageMagnifier
54
  from RealESRGANv030.interface import realEsrgan
55
 
56
+ sys.path.append(os.path.dirname(__file__)) # Ensure current directory is in path
57
+ os.environ["PYTHONPATH"] = os.path.dirname(__file__)
58
+
59
+ # Application Configuration
60
+ APP_TITLE = "🎨 Super Resolution Anime Diffusion"
61
+ APP_DESCRIPTION = """
62
+ Generate high-quality anime images with automatic super resolution enhancement.
63
+ Combines Stable Diffusion models with advanced upscaling techniques (RealESRGAN & Waifu2x).
64
+ """
65
 
66
+ CONTRIBUTION_INFO = """
67
+ ### 🤝 Enhanced Features
68
+ This interface improves upon the original work with:
69
+ - **Modern UI**: Clean tabbed interface with Gradio Blocks
70
+ - **Better UX**: Progress tracking and real-time feedback
71
+ - **Enhanced Parameters**: Organized controls with descriptions
72
+ - **Gallery View**: Browse and manage generated images
73
+ - **Error Handling**: Comprehensive error reporting and recovery
74
+ """
75
+
76
+ INSTRUCTIONS = """
77
+ ### 🚀 How to Use
78
+ 1. **Select Mode**: Choose Text-to-Image or Image-to-Image tab
79
+ 2. **Pick Model**: Select from available models or use custom path
80
+ 3. **Create Prompt**: Describe your desired image (use negative prompt to avoid elements)
81
+ 4. **Upload Image**: For img2img mode, provide base image
82
+ 5. **Adjust Settings**: Fine-tune resolution, steps, and guidance
83
+ 6. **Set Upscaling**: Choose super-resolution method and scale
84
+ 7. **Generate**: Click the generate button and wait for results!
85
+ """
86
+
87
+ COPYRIGHT_INFO = """
88
+ **Original Author**: [yangheng95](https://github.com/yangheng95) |
89
+ **Repository**: [SuperResolutionAnimeDiffusion](https://github.com/yangheng95/SuperResolutionAnimeDiffusion) |
90
+ **License**: Creative ML Open RAIL-M | **Enhanced by**: AI Assistant
91
+ """
92
+
93
+ DEFAULT_NEGATIVE_PROMPT = "bad result, worst, random, invalid, inaccurate, imperfect, blurry, deformed, disfigured, mutation, mutated, ugly, out of focus, bad anatomy, text, error, extra digit, fewer digits, worst quality, low quality, normal quality, noise, jpeg artifact, compression artifact, signature, watermark, username, logo, low resolution, worst resolution, bad resolution, normal resolution, bad detail, bad details, bad lighting, bad shadow, bad shading, bad background, worst background"
94
+
95
+ # Initialization
96
+ magnifier = ImageMagnifier()
97
  start_time = time.time()
98
  is_colab = utils.is_google_colab()
 
 
99
  device = autocuda.auto_cuda()
 
100
  dtype = torch.float16 if device != "cpu" else torch.float32
101
 
102
+ # Extract zip files if needed
103
+ for z_file in findfile.find_cwd_files(and_key=['.zip'], exclude_key=['.ignore'], recursive=1):
104
+ try:
105
+ with zipfile.ZipFile(z_file, 'r') as zip_ref:
106
+ zip_ref.extractall()
107
+ except Exception as e:
108
+ print(f"Warning: Could not extract {z_file}: {e}")
109
 
110
  class Model:
111
+ """Model configuration class"""
112
+ def __init__(self, name: str, path: str = "", prefix: str = ""):
113
  self.name = name
114
  self.path = path
115
  self.prefix = prefix
116
  self.pipe_t2i = None
117
  self.pipe_i2i = None
118
 
119
+ # Model configurations
120
  models = [
121
+ Model("Anything v4.5", "xyn-ai/anything-v4.0", "anything v4.5 style"),
 
122
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
 
124
+ # Scheduler configuration
125
+ scheduler = DPMSolverMultistepScheduler.from_config({
126
+ "beta_start": 0.00085,
127
+ "beta_end": 0.012,
128
+ "beta_schedule": "scaled_linear",
129
+ "num_train_timesteps": 1000,
130
+ "trained_betas": None,
131
+ "prediction_type": "epsilon",
132
+ "thresholding": False,
133
+ "algorithm_type": "dpmsolver++",
134
+ "solver_type": "midpoint",
135
+ "solver_order": 2,
136
+ "use_karras_sigmas": False,
137
+ "timestep_spacing": "leading",
138
+ "steps_offset": 1
139
+ })
140
+
141
+ # Global state
142
  custom_model = None
143
  if is_colab:
144
  models.insert(0, Model("Custom model"))
 
147
  last_mode = "txt2img"
148
  current_model = models[1] if is_colab else models[0]
149
  current_model_path = current_model.path
150
+ pipe = None
151
 
152
+ def initialize_models():
153
+ """Initialize diffusion models with error handling"""
154
+ global pipe
 
 
 
 
155
 
156
+ if is_colab:
 
 
 
 
 
157
  try:
158
+ pipe = StableDiffusionPipeline.from_pretrained(
159
+ current_model.path,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  torch_dtype=dtype,
161
  scheduler=scheduler,
162
  safety_checker=None,
163
  )
164
  except Exception as e:
165
+ print(f"Failed to initialize model: {e}")
166
+ return
167
+ else:
168
+ print(f"{datetime.now()} Loading models...")
169
+ try:
170
+ vae = AutoencoderKL.from_pretrained(
171
+ current_model.path, subfolder="vae", torch_dtype=dtype
172
  )
 
 
 
 
 
 
 
173
 
174
+ for model in models[:]:
175
+ try:
176
+ print(f"Loading {model.name}...")
177
+ unet = UNet2DConditionModel.from_pretrained(
178
+ model.path, subfolder="unet", torch_dtype=dtype
179
+ )
180
+ model.pipe_t2i = StableDiffusionPipeline.from_pretrained(
181
+ model.path,
182
+ unet=unet,
183
+ vae=vae,
184
+ torch_dtype=dtype,
185
+ scheduler=scheduler,
186
+ safety_checker=None,
187
+ )
188
+ model.pipe_i2i = StableDiffusionImg2ImgPipeline.from_pretrained(
189
+ model.path,
190
+ unet=unet,
191
+ vae=vae,
192
+ torch_dtype=dtype,
193
+ scheduler=scheduler,
194
+ safety_checker=None,
195
+ )
196
+ print(f"✅ {model.name} loaded successfully")
197
+ except Exception as e:
198
+ print(f"❌ Failed to load {model.name}: {e}")
199
+ models.remove(model)
200
 
201
+ if models:
202
+ pipe = models[0].pipe_t2i
203
+ except Exception as e:
204
+ print(f"Failed to initialize models: {e}")
205
+ return
206
+
207
+ if torch.cuda.is_available() and pipe:
208
+ pipe = pipe.to(device)
209
+
210
+ def get_system_info() -> str:
211
+ """Get system information"""
212
+ gpu_name = "CPU"
213
+ if torch.cuda.is_available():
214
+ gpu_name = torch.cuda.get_device_name()
215
+
216
+ memory = psutil.virtual_memory()
217
+ return f"🖥️ Device: {gpu_name} | 💾 RAM: {memory.available // (1024**3):.1f}GB"
218
+
219
+ def error_str(error: Exception, title: str = "Error") -> str:
220
+ """Format error messages"""
221
+ return f"### ❌ {title}\n```\n{str(error)}\n```"
222
+
223
+ def custom_model_changed(path: str) -> str:
224
+ """Handle custom model path changes"""
225
+ if custom_model and path.strip():
226
+ models[0].path = path.strip()
227
+ global current_model
228
+ current_model = models[0]
229
+ return "✅ Custom model path updated"
230
+ return "❌ Please enter a valid model path"
231
+
232
+ def on_model_change(model_name: str) -> Tuple[gr.update, gr.update]:
233
+ """Handle model selection changes"""
234
+ selected_model = next((m for m in models if m.name == model_name), None)
235
+
236
+ if selected_model and selected_model != models[0] if custom_model else True:
237
+ prefix_text = f'Prompt (automatically prefixed with "{selected_model.prefix}")'
238
+ is_custom = False
239
+ else:
240
+ prefix_text = "Enter prompt (remember to include model-specific prefix)"
241
+ is_custom = True
242
 
 
243
  return (
244
+ gr.update(visible=is_custom),
245
+ gr.update(placeholder=prefix_text),
 
 
246
  )
247
 
248
+ def generate_image(
249
+ mode: str,
250
+ model_name: str,
251
+ prompt: str,
252
+ negative_prompt: str,
253
+ width: int,
254
+ height: int,
255
+ guidance_scale: float,
256
+ num_steps: int,
257
+ seed: int,
258
+ image: Optional[PIL.Image.Image],
259
+ strength: float,
260
+ scale_method: str,
261
+ scale_factor: int,
262
+ progress=gr.Progress()
263
+ ) -> Tuple[Optional[PIL.Image.Image], str]:
264
+ """Main image generation function"""
265
+
266
+ if progress:
267
+ progress(0, desc="Starting generation...")
268
+
269
+ # Validation
270
+ if not prompt.strip():
271
+ return None, "❌ Please enter a prompt"
272
+
273
+ if mode == "img2img" and image is None:
274
+ return None, "❌ Please upload an image for Image-to-Image mode"
275
+
276
+ # Find model
277
  global current_model
278
+ selected_model = next((m for m in models if m.name == model_name), None)
279
+ if not selected_model:
280
+ return None, error_str(ValueError(f"Model '{model_name}' not found"))
281
 
282
+ current_model = selected_model
 
 
 
 
 
 
 
283
 
284
+ if progress:
285
+ progress(0.1, desc=f"Using {model_name}")
 
 
286
 
287
+ # Setup generator
288
+ if seed <= 0:
289
+ seed = torch.randint(0, 2**32-1, (1,)).item()
290
+ generator = torch.Generator(device).manual_seed(seed)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
 
292
  try:
293
+ if mode == "img2img":
294
+ result_image = img_to_img(
295
+ current_model.path, prompt, negative_prompt, image, strength,
296
+ guidance_scale, num_steps, width, height, generator,
297
+ scale_method, scale_factor, progress
 
 
 
 
 
 
 
 
 
 
 
 
298
  )
299
  else:
300
+ result_image = txt_to_img(
301
+ current_model.path, prompt, negative_prompt, guidance_scale,
302
+ num_steps, width, height, generator, scale_method, scale_factor,
303
+ progress
 
 
 
 
 
 
 
 
 
 
304
  )
 
 
 
 
 
 
 
305
 
306
+ if progress:
307
+ progress(1.0, desc="Complete!")
308
+
309
+ # Save result
310
+ timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
311
+ os.makedirs("imgs", exist_ok=True)
312
+ filename = f"imgs/result-{timestamp}.png"
313
+ result_image.save(filename)
314
+
315
+ info = f"""### ✅ Generation Complete
316
+ - **Mode**: {mode}
317
+ - **Model**: {model_name}
318
+ - **Resolution**: {result_image.size[0]}x{result_image.size[1]}
319
+ - **Scale**: {scale_factor}x ({scale_method})
320
+ - **Seed**: {seed}
321
+ - **Saved**: {filename}"""
322
+
323
+ return result_image, info
324
+
325
+ except Exception as e:
326
+ print(f"Generation error: {e}")
327
+ return None, error_str(e, "Generation Failed")
328
 
329
  def txt_to_img(
330
+ model_path: str, prompt: str, neg_prompt: str, guidance: float,
331
+ steps: int, width: int, height: int, generator, scale: str,
332
+ scale_factor: int, progress
333
+ ) -> PIL.Image.Image:
334
+ """Text-to-image generation"""
335
+
336
+ global last_mode, pipe, current_model_path
337
+
338
+ if progress:
339
+ progress(0.2, desc="Loading pipeline...")
340
+
341
+ # Load pipeline if needed
 
 
 
 
342
  if model_path != current_model_path or last_mode != "txt2img":
343
  current_model_path = model_path
344
 
 
347
  current_model_path,
348
  torch_dtype=dtype,
349
  scheduler=scheduler,
350
+ safety_checker=None,
351
  )
352
  else:
 
353
  pipe = current_model.pipe_t2i
354
 
355
  if torch.cuda.is_available():
356
  pipe = pipe.to(device)
357
  last_mode = "txt2img"
358
 
359
+ if progress:
360
+ progress(0.4, desc="Generating image...")
361
+
362
+ # Add model prefix
363
+ full_prompt = f"{current_model.prefix}, {prompt}" if current_model.prefix else prompt
364
+
365
  result = pipe(
366
+ full_prompt,
367
  negative_prompt=neg_prompt,
 
368
  num_inference_steps=int(steps),
369
  guidance_scale=guidance,
370
  width=width,
371
  height=height,
372
  generator=generator,
373
+ ).images[0]
374
 
375
+ if progress:
376
+ progress(0.7, desc="Applying super resolution...")
377
+
378
+ # Apply super resolution
379
  if scale_factor > 1:
380
+ if scale == "RealESRGAN":
381
+ fp32 = device == "cpu"
382
+ result = realEsrgan(
383
+ input_dir=result,
384
  suffix="",
385
  output_dir="imgs",
386
  fp32=fp32,
387
  outscale=scale_factor,
388
  )[0]
389
+ else: # Waifu2x
390
+ result = magnifier.magnify(result, scale_factor=scale_factor)
 
 
 
 
 
 
 
391
 
392
+ return result
393
 
394
  def img_to_img(
395
+ model_path: str, prompt: str, neg_prompt: str, img: PIL.Image.Image,
396
+ strength: float, guidance: float, steps: int, width: int, height: int,
397
+ generator, scale: str, scale_factor: int, progress
398
+ ) -> PIL.Image.Image:
399
+ """Image-to-image generation"""
400
+
401
+ global last_mode, pipe, current_model_path
402
+
403
+ if progress:
404
+ progress(0.2, desc="Loading pipeline...")
405
+
406
+ # Load pipeline if needed
 
 
 
 
 
 
407
  if model_path != current_model_path or last_mode != "img2img":
408
  current_model_path = model_path
409
 
 
412
  current_model_path,
413
  torch_dtype=dtype,
414
  scheduler=scheduler,
415
+ safety_checker=None,
416
  )
417
  else:
 
418
  pipe = current_model.pipe_i2i
419
 
420
  if torch.cuda.is_available():
421
  pipe = pipe.to(device)
422
  last_mode = "img2img"
423
 
424
+ # Resize input image
425
+ if progress:
426
+ progress(0.3, desc="Processing input image...")
427
+
428
  ratio = min(height / img.height, width / img.width)
429
  img = img.resize((int(img.width * ratio), int(img.height * ratio)), Image.LANCZOS)
430
+
431
+ # Add model prefix
432
+ full_prompt = f"{current_model.prefix}, {prompt}" if current_model.prefix else prompt
433
+
434
+ if progress:
435
+ progress(0.4, desc="Transforming image...")
436
+
437
  result = pipe(
438
+ full_prompt,
439
  negative_prompt=neg_prompt,
 
440
  image=img,
441
  num_inference_steps=int(steps),
442
  strength=strength,
443
  guidance_scale=guidance,
 
 
444
  generator=generator,
445
+ ).images[0]
446
+
447
+ if progress:
448
+ progress(0.7, desc="Applying super resolution...")
449
+
450
+ # Apply super resolution
451
  if scale_factor > 1:
452
+ if scale == "RealESRGAN":
453
+ fp32 = device == "cpu"
454
+ result = realEsrgan(
455
+ input_dir=result,
456
  suffix="",
457
  output_dir="imgs",
458
  fp32=fp32,
459
  outscale=scale_factor,
460
  )[0]
461
+ else: # Waifu2x
462
+ result = magnifier.magnify(result, scale_factor=scale_factor)
463
+
464
+ return result
465
+
466
+ def load_example_images() -> List[str]:
467
+ """Load example images for gallery"""
468
+ example_images = []
469
+ for f_img in findfile.find_cwd_files(".png", recursive=2):
470
+ if "result-" in os.path.basename(f_img) or "random_examples" in f_img:
471
+ example_images.append(f_img)
472
+ return example_images[:12] # Limit examples
473
+
474
+ # Custom CSS for styling
475
+ custom_css = """
476
+ .gradio-container {
477
+ font-family: 'Segoe UI', system-ui, sans-serif;
478
+ max-width: 1400px;
479
+ margin: 0 auto;
480
+ }
481
+
482
+ .header-section {
483
+ text-align: center;
484
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
485
+ color: white;
486
+ padding: 2rem;
487
+ border-radius: 15px;
488
+ margin-bottom: 2rem;
489
+ }
490
+
491
+ .info-card {
492
+ background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
493
+ color: white;
494
+ padding: 1.5rem;
495
+ border-radius: 10px;
496
+ margin: 1rem 0;
497
+ }
498
+
499
+ .status-info {
500
+ background: #e8f5e8;
501
+ border-left: 4px solid #4CAF50;
502
+ padding: 1rem;
503
+ border-radius: 5px;
504
+ margin: 1rem 0;
505
+ }
506
+
507
+ .generate-btn {
508
+ background: linear-gradient(45deg, #FF6B6B, #4ECDC4) !important;
509
+ border: none !important;
510
+ border-radius: 25px !important;
511
+ padding: 15px 30px !important;
512
+ font-size: 16px !important;
513
+ font-weight: bold !important;
514
+ color: white !important;
515
+ transition: all 0.3s ease !important;
516
+ }
517
+
518
+ .generate-btn:hover {
519
+ transform: translateY(-2px) !important;
520
+ box-shadow: 0 10px 20px rgba(0,0,0,0.2) !important;
521
+ }
522
+ """
523
 
524
+ def create_interface():
525
+ """Create the Gradio interface"""
526
 
527
+ with gr.Blocks(title=APP_TITLE, css=custom_css) as demo:
 
 
 
 
 
 
 
528
 
529
+ # Header
530
+ with gr.Row():
531
+ gr.HTML(f"""
532
+ <div class="header-section">
533
+ <h1 style="font-size: 2.5rem; margin-bottom: 1rem;">{APP_TITLE}</h1>
534
+ <p style="font-size: 1.2rem; margin-bottom: 1rem;">{APP_DESCRIPTION}</p>
535
+ <div style="font-size: 1rem;">{get_system_info()}</div>
536
+ </div>
537
+ """)
538
 
539
+ # Info sections
540
+ with gr.Row():
541
+ with gr.Column():
542
+ gr.Markdown(INSTRUCTIONS, elem_classes=["info-card"])
543
+ with gr.Column():
544
+ gr.Markdown(CONTRIBUTION_INFO, elem_classes=["info-card"])
545
 
546
+ # Copyright
547
+ gr.Markdown(f"### 📄 {COPYRIGHT_INFO}", elem_classes=["status-info"])
 
 
 
 
 
 
 
 
 
 
 
 
548
 
549
+ # Main interface
550
+ with gr.Tabs():
 
 
551
 
552
+ # Text-to-Image Tab
553
+ with gr.TabItem("🎨 Text-to-Image"):
554
+ with gr.Row():
555
+ with gr.Column(scale=1):
 
556
 
557
+ # Model selection
558
+ model_dropdown = gr.Dropdown(
559
+ choices=[m.name for m in models],
560
+ value=current_model.name,
561
+ label="🤖 Model Selection"
562
+ )
 
 
 
563
 
564
+ # Custom model path
565
+ custom_model_path = gr.Textbox(
566
+ label="🔗 Custom Model Path (HuggingFace)",
567
+ placeholder="username/model-name",
568
+ visible=custom_model is not None
569
+ )
 
 
 
570
 
571
+ # Prompts
572
+ prompt_txt2img = gr.Textbox(
573
+ label="✨ Prompt",
574
+ placeholder="Describe your desired image...",
575
+ lines=3
 
 
 
 
 
 
576
  )
577
 
578
+ negative_prompt_txt2img = gr.Textbox(
579
+ label="🚫 Negative Prompt",
580
+ value=DEFAULT_NEGATIVE_PROMPT,
581
+ lines=2
582
+ )
583
 
584
+ # Parameters
585
+ with gr.Accordion("🎛️ Generation Parameters", open=False):
586
+ with gr.Row():
587
+ width_txt2img = gr.Slider(256, 1024, 512, step=64, label="Width")
588
+ height_txt2img = gr.Slider(256, 1024, 512, step=64, label="Height")
 
 
 
 
 
589
 
590
+ with gr.Row():
591
+ guidance_scale_txt2img = gr.Slider(1, 20, 7.5, step=0.5, label="Guidance Scale")
592
+ num_steps_txt2img = gr.Slider(10, 50, 20, label="Steps")
593
 
594
+ seed_txt2img = gr.Number(label="Seed (-1 for random)", value=-1, precision=0)
595
+
596
+ # Super Resolution
597
+ with gr.Accordion("🔍 Super Resolution", open=True):
598
+ scale_method_txt2img = gr.Radio(
599
+ choices=["RealESRGAN", "Waifu2x"],
600
+ value="RealESRGAN",
601
+ label="Method"
602
+ )
603
+ scale_factor_txt2img = gr.Slider(1, 4, 2, step=1, label="Scale Factor")
604
+
605
+ with gr.Column(scale=1):
606
+ # Generate button
607
+ generate_btn_txt2img = gr.Button(
608
+ "🎨 Generate Image",
609
+ variant="primary",
610
+ elem_classes=["generate-btn"]
611
+ )
612
+
613
+ # Output
614
+ output_image_txt2img = gr.Image(label="Generated Image", type="pil")
615
+ output_info_txt2img = gr.Markdown("Ready to generate! 🚀")
616
+
617
+ # Image-to-Image Tab
618
+ with gr.TabItem("🖼️ Image-to-Image"):
619
  with gr.Row():
620
+ with gr.Column(scale=1):
621
+
622
+ # Input image
623
+ input_image_img2img = gr.Image(
624
+ label="📤 Input Image",
625
+ type="pil"
626
  )
627
+
628
+ # Model selection
629
+ model_dropdown_img2img = gr.Dropdown(
630
+ choices=[m.name for m in models],
631
+ value=current_model.name,
632
+ label="🤖 Model Selection"
633
  )
634
 
635
+ # Prompts
636
+ prompt_img2img = gr.Textbox(
637
+ label=" Transformation Prompt",
638
+ placeholder="How to transform the image...",
639
+ lines=3
640
+ )
641
 
642
+ negative_prompt_img2img = gr.Textbox(
643
+ label="🚫 Negative Prompt",
644
+ value=DEFAULT_NEGATIVE_PROMPT,
645
+ lines=2
646
+ )
 
 
647
 
648
+ # Parameters
649
+ with gr.Accordion("🎛️ Generation Parameters", open=False):
650
+ with gr.Row():
651
+ width_img2img = gr.Slider(256, 1024, 512, step=64, label="Width")
652
+ height_img2img = gr.Slider(256, 1024, 512, step=64, label="Height")
653
+
654
+ strength_img2img = gr.Slider(
655
+ 0.1, 1.0, 0.75, step=0.05,
656
+ label="Strength (how much to change)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
657
  )
658
 
659
+ with gr.Row():
660
+ guidance_scale_img2img = gr.Slider(1, 20, 7.5, step=0.5, label="Guidance")
661
+ num_steps_img2img = gr.Slider(10, 50, 20, label="Steps")
662
+
663
+ seed_img2img = gr.Number(label="Seed (-1 for random)", value=-1, precision=0)
664
+
665
+ # Super Resolution
666
+ with gr.Accordion("🔍 Super Resolution", open=True):
667
+ scale_method_img2img = gr.Radio(
668
+ choices=["RealESRGAN", "Waifu2x"],
669
+ value="RealESRGAN",
670
+ label="Method"
671
+ )
672
+ scale_factor_img2img = gr.Slider(1, 4, 2, step=1, label="Scale Factor")
673
+
674
+ with gr.Column(scale=1):
675
+ # Generate button
676
+ generate_btn_img2img = gr.Button(
677
+ "🖼️ Transform Image",
678
+ variant="primary",
679
+ elem_classes=["generate-btn"]
680
  )
681
 
682
+ # Output
683
+ output_image_img2img = gr.Image(label="Transformed Image", type="pil")
684
+ output_info_img2img = gr.Markdown("Upload an image to transform! 🖼️")
685
+
686
+ # Gallery Tab
687
+ with gr.TabItem("🖼️ Gallery"):
688
+ gr.Markdown("### 🎨 Generated Images")
689
+
690
+ with gr.Row():
691
+ refresh_gallery_btn = gr.Button("🔄 Refresh Gallery", variant="secondary")
692
+
693
+ example_gallery = gr.Gallery(
694
+ value=load_example_images(),
695
+ label="Results Gallery",
696
+ show_label=False,
697
+ columns=4,
698
+ height="auto"
699
+ )
700
+
701
+ # Event handlers
702
+
703
+ # Model changes
704
+ model_dropdown.change(
705
+ fn=on_model_change,
706
+ inputs=[model_dropdown],
707
+ outputs=[custom_model_path, prompt_txt2img]
708
  )
709
+
710
+ # Sync models between tabs
711
+ model_dropdown.change(
712
+ fn=lambda x: gr.update(value=x),
713
+ inputs=[model_dropdown],
714
+ outputs=[model_dropdown_img2img]
715
  )
 
716
 
717
+ model_dropdown_img2img.change(
718
+ fn=lambda x: gr.update(value=x),
719
+ inputs=[model_dropdown_img2img],
720
+ outputs=[model_dropdown]
721
+ )
722
 
723
+ # Custom model path
724
+ if custom_model:
725
+ custom_model_path.change(
726
+ fn=custom_model_changed,
727
+ inputs=[custom_model_path],
728
+ outputs=[output_info_txt2img]
729
+ )
730
+
731
+ # Generation events
732
+ generate_btn_txt2img.click(
733
+ fn=generate_image,
734
+ inputs=[
735
+ gr.State("txt2img"),
736
+ model_dropdown,
737
+ prompt_txt2img,
738
+ negative_prompt_txt2img,
739
+ width_txt2img,
740
+ height_txt2img,
741
+ guidance_scale_txt2img,
742
+ num_steps_txt2img,
743
+ seed_txt2img,
744
+ gr.State(None), # No input image for txt2img
745
+ gr.State(0.75), # Default strength
746
+ scale_method_txt2img,
747
+ scale_factor_txt2img
748
+ ],
749
+ outputs=[output_image_txt2img, output_info_txt2img]
750
+ )
751
+
752
+ generate_btn_img2img.click(
753
+ fn=generate_image,
754
+ inputs=[
755
+ gr.State("img2img"),
756
+ model_dropdown_img2img,
757
+ prompt_img2img,
758
+ negative_prompt_img2img,
759
+ width_img2img,
760
+ height_img2img,
761
+ guidance_scale_img2img,
762
+ num_steps_img2img,
763
+ seed_img2img,
764
+ input_image_img2img,
765
+ strength_img2img,
766
+ scale_method_img2img,
767
+ scale_factor_img2img
768
+ ],
769
+ outputs=[output_image_img2img, output_info_img2img]
770
+ )
771
 
772
+ # Gallery refresh
773
+ refresh_gallery_btn.click(
774
+ fn=load_example_images,
775
+ outputs=[example_gallery]
776
+ )
777
+
778
+ return demo
779
+
780
+ if __name__ == "__main__":
781
+ # Initialize
782
+ print(f"🚀 Starting {APP_TITLE}...")
783
+ print(f"⏱️ Initialization time: {time.time() - start_time:.2f}s")
784
+ print(f"🖥️ {get_system_info()}")
785
+
786
+ # Ensure output directory
787
+ os.makedirs("imgs", exist_ok=True)
788
+
789
+ # Initialize models
790
+ initialize_models()
791
+
792
+ # Create and launch interface
793
+ demo = create_interface()
794
+
795
+ # Launch settings
796
+ launch_kwargs = {
797
+ "share": False,
798
+ "server_name": "0.0.0.0",
799
+ "server_port": 7860,
800
+ "show_error": True,
801
+ }
802
+
803
+ if is_colab:
804
+ launch_kwargs["share"] = True
805
 
806
+ print("🌐 Launching WebUI...")
807
+ demo.launch(**launch_kwargs)
 
gfpgan/weights/detection_Resnet50_Final.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d1de9c2944f2ccddca5f5e010ea5ae64a39845a86311af6fdf30841b0a5a16d
3
+ size 109497761
gfpgan/weights/parsing_parsenet.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d558d8d0e42c20224f13cf5a29c79eba2d59913419f945545d8cf7b72920de2
3
+ size 85331193
huggingface_hub/README.md ADDED
@@ -0,0 +1,358 @@
1
+ # Hugging Face Hub Client library
2
+
3
+ ## Download files from the Hub
4
+
5
+ The `hf_hub_download()` function is the main entry point for downloading files from the Hub. One
6
+ advantage of using it is that files are cached locally, so you won't have to
7
+ download the same files multiple times. If the repository changes, the updated
8
+ files are automatically downloaded again.
9
+
10
+
11
+ ### `hf_hub_download`
12
+
13
+ The function takes the following parameters, downloads the remote file,
14
+ stores it to disk (in a version-aware way) and returns its local file path.
15
+
16
+ Parameters:
17
+ - a `repo_id` (a user or organization name and a repo name, separated by `/`, like `julien-c/EsperBERTo-small`)
18
+ - a `filename` (like `pytorch_model.bin`)
19
+ - an optional Git revision id (can be a branch name, a tag, or a commit hash)
20
+ - a `cache_dir` which you can specify if you want to control where on disk the
21
+ files are cached.
22
+
23
+ ```python
24
+ from huggingface_hub import hf_hub_download
25
+ hf_hub_download("lysandre/arxiv-nlp", filename="config.json")
26
+ ```
27
+
28
+ ### `snapshot_download`
29
+
30
+ Using `hf_hub_download()` works well when you know which files you want to download;
31
+ for example, a model file alongside a configuration file, both with static names.
32
+ There are cases in which you will prefer to download all the files of the remote
33
+ repository at a specified revision. That's what `snapshot_download()` does. It
34
+ downloads and stores a remote repository to disk (in a version-aware way) and
35
+ returns its local file path.
36
+
37
+ Parameters:
38
+ - a `repo_id` in the format `namespace/repository`
39
+ - a `revision` at which the repository will be downloaded
40
+ - a `cache_dir` which you can specify if you want to control where on disk the
41
+ files are cached
42
+
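+ A minimal sketch (reusing the `lysandre/arxiv-nlp` repo id from above; the revision is an assumption):
+
+ ```python
+ from huggingface_hub import snapshot_download
+ local_folder = snapshot_download("lysandre/arxiv-nlp", revision="main")
+ ```
+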
43
+ ### `hf_hub_url`
44
+
45
+ Internally, the library uses `hf_hub_url()` to return the URL to download the actual files:
46
+ `https://huggingface.co/julien-c/EsperBERTo-small/resolve/main/pytorch_model.bin`
47
+
48
+
49
+ Parameters:
50
+ - a `repo_id` (a user or organization name and a repo name separated by a `/`, like `julien-c/EsperBERTo-small`)
51
+ - a `filename` (like `pytorch_model.bin`)
52
+ - an optional `subfolder`, corresponding to a folder inside the model repo
53
+ - an optional `repo_type`, such as `dataset` or `space`
54
+ - an optional Git revision id (can be a branch name, a tag, or a commit hash)
55
+
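+ For example, a short sketch reconstructing the URL quoted above:
+
+ ```python
+ from huggingface_hub import hf_hub_url
+ url = hf_hub_url("julien-c/EsperBERTo-small", filename="pytorch_model.bin")
+ # url == "https://huggingface.co/julien-c/EsperBERTo-small/resolve/main/pytorch_model.bin"
+ ```
+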
56
+ If you check out this URL's headers with a `HEAD` HTTP request (which you can do
57
+ from the command line with `curl -I`) for a few different files, you'll see
58
+ that:
59
+ - small files are returned directly
60
+ - large files (i.e. the ones stored through
61
+ [git-lfs](https://git-lfs.github.com/)) are returned via a redirect to a
62
+ CloudFront URL. CloudFront is a Content Delivery Network (CDN) that ensures
63
+ that downloads are as fast as possible from anywhere on the globe.
64
+
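+ A small sketch of that check in Python (using `requests`, which is an assumption about your environment):
+
+ ```python
+ import requests
+
+ url = "https://huggingface.co/julien-c/EsperBERTo-small/resolve/main/pytorch_model.bin"
+ r = requests.head(url, allow_redirects=False)
+ # large (LFS) files answer with a 302 redirect whose `Location` header points to the CDN
+ print(r.status_code, r.headers.get("Location"))
+ ```
+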
65
+ <br>
66
+
67
+ ## Publish files to the Hub
68
+
69
+ If you've used Git before, this will be very easy since Git is used to manage
70
+ files in the Hub. You can find a step-by-step guide on how to upload your model
71
+ to the Hub: https://huggingface.co/docs/hub/adding-a-model.
72
+
73
+
74
+ ### API utilities in `hf_api.py`
75
+
76
+ You don't need them for the standard publishing workflow (i.e. using the git command line); however, if you need a
77
+ programmatic way of creating a repo, deleting it (`⚠️ caution`), pushing a
78
+ single file to a repo, or listing models from the Hub, you'll find helpers in
79
+ `hf_api.py`. Some example functionality available with the `HfApi` class:
80
+
81
+ * `whoami()`
82
+ * `create_repo()`
83
+ * `list_repo_files()`
84
+ * `list_repo_objects()`
85
+ * `delete_repo()`
86
+ * `update_repo_visibility()`
87
+ * `create_commit()`
88
+ * `upload_file()`
89
+ * `delete_file()`
90
+ * `delete_folder()`
91
+
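+ For instance, a minimal sketch (the repo name is hypothetical, and you must be logged in):
+
+ ```python
+ from huggingface_hub import HfApi
+
+ api = HfApi()
+ print(api.whoami())                       # identity of the logged-in user
+ api.create_repo("my-test-model")          # hypothetical repo name
+ print(api.list_repo_files("<user>/my-test-model"))
+ ```
+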
92
+ Those API utilities are also exposed through the `huggingface-cli` CLI:
93
+
94
+ ```bash
95
+ huggingface-cli login
96
+ huggingface-cli logout
97
+ huggingface-cli whoami
98
+ huggingface-cli repo create
99
+ ```
100
+
101
+ With the `HfApi` class there are methods to query models, datasets, and metrics by specific tags (e.g. if you want to list models compatible with your library):
102
+ - **Models**:
103
+ - `list_models()`
104
+ - `model_info()`
105
+ - `get_model_tags()`
106
+ - **Datasets**:
107
+ - `list_datasets()`
108
+ - `dataset_info()`
109
+ - `get_dataset_tags()`
110
+ - **Spaces**:
111
+ - `list_spaces()`
112
+ - `space_info()`
113
+
114
+ These lightly wrap around the API Endpoints. Documentation for valid parameters and descriptions can be found [here](https://huggingface.co/docs/hub/endpoints).
115
+
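+ A hedged sketch of such a query (the exact filter and sort values are assumptions):
+
+ ```python
+ from huggingface_hub import HfApi
+
+ api = HfApi()
+ # e.g. the five most-downloaded text-classification models
+ for model in api.list_models(filter="text-classification", sort="downloads", direction=-1, limit=5):
+     print(model.id)
+ ```
+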
116
+
117
+ ### Advanced programmatic repository management
118
+
119
+ The `Repository` class helps manage both offline Git repositories and Hugging
120
+ Face Hub repositories. Using the `Repository` class requires `git` and `git-lfs`
121
+ to be installed.
122
+
123
+ Instantiate a `Repository` object by calling it with a path to a local Git
124
+ clone/repository:
125
+
126
+ ```python
127
+ >>> from huggingface_hub import Repository
128
+ >>> repo = Repository("<path>/<to>/<folder>")
129
+ ```
130
+
131
+ The `Repository` class takes a `clone_from` string as a parameter. This can stay
132
+ `None` for offline management, but can also be set to any URL pointing to a Git
133
+ repo, in which case that repository is cloned into the specified directory:
134
+
135
+ ```python
136
+ >>> repo = Repository("huggingface-hub", clone_from="https://github.com/huggingface/huggingface_hub")
137
+ ```
138
+
139
+ The `clone_from` parameter can also take any Hugging Face model ID as input, and
140
+ will clone that repository:
141
+
142
+ ```python
143
+ >>> repo = Repository("w2v2", clone_from="facebook/wav2vec2-large-960h-lv60")
144
+ ```
145
+
146
+ If the repository you're cloning is one of yours or one of your organization's,
147
+ you will likely want to be able to commit and push to it. To do that,
148
+ make sure you are logged in using `huggingface-cli
149
+ login`, and keep the `token` parameter set to `True` (the default)
150
+ when instantiating the `Repository` object:
151
+
152
+ ```python
153
+ >>> repo = Repository("my-model", clone_from="<user>/<model_id>", token=True)
154
+ ```
155
+
156
+ This works for models, datasets and spaces repositories; but you will need to
157
+ explicitly specify the type for the last two options:
158
+
159
+ ```python
160
+ >>> repo = Repository("my-dataset", clone_from="<user>/<dataset_id>", token=True, repo_type="dataset")
161
+ ```
162
+
163
+ You can also change between branches:
164
+
165
+ ```python
166
+ >>> repo = Repository("huggingface-hub", clone_from="<user>/<dataset_id>", revision='branch1')
167
+ >>> repo.git_checkout("branch2")
168
+ ```
169
+
177
+ Finally, you can choose to specify the Git username and email attributed to that
178
+ clone directly by using the `git_user` and `git_email` parameters. When
179
+ committing to that repository, Git will therefore be aware of who you are and
180
+ who will be the author of the commits:
181
+
182
+ ```python
183
+ >>> repo = Repository(
184
+ ... "my-dataset",
185
+ ... clone_from="<user>/<dataset_id>",
186
+ ... token=True,
187
+ ... repo_type="dataset",
188
+ ... git_user="MyName",
189
+ ... git_email="[email protected]"
190
+ ... )
191
+ ```
192
+
193
+ The repository can be managed through this object, via wrappers of
194
+ traditional Git methods:
195
+
196
+ - `git_add(pattern: str, auto_lfs_track: bool)`. The `auto_lfs_track` flag
197
+ triggers auto tracking of large files (>10MB) with `git-lfs`
198
+ - `git_commit(commit_message: str)`
199
+ - `git_pull(rebase: bool)`
200
+ - `git_push()`
201
+ - `git_checkout(branch)`
202
+
203
+ The `git_push` method has a `blocking` parameter which is `True` by default. When set to `False`, the push
204
+ happens in the background, which can be helpful if you would like your script to continue
205
+ while the push is in progress.
206
+
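+ Putting these wrappers together, a minimal sketch of a manual add/commit/push cycle (the path pattern is hypothetical):
+
+ ```python
+ repo.git_add("checkpoints/*", auto_lfs_track=True)  # track any file >10MB with git-lfs
+ repo.git_commit("Add new checkpoints")
+ repo.git_push()
+ ```
+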
207
+ LFS-tracking methods:
208
+
209
+ - `lfs_track(pattern: Union[str, List[str]], filename: bool)`. Setting
210
+ `filename` to `True` will use the `--filename` parameter, which will consider
211
+ the pattern(s) as filenames, even if they contain special glob characters.
212
+ - `lfs_untrack()`.
213
+ - `auto_track_large_files()`: automatically tracks files that are larger than
214
+ 10MB. Make sure to call this after adding files to the index.
215
+
216
+ On top of these low-level methods, some useful higher-level helpers are available:
217
+
218
+ - `push_to_hub(commit_message)`: consecutively does `git_add`, `git_commit` and
219
+ `git_push`.
220
+ - `commit(commit_message: str, track_large_files: bool)`: this is a context
221
+ manager utility that handles committing to a repository. This automatically
222
+ tracks large files (>10MB) with `git-lfs`. The `track_large_files` argument can
223
+ be set to `False` if you wish to ignore that behavior.
224
+
225
+ These two methods also have support for the `blocking` parameter.
226
+
227
+ Examples using the `commit` context manager:
228
+ ```python
229
+ >>> with Repository("text-files", clone_from="<user>/text-files", token=True).commit("My first file :)"):
230
+ ... with open("file.txt", "w+") as f:
231
+ ... f.write(json.dumps({"hey": 8}))
232
+ ```
233
+
234
+ ```python
235
+ >>> import torch
236
+ >>> model = torch.nn.Transformer()
237
+ >>> with Repository("torch-model", clone_from="<user>/torch-model", token=True).commit("My cool model :)"):
238
+ ... torch.save(model.state_dict(), "model.pt")
239
+ ```
240
+
241
+ ### Non-blocking behavior
242
+
243
+ The pushing methods have access to a `blocking` boolean parameter to indicate whether the push should happen
244
+ asynchronously.
245
+
246
+ To check whether the push has finished, or to inspect its status code (to spot a failure), use the `command_queue`
247
+ property on the `Repository` object.
248
+
249
+ For example:
250
+
251
+ ```python
252
+ from huggingface_hub import Repository
253
+
254
+ repo = Repository("<local_folder>", clone_from="<user>/<model_name>")
255
+
256
+ with repo.commit("Commit message", blocking=False):
257
+ ...  # save data files into the local repo folder here
258
+
259
+ last_command = repo.command_queue[-1]
260
+
261
+ # Status of the push command
262
+ last_command.status
263
+ # Will return the status code
264
+ # -> -1 will indicate the push is still ongoing
265
+ # -> 0 will indicate the push has completed successfully
266
+ # -> non-zero code indicates the error code if there was an error
267
+
268
+ # if there was an error, the stderr may be inspected
269
+ last_command.stderr
270
+
271
+ # Whether the command finished or if it is still ongoing
272
+ last_command.is_done
273
+
274
+ # Whether the command errored-out.
275
+ last_command.failed
276
+ ```
277
+
278
+ When using `blocking=False`, the commands will be tracked and your script will exit only when all pushes are done, even
279
+ if other errors happen in your script (a failed push counts as done).
280
+
281
+
282
+ ### Need to upload very large (>5GB) files?
283
+
284
+ To upload large files (>5GB 🔥) from the git command line, you need to install the custom transfer agent
285
+ for git-lfs, bundled in this package.
286
+
287
+ To install, just run:
288
+
289
+ ```bash
290
+ $ huggingface-cli lfs-enable-largefiles
291
+ ```
292
+
293
+ This should be executed once for each model repo that contains a model file
294
+ >5GB. If you just try to push a file bigger than 5GB without running that
295
+ command, you will get an error with a message reminding you to run it.
296
+
297
+ Finally, there's a `huggingface-cli lfs-multipart-upload` command, but that one
298
+ is internal (called by lfs directly) and is not meant to be called by the user.
299
+
300
+ <br>
301
+
302
+ ## Using the Inference API wrapper
303
+
304
+ `huggingface_hub` comes with a wrapper client to make calls to the Inference
305
+ API! You can find some examples below, but we encourage you to visit the
306
+ Inference API
307
+ [documentation](https://api-inference.huggingface.co/docs/python/html/detailed_parameters.html)
308
+ to review the specific parameters for the different tasks.
309
+
310
+ When you instantiate the wrapper for the Inference API, you specify the model
311
+ repository id. The pipeline (`text-classification`, `text-to-speech`, etc) is
312
+ automatically extracted from the
313
+ [repository](https://huggingface.co/docs/hub/main#how-is-a-models-type-of-inference-api-and-widget-determined),
314
+ but you can also override it as shown below.
315
+
316
+
317
+ ### Examples
318
+
319
+ Here is a basic example of calling the Inference API for a `fill-mask` task
320
+ using the `bert-base-uncased` model. The `fill-mask` task only expects a string
321
+ (or list of strings) as input.
322
+
323
+ ```python
324
+ from huggingface_hub.inference_api import InferenceApi
325
+ inference = InferenceApi("bert-base-uncased", token=API_TOKEN)
326
+ inference(inputs="The goal of life is [MASK].")
327
+ >> [{'sequence': 'the goal of life is life.', 'score': 0.10933292657136917, 'token': 2166, 'token_str': 'life'}]
328
+ ```
329
+
330
+ This is an example of a task (`question-answering`) which requires a dictionary
331
+ as input that has the `question` and `context` keys.
332
+
333
+ ```python
334
+ inference = InferenceApi("deepset/roberta-base-squad2", token=API_TOKEN)
335
+ inputs = {"question":"What's my name?", "context":"My name is Clara and I live in Berkeley."}
336
+ inference(inputs)
337
+ >> {'score': 0.9326569437980652, 'start': 11, 'end': 16, 'answer': 'Clara'}
338
+ ```
339
+
340
+ Some tasks might also require additional params in the request. Here is an
341
+ example using a `zero-shot-classification` model.
342
+
343
+ ```python
344
+ inference = InferenceApi("typeform/distilbert-base-uncased-mnli", token=API_TOKEN)
345
+ inputs = "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!"
346
+ params = {"candidate_labels":["refund", "legal", "faq"]}
347
+ inference(inputs, params)
348
+ >> {'sequence': 'Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!', 'labels': ['refund', 'faq', 'legal'], 'scores': [0.9378499388694763, 0.04914155602455139, 0.013008488342165947]}
349
+ ```
350
+
351
+ Finally, there are some models that might support multiple tasks. For example,
352
+ `sentence-transformers` models can do `sentence-similarity` and
353
+ `feature-extraction`. You can override the configured task when initializing the
354
+ API.
355
+
356
+ ```python
357
+ inference = InferenceApi("bert-base-uncased", task="feature-extraction", token=API_TOKEN)
358
+ ```
huggingface_hub/__init__.py ADDED
@@ -0,0 +1,968 @@
1
+ # Copyright 2020 The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # ***********
16
+ # `huggingface_hub` init has 2 modes:
17
+ # - Normal usage:
18
+ # If imported to use it, all modules and functions are lazy-loaded. This means
19
+ # they exist at the top level of the module but are imported only the first time they are
20
+ # used. This way, `from huggingface_hub import something` will import `something`
21
+ # quickly without the hassle of importing all the features from `huggingface_hub`.
22
+ # - Static check:
23
+ # If statically analyzed, all modules and functions are loaded normally. This way
24
+ # static typing check works properly as well as autocomplete in text editors and
25
+ # IDEs.
26
+ #
27
+ # The static model imports are done inside the `if TYPE_CHECKING:` statement at
28
+ # the bottom of this file. Since module/function imports are duplicated, make
29
+ # sure to add any new import in both places. This is checked in the
30
+ # `make quality` command.
31
+ #
32
+ # To update the static imports, please run the following command and commit the changes.
33
+ # ```
34
+ # # Use script
35
+ # python utils/check_static_imports.py --update-file
36
+ #
37
+ # # Or run style on codebase
38
+ # make style
39
+ # ```
40
+ #
41
+ # ***********
42
+ # Lazy loader vendored from https://github.com/scientific-python/lazy_loader
43
+ import importlib
44
+ import os
45
+ import sys
46
+ from typing import TYPE_CHECKING
47
+
48
+
49
+ __version__ = "0.25.2"
50
+
51
+ # Alphabetical order of definitions is ensured in tests
52
+ # WARNING: any comment added in this dictionary definition will be lost when
53
+ # re-generating the file!
54
+ _SUBMOD_ATTRS = {
55
+ "_commit_scheduler": [
56
+ "CommitScheduler",
57
+ ],
58
+ "_inference_endpoints": [
59
+ "InferenceEndpoint",
60
+ "InferenceEndpointError",
61
+ "InferenceEndpointStatus",
62
+ "InferenceEndpointTimeoutError",
63
+ "InferenceEndpointType",
64
+ ],
65
+ "_login": [
66
+ "interpreter_login",
67
+ "login",
68
+ "logout",
69
+ "notebook_login",
70
+ ],
71
+ "_multi_commits": [
72
+ "MultiCommitException",
73
+ "plan_multi_commits",
74
+ ],
75
+ "_snapshot_download": [
76
+ "snapshot_download",
77
+ ],
78
+ "_space_api": [
79
+ "SpaceHardware",
80
+ "SpaceRuntime",
81
+ "SpaceStage",
82
+ "SpaceStorage",
83
+ "SpaceVariable",
84
+ ],
85
+ "_tensorboard_logger": [
86
+ "HFSummaryWriter",
87
+ ],
88
+ "_webhooks_payload": [
89
+ "WebhookPayload",
90
+ "WebhookPayloadComment",
91
+ "WebhookPayloadDiscussion",
92
+ "WebhookPayloadDiscussionChanges",
93
+ "WebhookPayloadEvent",
94
+ "WebhookPayloadMovedTo",
95
+ "WebhookPayloadRepo",
96
+ "WebhookPayloadUrl",
97
+ "WebhookPayloadWebhook",
98
+ ],
99
+ "_webhooks_server": [
100
+ "WebhooksServer",
101
+ "webhook_endpoint",
102
+ ],
103
+ "community": [
104
+ "Discussion",
105
+ "DiscussionComment",
106
+ "DiscussionCommit",
107
+ "DiscussionEvent",
108
+ "DiscussionStatusChange",
109
+ "DiscussionTitleChange",
110
+ "DiscussionWithDetails",
111
+ ],
112
+ "constants": [
113
+ "CONFIG_NAME",
114
+ "FLAX_WEIGHTS_NAME",
115
+ "HUGGINGFACE_CO_URL_HOME",
116
+ "HUGGINGFACE_CO_URL_TEMPLATE",
117
+ "PYTORCH_WEIGHTS_NAME",
118
+ "REPO_TYPE_DATASET",
119
+ "REPO_TYPE_MODEL",
120
+ "REPO_TYPE_SPACE",
121
+ "TF2_WEIGHTS_NAME",
122
+ "TF_WEIGHTS_NAME",
123
+ ],
124
+ "fastai_utils": [
125
+ "_save_pretrained_fastai",
126
+ "from_pretrained_fastai",
127
+ "push_to_hub_fastai",
128
+ ],
129
+ "file_download": [
130
+ "HfFileMetadata",
131
+ "_CACHED_NO_EXIST",
132
+ "cached_download",
133
+ "get_hf_file_metadata",
134
+ "hf_hub_download",
135
+ "hf_hub_url",
136
+ "try_to_load_from_cache",
137
+ ],
138
+ "hf_api": [
139
+ "Collection",
140
+ "CollectionItem",
141
+ "CommitInfo",
142
+ "CommitOperation",
143
+ "CommitOperationAdd",
144
+ "CommitOperationCopy",
145
+ "CommitOperationDelete",
146
+ "DatasetInfo",
147
+ "GitCommitInfo",
148
+ "GitRefInfo",
149
+ "GitRefs",
150
+ "HfApi",
151
+ "ModelInfo",
152
+ "RepoUrl",
153
+ "SpaceInfo",
154
+ "User",
155
+ "UserLikes",
156
+ "WebhookInfo",
157
+ "WebhookWatchedItem",
158
+ "accept_access_request",
159
+ "add_collection_item",
160
+ "add_space_secret",
161
+ "add_space_variable",
162
+ "auth_check",
163
+ "cancel_access_request",
164
+ "change_discussion_status",
165
+ "comment_discussion",
166
+ "create_branch",
167
+ "create_collection",
168
+ "create_commit",
169
+ "create_commits_on_pr",
170
+ "create_discussion",
171
+ "create_inference_endpoint",
172
+ "create_pull_request",
173
+ "create_repo",
174
+ "create_tag",
175
+ "create_webhook",
176
+ "dataset_info",
177
+ "delete_branch",
178
+ "delete_collection",
179
+ "delete_collection_item",
180
+ "delete_file",
181
+ "delete_folder",
182
+ "delete_inference_endpoint",
183
+ "delete_repo",
184
+ "delete_space_secret",
185
+ "delete_space_storage",
186
+ "delete_space_variable",
187
+ "delete_tag",
188
+ "delete_webhook",
189
+ "disable_webhook",
190
+ "duplicate_space",
191
+ "edit_discussion_comment",
192
+ "enable_webhook",
193
+ "file_exists",
194
+ "get_collection",
195
+ "get_dataset_tags",
196
+ "get_discussion_details",
197
+ "get_full_repo_name",
198
+ "get_inference_endpoint",
199
+ "get_model_tags",
200
+ "get_paths_info",
201
+ "get_repo_discussions",
202
+ "get_safetensors_metadata",
203
+ "get_space_runtime",
204
+ "get_space_variables",
205
+ "get_token_permission",
206
+ "get_user_overview",
207
+ "get_webhook",
208
+ "grant_access",
209
+ "like",
210
+ "list_accepted_access_requests",
211
+ "list_collections",
212
+ "list_datasets",
213
+ "list_inference_endpoints",
214
+ "list_liked_repos",
215
+ "list_metrics",
216
+ "list_models",
217
+ "list_organization_members",
218
+ "list_pending_access_requests",
219
+ "list_rejected_access_requests",
220
+ "list_repo_commits",
221
+ "list_repo_files",
222
+ "list_repo_likers",
223
+ "list_repo_refs",
224
+ "list_repo_tree",
225
+ "list_spaces",
226
+ "list_user_followers",
227
+ "list_user_following",
228
+ "list_webhooks",
229
+ "merge_pull_request",
230
+ "model_info",
231
+ "move_repo",
232
+ "parse_safetensors_file_metadata",
233
+ "pause_inference_endpoint",
234
+ "pause_space",
235
+ "preupload_lfs_files",
236
+ "reject_access_request",
237
+ "rename_discussion",
238
+ "repo_exists",
239
+ "repo_info",
240
+ "repo_type_and_id_from_hf_id",
241
+ "request_space_hardware",
242
+ "request_space_storage",
243
+ "restart_space",
244
+ "resume_inference_endpoint",
245
+ "revision_exists",
246
+ "run_as_future",
247
+ "scale_to_zero_inference_endpoint",
248
+ "set_space_sleep_time",
249
+ "space_info",
250
+ "super_squash_history",
251
+ "unlike",
252
+ "update_collection_item",
253
+ "update_collection_metadata",
254
+ "update_inference_endpoint",
255
+ "update_repo_settings",
256
+ "update_repo_visibility",
257
+ "update_webhook",
258
+ "upload_file",
259
+ "upload_folder",
260
+ "upload_large_folder",
261
+ "whoami",
262
+ ],
263
+ "hf_file_system": [
264
+ "HfFileSystem",
265
+ "HfFileSystemFile",
266
+ "HfFileSystemResolvedPath",
267
+ "HfFileSystemStreamFile",
268
+ ],
269
+ "hub_mixin": [
270
+ "ModelHubMixin",
271
+ "PyTorchModelHubMixin",
272
+ ],
273
+ "inference._client": [
274
+ "InferenceClient",
275
+ "InferenceTimeoutError",
276
+ ],
277
+ "inference._generated._async_client": [
278
+ "AsyncInferenceClient",
279
+ ],
280
+ "inference._generated.types": [
281
+ "AudioClassificationInput",
282
+ "AudioClassificationOutputElement",
283
+ "AudioClassificationParameters",
284
+ "AudioToAudioInput",
285
+ "AudioToAudioOutputElement",
286
+ "AutomaticSpeechRecognitionGenerationParameters",
287
+ "AutomaticSpeechRecognitionInput",
288
+ "AutomaticSpeechRecognitionOutput",
289
+ "AutomaticSpeechRecognitionOutputChunk",
290
+ "AutomaticSpeechRecognitionParameters",
291
+ "ChatCompletionInput",
292
+ "ChatCompletionInputFunctionDefinition",
293
+ "ChatCompletionInputFunctionName",
294
+ "ChatCompletionInputGrammarType",
295
+ "ChatCompletionInputMessage",
296
+ "ChatCompletionInputMessageChunk",
297
+ "ChatCompletionInputTool",
298
+ "ChatCompletionInputToolTypeClass",
299
+ "ChatCompletionInputURL",
300
+ "ChatCompletionOutput",
301
+ "ChatCompletionOutputComplete",
302
+ "ChatCompletionOutputFunctionDefinition",
303
+ "ChatCompletionOutputLogprob",
304
+ "ChatCompletionOutputLogprobs",
305
+ "ChatCompletionOutputMessage",
306
+ "ChatCompletionOutputToolCall",
307
+ "ChatCompletionOutputTopLogprob",
308
+ "ChatCompletionOutputUsage",
309
+ "ChatCompletionStreamOutput",
310
+ "ChatCompletionStreamOutputChoice",
311
+ "ChatCompletionStreamOutputDelta",
312
+ "ChatCompletionStreamOutputDeltaToolCall",
313
+ "ChatCompletionStreamOutputFunction",
314
+ "ChatCompletionStreamOutputLogprob",
315
+ "ChatCompletionStreamOutputLogprobs",
316
+ "ChatCompletionStreamOutputTopLogprob",
317
+ "DepthEstimationInput",
318
+ "DepthEstimationOutput",
319
+ "DocumentQuestionAnsweringInput",
320
+ "DocumentQuestionAnsweringInputData",
321
+ "DocumentQuestionAnsweringOutputElement",
322
+ "DocumentQuestionAnsweringParameters",
323
+ "FeatureExtractionInput",
324
+ "FillMaskInput",
325
+ "FillMaskOutputElement",
326
+ "FillMaskParameters",
327
+ "ImageClassificationInput",
328
+ "ImageClassificationOutputElement",
329
+ "ImageClassificationParameters",
330
+ "ImageSegmentationInput",
331
+ "ImageSegmentationOutputElement",
332
+ "ImageSegmentationParameters",
333
+ "ImageToImageInput",
334
+ "ImageToImageOutput",
335
+ "ImageToImageParameters",
336
+ "ImageToImageTargetSize",
337
+ "ImageToTextGenerationParameters",
338
+ "ImageToTextInput",
339
+ "ImageToTextOutput",
340
+ "ImageToTextParameters",
341
+ "ObjectDetectionBoundingBox",
342
+ "ObjectDetectionInput",
343
+ "ObjectDetectionOutputElement",
344
+ "ObjectDetectionParameters",
345
+ "QuestionAnsweringInput",
346
+ "QuestionAnsweringInputData",
347
+ "QuestionAnsweringOutputElement",
348
+ "QuestionAnsweringParameters",
349
+ "SentenceSimilarityInput",
350
+ "SentenceSimilarityInputData",
351
+ "SummarizationGenerationParameters",
352
+ "SummarizationInput",
353
+ "SummarizationOutput",
354
+ "TableQuestionAnsweringInput",
355
+ "TableQuestionAnsweringInputData",
356
+ "TableQuestionAnsweringOutputElement",
357
+ "Text2TextGenerationInput",
358
+ "Text2TextGenerationOutput",
359
+ "Text2TextGenerationParameters",
360
+ "TextClassificationInput",
361
+ "TextClassificationOutputElement",
362
+ "TextClassificationParameters",
363
+ "TextGenerationInput",
364
+ "TextGenerationInputGenerateParameters",
365
+ "TextGenerationInputGrammarType",
366
+ "TextGenerationOutput",
367
+ "TextGenerationOutputBestOfSequence",
368
+ "TextGenerationOutputDetails",
369
+ "TextGenerationOutputPrefillToken",
370
+ "TextGenerationOutputToken",
371
+ "TextGenerationStreamOutput",
372
+ "TextGenerationStreamOutputStreamDetails",
373
+ "TextGenerationStreamOutputToken",
374
+ "TextToAudioGenerationParameters",
375
+ "TextToAudioInput",
376
+ "TextToAudioOutput",
377
+ "TextToAudioParameters",
378
+ "TextToImageInput",
379
+ "TextToImageOutput",
380
+ "TextToImageParameters",
381
+ "TextToImageTargetSize",
382
+ "TokenClassificationInput",
383
+ "TokenClassificationOutputElement",
384
+ "TokenClassificationParameters",
385
+ "TranslationGenerationParameters",
386
+ "TranslationInput",
387
+ "TranslationOutput",
388
+ "VideoClassificationInput",
389
+ "VideoClassificationOutputElement",
390
+ "VideoClassificationParameters",
391
+ "VisualQuestionAnsweringInput",
392
+ "VisualQuestionAnsweringInputData",
393
+ "VisualQuestionAnsweringOutputElement",
394
+ "VisualQuestionAnsweringParameters",
395
+ "ZeroShotClassificationInput",
396
+ "ZeroShotClassificationInputData",
397
+ "ZeroShotClassificationOutputElement",
398
+ "ZeroShotClassificationParameters",
399
+ "ZeroShotImageClassificationInput",
400
+ "ZeroShotImageClassificationInputData",
401
+ "ZeroShotImageClassificationOutputElement",
402
+ "ZeroShotImageClassificationParameters",
403
+ "ZeroShotObjectDetectionBoundingBox",
404
+ "ZeroShotObjectDetectionInput",
405
+ "ZeroShotObjectDetectionInputData",
406
+ "ZeroShotObjectDetectionOutputElement",
407
+ ],
408
+ "inference_api": [
409
+ "InferenceApi",
410
+ ],
411
+ "keras_mixin": [
412
+ "KerasModelHubMixin",
413
+ "from_pretrained_keras",
414
+ "push_to_hub_keras",
415
+ "save_pretrained_keras",
416
+ ],
417
+ "repocard": [
418
+ "DatasetCard",
419
+ "ModelCard",
420
+ "RepoCard",
421
+ "SpaceCard",
422
+ "metadata_eval_result",
423
+ "metadata_load",
424
+ "metadata_save",
425
+ "metadata_update",
426
+ ],
427
+ "repocard_data": [
428
+ "CardData",
429
+ "DatasetCardData",
430
+ "EvalResult",
431
+ "ModelCardData",
432
+ "SpaceCardData",
433
+ ],
434
+ "repository": [
435
+ "Repository",
436
+ ],
437
+ "serialization": [
438
+ "StateDictSplit",
439
+ "get_tf_storage_size",
440
+ "get_torch_storage_id",
441
+ "get_torch_storage_size",
442
+ "save_torch_model",
443
+ "save_torch_state_dict",
444
+ "split_state_dict_into_shards_factory",
445
+ "split_tf_state_dict_into_shards",
446
+ "split_torch_state_dict_into_shards",
447
+ ],
448
+ "utils": [
449
+ "CacheNotFound",
450
+ "CachedFileInfo",
451
+ "CachedRepoInfo",
452
+ "CachedRevisionInfo",
453
+ "CorruptedCacheException",
454
+ "DeleteCacheStrategy",
455
+ "HFCacheInfo",
456
+ "HfFolder",
457
+ "cached_assets_path",
458
+ "configure_http_backend",
459
+ "dump_environment_info",
460
+ "get_session",
461
+ "get_token",
462
+ "logging",
463
+ "scan_cache_dir",
464
+ ],
465
+ }
466
+
467
+
468
+ def _attach(package_name, submodules=None, submod_attrs=None):
469
+ """Attach lazily loaded submodules, functions, or other attributes.
470
+
471
+ Typically, modules import submodules and attributes as follows:
472
+
473
+ ```py
474
+ import mysubmodule
475
+ import anothersubmodule
476
+
477
+ from .foo import someattr
478
+ ```
479
+
480
+ The idea is to replace a package's `__getattr__`, `__dir__`, and
481
+ `__all__`, such that all imports work exactly the way they would
482
+ with normal imports, except that the import occurs upon first use.
483
+
484
+ The typical way to call this function, replacing the above imports, is:
485
+
486
+ ```python
487
+ __getattr__, __dir__, __all__ = lazy.attach(
488
+ __name__,
489
+ ['mysubmodule', 'anothersubmodule'],
490
+ {'foo': ['someattr']}
491
+ )
492
+ ```
493
+ This functionality requires Python 3.7 or higher.
494
+
495
+ Args:
496
+ package_name (`str`):
497
+ Typically use `__name__`.
498
+ submodules (`set`):
499
+ List of submodules to attach.
500
+ submod_attrs (`dict`):
501
+ Dictionary of submodule -> list of attributes / functions.
502
+ These attributes are imported as they are used.
503
+
504
+ Returns:
505
+ __getattr__, __dir__, __all__
506
+
507
+ """
508
+ if submod_attrs is None:
509
+ submod_attrs = {}
510
+
511
+ if submodules is None:
512
+ submodules = set()
513
+ else:
514
+ submodules = set(submodules)
515
+
516
+ attr_to_modules = {attr: mod for mod, attrs in submod_attrs.items() for attr in attrs}
517
+
518
+ __all__ = list(submodules | attr_to_modules.keys())
519
+
520
+ def __getattr__(name):
521
+ if name in submodules:
522
+ try:
523
+ return importlib.import_module(f"{package_name}.{name}")
524
+ except Exception as e:
525
+ print(f"Error importing {package_name}.{name}: {e}")
526
+ raise
527
+ elif name in attr_to_modules:
528
+ submod_path = f"{package_name}.{attr_to_modules[name]}"
529
+ try:
530
+ submod = importlib.import_module(submod_path)
531
+ except Exception as e:
532
+ print(f"Error importing {submod_path}: {e}")
533
+ raise
534
+ attr = getattr(submod, name)
535
+
536
+ # If the attribute lives in a file (module) with the same
537
+ # name as the attribute, ensure that the attribute and *not*
538
+ # the module is accessible on the package.
539
+ if name == attr_to_modules[name]:
540
+ pkg = sys.modules[package_name]
541
+ pkg.__dict__[name] = attr
542
+
543
+ return attr
544
+ else:
545
+ raise AttributeError(f"No {package_name} attribute {name}")
546
+
547
+ def __dir__():
548
+ return __all__
549
+
550
+ return __getattr__, __dir__, list(__all__)
551
+
552
+
553
+ __getattr__, __dir__, __all__ = _attach(__name__, submodules=[], submod_attrs=_SUBMOD_ATTRS)
554
+
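+ # Setting the EAGER_IMPORT environment variable forces every lazy attribute to be
+ # imported at startup, which surfaces import errors early (useful for debugging).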
555
+ if os.environ.get("EAGER_IMPORT", ""):
556
+ for attr in __all__:
557
+ __getattr__(attr)
558
+
559
+ # WARNING: any content below this statement is generated automatically. Any manual edit
560
+ # will be lost when re-generating this file!
561
+ #
562
+ # To update the static imports, please run the following command and commit the changes.
563
+ # ```
564
+ # # Use script
565
+ # python utils/check_static_imports.py --update-file
566
+ #
567
+ # # Or run style on codebase
568
+ # make style
569
+ # ```
570
+ if TYPE_CHECKING: # pragma: no cover
571
+ from ._commit_scheduler import CommitScheduler # noqa: F401
572
+ from ._inference_endpoints import (
573
+ InferenceEndpoint, # noqa: F401
574
+ InferenceEndpointError, # noqa: F401
575
+ InferenceEndpointStatus, # noqa: F401
576
+ InferenceEndpointTimeoutError, # noqa: F401
577
+ InferenceEndpointType, # noqa: F401
578
+ )
579
+ from ._login import (
580
+ interpreter_login, # noqa: F401
581
+ login, # noqa: F401
582
+ logout, # noqa: F401
583
+ notebook_login, # noqa: F401
584
+ )
585
+ from ._multi_commits import (
586
+ MultiCommitException, # noqa: F401
587
+ plan_multi_commits, # noqa: F401
588
+ )
589
+ from ._snapshot_download import snapshot_download # noqa: F401
590
+ from ._space_api import (
591
+ SpaceHardware, # noqa: F401
592
+ SpaceRuntime, # noqa: F401
593
+ SpaceStage, # noqa: F401
594
+ SpaceStorage, # noqa: F401
595
+ SpaceVariable, # noqa: F401
596
+ )
597
+ from ._tensorboard_logger import HFSummaryWriter # noqa: F401
598
+ from ._webhooks_payload import (
599
+ WebhookPayload, # noqa: F401
600
+ WebhookPayloadComment, # noqa: F401
601
+ WebhookPayloadDiscussion, # noqa: F401
602
+ WebhookPayloadDiscussionChanges, # noqa: F401
603
+ WebhookPayloadEvent, # noqa: F401
604
+ WebhookPayloadMovedTo, # noqa: F401
605
+ WebhookPayloadRepo, # noqa: F401
606
+ WebhookPayloadUrl, # noqa: F401
607
+ WebhookPayloadWebhook, # noqa: F401
608
+ )
609
+ from ._webhooks_server import (
610
+ WebhooksServer, # noqa: F401
611
+ webhook_endpoint, # noqa: F401
612
+ )
613
+ from .community import (
614
+ Discussion, # noqa: F401
615
+ DiscussionComment, # noqa: F401
616
+ DiscussionCommit, # noqa: F401
617
+ DiscussionEvent, # noqa: F401
618
+ DiscussionStatusChange, # noqa: F401
619
+ DiscussionTitleChange, # noqa: F401
620
+ DiscussionWithDetails, # noqa: F401
621
+ )
622
+ from .constants import (
623
+ CONFIG_NAME, # noqa: F401
624
+ FLAX_WEIGHTS_NAME, # noqa: F401
625
+ HUGGINGFACE_CO_URL_HOME, # noqa: F401
626
+ HUGGINGFACE_CO_URL_TEMPLATE, # noqa: F401
627
+ PYTORCH_WEIGHTS_NAME, # noqa: F401
628
+ REPO_TYPE_DATASET, # noqa: F401
629
+ REPO_TYPE_MODEL, # noqa: F401
630
+ REPO_TYPE_SPACE, # noqa: F401
631
+ TF2_WEIGHTS_NAME, # noqa: F401
632
+ TF_WEIGHTS_NAME, # noqa: F401
633
+ )
634
+ from .fastai_utils import (
635
+ _save_pretrained_fastai, # noqa: F401
636
+ from_pretrained_fastai, # noqa: F401
637
+ push_to_hub_fastai, # noqa: F401
638
+ )
639
+ from .file_download import (
640
+ _CACHED_NO_EXIST, # noqa: F401
641
+ HfFileMetadata, # noqa: F401
642
+ cached_download, # noqa: F401
643
+ get_hf_file_metadata, # noqa: F401
644
+ hf_hub_download, # noqa: F401
645
+ hf_hub_url, # noqa: F401
646
+ try_to_load_from_cache, # noqa: F401
647
+ )
648
+ from .hf_api import (
649
+ Collection, # noqa: F401
650
+ CollectionItem, # noqa: F401
651
+ CommitInfo, # noqa: F401
652
+ CommitOperation, # noqa: F401
653
+ CommitOperationAdd, # noqa: F401
654
+ CommitOperationCopy, # noqa: F401
655
+ CommitOperationDelete, # noqa: F401
656
+ DatasetInfo, # noqa: F401
657
+ GitCommitInfo, # noqa: F401
658
+ GitRefInfo, # noqa: F401
659
+ GitRefs, # noqa: F401
660
+ HfApi, # noqa: F401
661
+ ModelInfo, # noqa: F401
662
+ RepoUrl, # noqa: F401
663
+ SpaceInfo, # noqa: F401
664
+ User, # noqa: F401
665
+ UserLikes, # noqa: F401
666
+ WebhookInfo, # noqa: F401
667
+ WebhookWatchedItem, # noqa: F401
668
+ accept_access_request, # noqa: F401
669
+ add_collection_item, # noqa: F401
670
+ add_space_secret, # noqa: F401
671
+ add_space_variable, # noqa: F401
672
+ auth_check, # noqa: F401
673
+ cancel_access_request, # noqa: F401
674
+ change_discussion_status, # noqa: F401
675
+ comment_discussion, # noqa: F401
676
+ create_branch, # noqa: F401
677
+ create_collection, # noqa: F401
678
+ create_commit, # noqa: F401
679
+ create_commits_on_pr, # noqa: F401
680
+ create_discussion, # noqa: F401
681
+ create_inference_endpoint, # noqa: F401
682
+ create_pull_request, # noqa: F401
683
+ create_repo, # noqa: F401
684
+ create_tag, # noqa: F401
685
+ create_webhook, # noqa: F401
686
+ dataset_info, # noqa: F401
687
+ delete_branch, # noqa: F401
688
+ delete_collection, # noqa: F401
689
+ delete_collection_item, # noqa: F401
690
+ delete_file, # noqa: F401
691
+ delete_folder, # noqa: F401
692
+ delete_inference_endpoint, # noqa: F401
693
+ delete_repo, # noqa: F401
694
+ delete_space_secret, # noqa: F401
695
+ delete_space_storage, # noqa: F401
696
+ delete_space_variable, # noqa: F401
697
+ delete_tag, # noqa: F401
698
+ delete_webhook, # noqa: F401
699
+ disable_webhook, # noqa: F401
700
+ duplicate_space, # noqa: F401
701
+ edit_discussion_comment, # noqa: F401
702
+ enable_webhook, # noqa: F401
703
+ file_exists, # noqa: F401
704
+ get_collection, # noqa: F401
705
+ get_dataset_tags, # noqa: F401
706
+ get_discussion_details, # noqa: F401
707
+ get_full_repo_name, # noqa: F401
708
+ get_inference_endpoint, # noqa: F401
709
+ get_model_tags, # noqa: F401
710
+ get_paths_info, # noqa: F401
711
+ get_repo_discussions, # noqa: F401
712
+ get_safetensors_metadata, # noqa: F401
713
+ get_space_runtime, # noqa: F401
714
+ get_space_variables, # noqa: F401
715
+ get_token_permission, # noqa: F401
716
+ get_user_overview, # noqa: F401
717
+ get_webhook, # noqa: F401
718
+ grant_access, # noqa: F401
719
+ like, # noqa: F401
720
+ list_accepted_access_requests, # noqa: F401
721
+ list_collections, # noqa: F401
722
+ list_datasets, # noqa: F401
723
+ list_inference_endpoints, # noqa: F401
724
+ list_liked_repos, # noqa: F401
725
+ list_metrics, # noqa: F401
726
+ list_models, # noqa: F401
727
+ list_organization_members, # noqa: F401
728
+ list_pending_access_requests, # noqa: F401
729
+ list_rejected_access_requests, # noqa: F401
730
+ list_repo_commits, # noqa: F401
731
+ list_repo_files, # noqa: F401
732
+ list_repo_likers, # noqa: F401
733
+ list_repo_refs, # noqa: F401
734
+ list_repo_tree, # noqa: F401
735
+ list_spaces, # noqa: F401
736
+ list_user_followers, # noqa: F401
737
+ list_user_following, # noqa: F401
738
+ list_webhooks, # noqa: F401
739
+ merge_pull_request, # noqa: F401
740
+ model_info, # noqa: F401
741
+ move_repo, # noqa: F401
742
+ parse_safetensors_file_metadata, # noqa: F401
743
+ pause_inference_endpoint, # noqa: F401
744
+ pause_space, # noqa: F401
745
+ preupload_lfs_files, # noqa: F401
746
+ reject_access_request, # noqa: F401
747
+ rename_discussion, # noqa: F401
748
+ repo_exists, # noqa: F401
749
+ repo_info, # noqa: F401
750
+ repo_type_and_id_from_hf_id, # noqa: F401
751
+ request_space_hardware, # noqa: F401
752
+ request_space_storage, # noqa: F401
753
+ restart_space, # noqa: F401
754
+ resume_inference_endpoint, # noqa: F401
755
+ revision_exists, # noqa: F401
756
+ run_as_future, # noqa: F401
757
+ scale_to_zero_inference_endpoint, # noqa: F401
758
+ set_space_sleep_time, # noqa: F401
759
+ space_info, # noqa: F401
760
+ super_squash_history, # noqa: F401
761
+ unlike, # noqa: F401
762
+ update_collection_item, # noqa: F401
763
+ update_collection_metadata, # noqa: F401
764
+ update_inference_endpoint, # noqa: F401
765
+ update_repo_settings, # noqa: F401
766
+ update_repo_visibility, # noqa: F401
767
+ update_webhook, # noqa: F401
768
+ upload_file, # noqa: F401
769
+ upload_folder, # noqa: F401
770
+ upload_large_folder, # noqa: F401
771
+ whoami, # noqa: F401
772
+ )
773
+ from .hf_file_system import (
774
+ HfFileSystem, # noqa: F401
775
+ HfFileSystemFile, # noqa: F401
776
+ HfFileSystemResolvedPath, # noqa: F401
777
+ HfFileSystemStreamFile, # noqa: F401
778
+ )
779
+ from .hub_mixin import (
780
+ ModelHubMixin, # noqa: F401
781
+ PyTorchModelHubMixin, # noqa: F401
782
+ )
783
+ from .inference._client import (
784
+ InferenceClient, # noqa: F401
785
+ InferenceTimeoutError, # noqa: F401
786
+ )
787
+ from .inference._generated._async_client import AsyncInferenceClient # noqa: F401
788
+ from .inference._generated.types import (
789
+ AudioClassificationInput, # noqa: F401
790
+ AudioClassificationOutputElement, # noqa: F401
791
+ AudioClassificationParameters, # noqa: F401
792
+ AudioToAudioInput, # noqa: F401
793
+ AudioToAudioOutputElement, # noqa: F401
794
+ AutomaticSpeechRecognitionGenerationParameters, # noqa: F401
795
+ AutomaticSpeechRecognitionInput, # noqa: F401
796
+ AutomaticSpeechRecognitionOutput, # noqa: F401
797
+ AutomaticSpeechRecognitionOutputChunk, # noqa: F401
798
+ AutomaticSpeechRecognitionParameters, # noqa: F401
799
+ ChatCompletionInput, # noqa: F401
800
+ ChatCompletionInputFunctionDefinition, # noqa: F401
801
+ ChatCompletionInputFunctionName, # noqa: F401
802
+ ChatCompletionInputGrammarType, # noqa: F401
803
+ ChatCompletionInputMessage, # noqa: F401
804
+ ChatCompletionInputMessageChunk, # noqa: F401
805
+ ChatCompletionInputTool, # noqa: F401
806
+ ChatCompletionInputToolTypeClass, # noqa: F401
807
+ ChatCompletionInputURL, # noqa: F401
808
+ ChatCompletionOutput, # noqa: F401
809
+ ChatCompletionOutputComplete, # noqa: F401
810
+ ChatCompletionOutputFunctionDefinition, # noqa: F401
811
+ ChatCompletionOutputLogprob, # noqa: F401
812
+ ChatCompletionOutputLogprobs, # noqa: F401
813
+ ChatCompletionOutputMessage, # noqa: F401
814
+ ChatCompletionOutputToolCall, # noqa: F401
815
+ ChatCompletionOutputTopLogprob, # noqa: F401
816
+ ChatCompletionOutputUsage, # noqa: F401
817
+ ChatCompletionStreamOutput, # noqa: F401
818
+ ChatCompletionStreamOutputChoice, # noqa: F401
819
+ ChatCompletionStreamOutputDelta, # noqa: F401
820
+ ChatCompletionStreamOutputDeltaToolCall, # noqa: F401
821
+ ChatCompletionStreamOutputFunction, # noqa: F401
822
+ ChatCompletionStreamOutputLogprob, # noqa: F401
823
+ ChatCompletionStreamOutputLogprobs, # noqa: F401
824
+ ChatCompletionStreamOutputTopLogprob, # noqa: F401
825
+ DepthEstimationInput, # noqa: F401
826
+ DepthEstimationOutput, # noqa: F401
827
+ DocumentQuestionAnsweringInput, # noqa: F401
828
+ DocumentQuestionAnsweringInputData, # noqa: F401
829
+ DocumentQuestionAnsweringOutputElement, # noqa: F401
830
+ DocumentQuestionAnsweringParameters, # noqa: F401
831
+ FeatureExtractionInput, # noqa: F401
832
+ FillMaskInput, # noqa: F401
833
+ FillMaskOutputElement, # noqa: F401
834
+ FillMaskParameters, # noqa: F401
835
+ ImageClassificationInput, # noqa: F401
836
+ ImageClassificationOutputElement, # noqa: F401
837
+ ImageClassificationParameters, # noqa: F401
838
+ ImageSegmentationInput, # noqa: F401
839
+ ImageSegmentationOutputElement, # noqa: F401
840
+ ImageSegmentationParameters, # noqa: F401
841
+ ImageToImageInput, # noqa: F401
842
+ ImageToImageOutput, # noqa: F401
843
+ ImageToImageParameters, # noqa: F401
844
+ ImageToImageTargetSize, # noqa: F401
845
+ ImageToTextGenerationParameters, # noqa: F401
846
+ ImageToTextInput, # noqa: F401
847
+ ImageToTextOutput, # noqa: F401
848
+ ImageToTextParameters, # noqa: F401
849
+ ObjectDetectionBoundingBox, # noqa: F401
850
+ ObjectDetectionInput, # noqa: F401
851
+ ObjectDetectionOutputElement, # noqa: F401
852
+ ObjectDetectionParameters, # noqa: F401
853
+ QuestionAnsweringInput, # noqa: F401
854
+ QuestionAnsweringInputData, # noqa: F401
855
+ QuestionAnsweringOutputElement, # noqa: F401
856
+ QuestionAnsweringParameters, # noqa: F401
857
+ SentenceSimilarityInput, # noqa: F401
858
+ SentenceSimilarityInputData, # noqa: F401
859
+ SummarizationGenerationParameters, # noqa: F401
860
+ SummarizationInput, # noqa: F401
861
+ SummarizationOutput, # noqa: F401
862
+ TableQuestionAnsweringInput, # noqa: F401
863
+ TableQuestionAnsweringInputData, # noqa: F401
864
+ TableQuestionAnsweringOutputElement, # noqa: F401
865
+ Text2TextGenerationInput, # noqa: F401
866
+ Text2TextGenerationOutput, # noqa: F401
867
+ Text2TextGenerationParameters, # noqa: F401
868
+ TextClassificationInput, # noqa: F401
869
+ TextClassificationOutputElement, # noqa: F401
870
+ TextClassificationParameters, # noqa: F401
871
+ TextGenerationInput, # noqa: F401
872
+ TextGenerationInputGenerateParameters, # noqa: F401
873
+ TextGenerationInputGrammarType, # noqa: F401
874
+ TextGenerationOutput, # noqa: F401
875
+ TextGenerationOutputBestOfSequence, # noqa: F401
876
+ TextGenerationOutputDetails, # noqa: F401
877
+ TextGenerationOutputPrefillToken, # noqa: F401
878
+ TextGenerationOutputToken, # noqa: F401
879
+ TextGenerationStreamOutput, # noqa: F401
880
+ TextGenerationStreamOutputStreamDetails, # noqa: F401
881
+ TextGenerationStreamOutputToken, # noqa: F401
882
+ TextToAudioGenerationParameters, # noqa: F401
883
+ TextToAudioInput, # noqa: F401
884
+ TextToAudioOutput, # noqa: F401
885
+ TextToAudioParameters, # noqa: F401
886
+ TextToImageInput, # noqa: F401
887
+ TextToImageOutput, # noqa: F401
888
+ TextToImageParameters, # noqa: F401
889
+ TextToImageTargetSize, # noqa: F401
890
+ TokenClassificationInput, # noqa: F401
891
+ TokenClassificationOutputElement, # noqa: F401
892
+ TokenClassificationParameters, # noqa: F401
893
+ TranslationGenerationParameters, # noqa: F401
894
+ TranslationInput, # noqa: F401
895
+ TranslationOutput, # noqa: F401
896
+ VideoClassificationInput, # noqa: F401
897
+ VideoClassificationOutputElement, # noqa: F401
898
+ VideoClassificationParameters, # noqa: F401
899
+ VisualQuestionAnsweringInput, # noqa: F401
900
+ VisualQuestionAnsweringInputData, # noqa: F401
901
+ VisualQuestionAnsweringOutputElement, # noqa: F401
902
+ VisualQuestionAnsweringParameters, # noqa: F401
903
+ ZeroShotClassificationInput, # noqa: F401
904
+ ZeroShotClassificationInputData, # noqa: F401
905
+ ZeroShotClassificationOutputElement, # noqa: F401
906
+ ZeroShotClassificationParameters, # noqa: F401
907
+ ZeroShotImageClassificationInput, # noqa: F401
908
+ ZeroShotImageClassificationInputData, # noqa: F401
909
+ ZeroShotImageClassificationOutputElement, # noqa: F401
910
+ ZeroShotImageClassificationParameters, # noqa: F401
911
+ ZeroShotObjectDetectionBoundingBox, # noqa: F401
912
+ ZeroShotObjectDetectionInput, # noqa: F401
913
+ ZeroShotObjectDetectionInputData, # noqa: F401
914
+ ZeroShotObjectDetectionOutputElement, # noqa: F401
915
+ )
916
+ from .inference_api import InferenceApi # noqa: F401
917
+ from .keras_mixin import (
918
+ KerasModelHubMixin, # noqa: F401
919
+ from_pretrained_keras, # noqa: F401
920
+ push_to_hub_keras, # noqa: F401
921
+ save_pretrained_keras, # noqa: F401
922
+ )
923
+ from .repocard import (
924
+ DatasetCard, # noqa: F401
925
+ ModelCard, # noqa: F401
926
+ RepoCard, # noqa: F401
927
+ SpaceCard, # noqa: F401
928
+ metadata_eval_result, # noqa: F401
929
+ metadata_load, # noqa: F401
930
+ metadata_save, # noqa: F401
931
+ metadata_update, # noqa: F401
932
+ )
933
+ from .repocard_data import (
934
+ CardData, # noqa: F401
935
+ DatasetCardData, # noqa: F401
936
+ EvalResult, # noqa: F401
937
+ ModelCardData, # noqa: F401
938
+ SpaceCardData, # noqa: F401
939
+ )
940
+ from .repository import Repository # noqa: F401
941
+ from .serialization import (
942
+ StateDictSplit, # noqa: F401
943
+ get_tf_storage_size, # noqa: F401
944
+ get_torch_storage_id, # noqa: F401
945
+ get_torch_storage_size, # noqa: F401
946
+ save_torch_model, # noqa: F401
947
+ save_torch_state_dict, # noqa: F401
948
+ split_state_dict_into_shards_factory, # noqa: F401
949
+ split_tf_state_dict_into_shards, # noqa: F401
950
+ split_torch_state_dict_into_shards, # noqa: F401
951
+ )
952
+ from .utils import (
953
+ CachedFileInfo, # noqa: F401
954
+ CachedRepoInfo, # noqa: F401
955
+ CachedRevisionInfo, # noqa: F401
956
+ CacheNotFound, # noqa: F401
957
+ CorruptedCacheException, # noqa: F401
958
+ DeleteCacheStrategy, # noqa: F401
959
+ HFCacheInfo, # noqa: F401
960
+ HfFolder, # noqa: F401
961
+ cached_assets_path, # noqa: F401
962
+ configure_http_backend, # noqa: F401
963
+ dump_environment_info, # noqa: F401
964
+ get_session, # noqa: F401
965
+ get_token, # noqa: F401
966
+ logging, # noqa: F401
967
+ scan_cache_dir, # noqa: F401
968
+ )
huggingface_hub/_commit_api.py ADDED
@@ -0,0 +1,729 @@
1
+ """
2
+ Type definitions and utilities for the `create_commit` API
3
+ """
4
+
5
+ import base64
6
+ import io
7
+ import os
8
+ import warnings
9
+ from collections import defaultdict
10
+ from contextlib import contextmanager
11
+ from dataclasses import dataclass, field
12
+ from itertools import groupby
13
+ from pathlib import Path, PurePosixPath
14
+ from typing import TYPE_CHECKING, Any, BinaryIO, Dict, Iterable, Iterator, List, Literal, Optional, Tuple, Union
15
+
16
+ from tqdm.contrib.concurrent import thread_map
17
+
18
+ from . import constants
19
+ from .errors import EntryNotFoundError
20
+ from .file_download import hf_hub_url
21
+ from .lfs import UploadInfo, lfs_upload, post_lfs_batch_info
22
+ from .utils import (
23
+ FORBIDDEN_FOLDERS,
24
+ chunk_iterable,
25
+ get_session,
26
+ hf_raise_for_status,
27
+ logging,
28
+ sha,
29
+ tqdm_stream_file,
30
+ validate_hf_hub_args,
31
+ )
32
+ from .utils import tqdm as hf_tqdm
33
+
34
+
35
+ if TYPE_CHECKING:
36
+ pass
37
+
38
+
39
+ logger = logging.get_logger(__name__)
40
+
41
+
42
+ UploadMode = Literal["lfs", "regular"]
43
+
44
+ # Max is 1,000 per request on the Hub for HfApi.get_paths_info
45
+ # Otherwise we get:
46
+ # HfHubHTTPError: 413 Client Error: Payload Too Large for url: https://huggingface.co/api/datasets/xxx (Request ID: xxx)\n\ntoo many parameters
47
+ # See https://github.com/huggingface/huggingface_hub/issues/1503
48
+ FETCH_LFS_BATCH_SIZE = 500
49
+
50
+
51
+ @dataclass
52
+ class CommitOperationDelete:
53
+ """
54
+ Data structure holding necessary info to delete a file or a folder from a repository
55
+ on the Hub.
56
+
57
+ Args:
58
+ path_in_repo (`str`):
59
+ Relative filepath in the repo, for example: `"checkpoints/1fec34a/weights.bin"`
60
+ for a file or `"checkpoints/1fec34a/"` for a folder.
61
+ is_folder (`bool` or `Literal["auto"]`, *optional*):
62
+ Whether the Delete Operation applies to a folder or not. If "auto", the path
63
+ type (file or folder) is guessed automatically by checking whether the path ends with
64
+ a "/" (folder) or not (file). To explicitly set the path type, you can set
65
+ `is_folder=True` or `is_folder=False`.
66
+ """
67
+
68
+ path_in_repo: str
69
+ is_folder: Union[bool, Literal["auto"]] = "auto"
70
+
71
+ def __post_init__(self):
72
+ self.path_in_repo = _validate_path_in_repo(self.path_in_repo)
73
+
74
+ if self.is_folder == "auto":
75
+ self.is_folder = self.path_in_repo.endswith("/")
76
+ if not isinstance(self.is_folder, bool):
77
+ raise ValueError(
78
+ f"Wrong value for `is_folder`. Must be one of [`True`, `False`, `'auto'`]. Got '{self.is_folder}'."
79
+ )
80
+
+
+ @dataclass
+ class CommitOperationCopy:
+     """
+     Data structure holding necessary info to copy a file in a repository on the Hub.
+
+     Limitations:
+       - Only LFS files can be copied. To copy a regular file, you need to download it locally and re-upload it
+       - Cross-repository copies are not supported.
+
+     Note: you can combine a [`CommitOperationCopy`] and a [`CommitOperationDelete`] to rename an LFS file on the Hub.
+
+     Args:
+         src_path_in_repo (`str`):
+             Relative filepath in the repo of the file to be copied, e.g. `"checkpoints/1fec34a/weights.bin"`.
+         path_in_repo (`str`):
+             Relative filepath in the repo where to copy the file, e.g. `"checkpoints/1fec34a/weights_copy.bin"`.
+         src_revision (`str`, *optional*):
+             The git revision of the file to be copied. Can be any valid git revision.
+             Defaults to the target commit revision.
+     """
+
+     src_path_in_repo: str
+     path_in_repo: str
+     src_revision: Optional[str] = None
+
+     def __post_init__(self):
+         self.src_path_in_repo = _validate_path_in_repo(self.src_path_in_repo)
+         self.path_in_repo = _validate_path_in_repo(self.path_in_repo)
+
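The docstring's note about renaming an LFS file translates to the following sketch (repo id and file names are made up; this only works for LFS-tracked files, per the limitations above).

```python
from huggingface_hub import CommitOperationCopy, CommitOperationDelete, HfApi

HfApi().create_commit(
    repo_id="user/repo",  # hypothetical repo
    operations=[
        # copy to the new name, then delete the old one, in a single commit
        CommitOperationCopy(src_path_in_repo="weights.bin", path_in_repo="weights_v1.bin"),
        CommitOperationDelete(path_in_repo="weights.bin"),
    ],
    commit_message="Rename weights.bin to weights_v1.bin",
)
```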
+
+ @dataclass
+ class CommitOperationAdd:
+     """
+     Data structure holding necessary info to upload a file to a repository on the Hub.
+
+     Args:
+         path_in_repo (`str`):
+             Relative filepath in the repo, for example: `"checkpoints/1fec34a/weights.bin"`
+         path_or_fileobj (`str`, `Path`, `bytes`, or `BinaryIO`):
+             Either:
+             - a path to a local file (as `str` or `pathlib.Path`) to upload
+             - a buffer of bytes (`bytes`) holding the content of the file to upload
+             - a "file object" (subclass of `io.BufferedIOBase`), typically obtained
+               with `open(path, "rb")`. It must support `seek()` and `tell()` methods.
+
+     Raises:
+         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+             If `path_or_fileobj` is not one of `str`, `Path`, `bytes` or `io.BufferedIOBase`.
+         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+             If `path_or_fileobj` is a `str` or `Path` but not a path to an existing file.
+         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+             If `path_or_fileobj` is a `io.BufferedIOBase` but it doesn't support both
+             `seek()` and `tell()`.
+     """
+
+     path_in_repo: str
+     path_or_fileobj: Union[str, Path, bytes, BinaryIO]
+     upload_info: UploadInfo = field(init=False, repr=False)
+
+     # Internal attributes
+
+     # set to "lfs" or "regular" once known
+     _upload_mode: Optional[UploadMode] = field(init=False, repr=False, default=None)
+
+     # set to True if .gitignore rules prevent the file from being uploaded as LFS
+     # (server-side check)
+     _should_ignore: Optional[bool] = field(init=False, repr=False, default=None)
+
+     # set to the remote OID of the file if it has already been uploaded
+     # useful to determine if a commit will be empty or not
+     _remote_oid: Optional[str] = field(init=False, repr=False, default=None)
+
+     # set to True once the file has been uploaded as LFS
+     _is_uploaded: bool = field(init=False, repr=False, default=False)
+
+     # set to True once the file has been committed
+     _is_committed: bool = field(init=False, repr=False, default=False)
+
+     def __post_init__(self) -> None:
+         """Validates `path_or_fileobj` and computes `upload_info`."""
+         self.path_in_repo = _validate_path_in_repo(self.path_in_repo)
+
+         # Validate `path_or_fileobj` value
+         if isinstance(self.path_or_fileobj, Path):
+             self.path_or_fileobj = str(self.path_or_fileobj)
+         if isinstance(self.path_or_fileobj, str):
+             path_or_fileobj = os.path.normpath(os.path.expanduser(self.path_or_fileobj))
+             if not os.path.isfile(path_or_fileobj):
+                 raise ValueError(f"Provided path: '{path_or_fileobj}' is not a file on the local file system")
+         elif not isinstance(self.path_or_fileobj, (io.BufferedIOBase, bytes)):
+             # ^^ Inspired from: https://stackoverflow.com/questions/44584829/how-to-determine-if-file-is-opened-in-binary-or-text-mode
+             raise ValueError(
+                 "path_or_fileobj must be either an instance of str, bytes or"
+                 " io.BufferedIOBase. If you passed a file-like object, make sure it is"
+                 " in binary mode."
+             )
+         if isinstance(self.path_or_fileobj, io.BufferedIOBase):
+             try:
+                 self.path_or_fileobj.tell()
+                 self.path_or_fileobj.seek(0, os.SEEK_CUR)
+             except (OSError, AttributeError) as exc:
+                 raise ValueError(
+                     "path_or_fileobj is a file-like object but does not implement seek() and tell()"
+                 ) from exc
+
+         # Compute "upload_info" attribute
+         if isinstance(self.path_or_fileobj, str):
+             self.upload_info = UploadInfo.from_path(self.path_or_fileobj)
+         elif isinstance(self.path_or_fileobj, bytes):
+             self.upload_info = UploadInfo.from_bytes(self.path_or_fileobj)
+         else:
+             self.upload_info = UploadInfo.from_fileobj(self.path_or_fileobj)
+
+     @contextmanager
+     def as_file(self, with_tqdm: bool = False) -> Iterator[BinaryIO]:
+         """
+         A context manager that yields a file-like object allowing to read the underlying
+         data behind `path_or_fileobj`.
+
+         Args:
+             with_tqdm (`bool`, *optional*, defaults to `False`):
+                 If True, iterating over the file object will display a progress bar. Only
+                 works if the file-like object is a path to a file. Pure bytes and buffers
+                 are not supported.
+
+         Example:
+
+         ```python
+         >>> operation = CommitOperationAdd(
+         ...     path_in_repo="remote/dir/weights.h5",
+         ...     path_or_fileobj="./local/weights.h5",
+         ... )
+         CommitOperationAdd(path_in_repo='remote/dir/weights.h5', path_or_fileobj='./local/weights.h5')
+
+         >>> with operation.as_file() as file:
+         ...     content = file.read()
+
+         >>> with operation.as_file(with_tqdm=True) as file:
+         ...     while True:
+         ...         data = file.read(1024)
+         ...         if not data:
+         ...             break
+         config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]
+
+         >>> with operation.as_file(with_tqdm=True) as file:
+         ...     requests.put(..., data=file)
+         config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]
+         ```
+         """
+         if isinstance(self.path_or_fileobj, str) or isinstance(self.path_or_fileobj, Path):
+             if with_tqdm:
+                 with tqdm_stream_file(self.path_or_fileobj) as file:
+                     yield file
+             else:
+                 with open(self.path_or_fileobj, "rb") as file:
+                     yield file
+         elif isinstance(self.path_or_fileobj, bytes):
+             yield io.BytesIO(self.path_or_fileobj)
+         elif isinstance(self.path_or_fileobj, io.BufferedIOBase):
+             prev_pos = self.path_or_fileobj.tell()
+             yield self.path_or_fileobj
+             self.path_or_fileobj.seek(prev_pos, io.SEEK_SET)
+
+     def b64content(self) -> bytes:
+         """
+         The base64-encoded content of `path_or_fileobj`
+
+         Returns: `bytes`
+         """
+         with self.as_file() as file:
+             return base64.b64encode(file.read())
+
+     @property
+     def _local_oid(self) -> Optional[str]:
+         """Return the OID of the local file.
+
+         This OID is then compared to `self._remote_oid` to check if the file has changed compared to the remote one.
+         If the file did not change, we won't upload it again to prevent empty commits.
+
+         For LFS files, the OID corresponds to the SHA256 of the file content (used as the LFS ref).
+         For regular files, the OID corresponds to the SHA1 of the file content.
+         Note: this is slightly different from git OID computation since the OID of an LFS file is usually the git-SHA1 of the
+         pointer file content (not the actual file content). However, using the SHA256 is enough to detect changes
+         and more convenient client-side.
+         """
+         if self._upload_mode is None:
+             return None
+         elif self._upload_mode == "lfs":
+             return self.upload_info.sha256.hex()
+         else:
+             # Regular file => compute sha1
+             # => no need to read by chunk since the file is guaranteed to be <=5MB.
+             with self.as_file() as file:
+                 return sha.git_hash(file.read())
+
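For regular files, `_local_oid` uses a git-style SHA1. A minimal sketch of that formula, assuming `sha.git_hash` follows the standard git blob hash (`git hash-object`):

```python
import hashlib

def git_blob_hash(data: bytes) -> str:
    # git hashes a blob as: sha1(b"blob <size>\0" + content)
    header = f"blob {len(data)}\0".encode()
    return hashlib.sha1(header + data).hexdigest()

# Matches `git hash-object` on the same content:
print(git_blob_hash(b"hello\n"))  # ce013625030ba8dba906f756967f9e9ca394464a
```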
+
+ def _validate_path_in_repo(path_in_repo: str) -> str:
+     # Validate `path_in_repo` value to prevent a server-side issue
+     if path_in_repo.startswith("/"):
+         path_in_repo = path_in_repo[1:]
+     if path_in_repo == "." or path_in_repo == ".." or path_in_repo.startswith("../"):
+         raise ValueError(f"Invalid `path_in_repo` in CommitOperation: '{path_in_repo}'")
+     if path_in_repo.startswith("./"):
+         path_in_repo = path_in_repo[2:]
+     for forbidden in FORBIDDEN_FOLDERS:
+         if any(part == forbidden for part in path_in_repo.split("/")):
+             raise ValueError(
+                 f"Invalid `path_in_repo` in CommitOperation: cannot update files under a '{forbidden}/' folder (path:"
+                 f" '{path_in_repo}')."
+             )
+     return path_in_repo
+
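The normalization rules read directly from the function above, shown as a small illustration:

```python
_validate_path_in_repo("/README.md")     # -> "README.md"   (leading "/" stripped)
_validate_path_in_repo("./weights.bin")  # -> "weights.bin" (leading "./" stripped)
_validate_path_in_repo("../escape.txt")  # raises ValueError (path traversal rejected)
```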
+
+ CommitOperation = Union[CommitOperationAdd, CommitOperationCopy, CommitOperationDelete]
+
+
+ def _warn_on_overwriting_operations(operations: List[CommitOperation]) -> None:
+     """
+     Warn user when a list of operations is expected to overwrite itself in a single
+     commit.
+
+     Rules:
+     - If a filepath is updated by multiple `CommitOperationAdd` operations, a warning
+       message is triggered.
+     - If a filepath is updated at least once by a `CommitOperationAdd` and then deleted
+       by a `CommitOperationDelete`, a warning is triggered.
+     - If a `CommitOperationDelete` deletes a filepath that is then updated by a
+       `CommitOperationAdd`, no warning is triggered. This is usually useless (no need to
+       delete before upload) but can happen if a user deletes an entire folder and then
+       adds new files to it.
+     """
+     nb_additions_per_path: Dict[str, int] = defaultdict(int)
+     for operation in operations:
+         path_in_repo = operation.path_in_repo
+         if isinstance(operation, CommitOperationAdd):
+             if nb_additions_per_path[path_in_repo] > 0:
+                 warnings.warn(
+                     "About to update the same file multiple times in the same commit:"
+                     f" '{path_in_repo}'. This can cause undesired inconsistencies in"
+                     " your repo."
+                 )
+             nb_additions_per_path[path_in_repo] += 1
+             for parent in PurePosixPath(path_in_repo).parents:
+                 # Also keep track of number of updated files per folder
+                 # => warns if deleting a folder overwrites some contained files
+                 nb_additions_per_path[str(parent)] += 1
+         if isinstance(operation, CommitOperationDelete):
+             if nb_additions_per_path[str(PurePosixPath(path_in_repo))] > 0:
+                 if operation.is_folder:
+                     warnings.warn(
+                         "About to delete a folder containing files that have just been"
+                         f" updated within the same commit: '{path_in_repo}'. This can"
+                         " cause undesired inconsistencies in your repo."
+                     )
+                 else:
+                     warnings.warn(
+                         "About to delete a file that has just been updated within the"
+                         f" same commit: '{path_in_repo}'. This can cause undesired"
+                         " inconsistencies in your repo."
+                     )
+
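As a hedged illustration (paths made up), an operation list that would trip the folder rule above when passed to `create_commit`, which invokes this helper:

```python
from huggingface_hub import CommitOperationAdd, CommitOperationDelete

ops = [
    CommitOperationAdd(path_in_repo="data/train.csv", path_or_fileobj=b"a,b\n1,2\n"),
    # deletes the folder that was just updated => UserWarning at commit time
    CommitOperationDelete(path_in_repo="data/"),
]
```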
+
+ @validate_hf_hub_args
+ def _upload_lfs_files(
+     *,
+     additions: List[CommitOperationAdd],
+     repo_type: str,
+     repo_id: str,
+     headers: Dict[str, str],
+     endpoint: Optional[str] = None,
+     num_threads: int = 5,
+     revision: Optional[str] = None,
+ ):
+     """
+     Uploads the content of `additions` to the Hub using the large file storage protocol.
+
+     Relevant external documentation:
+         - LFS Batch API: https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md
+
+     Args:
+         additions (`List` of `CommitOperationAdd`):
+             The files to be uploaded
+         repo_type (`str`):
+             Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
+         repo_id (`str`):
+             A namespace (user or an organization) and a repo name separated
+             by a `/`.
+         headers (`Dict[str, str]`):
+             Headers to use for the request, including authorization headers and user agent.
+         num_threads (`int`, *optional*):
+             The number of concurrent threads to use when uploading. Defaults to 5.
+         revision (`str`, *optional*):
+             The git revision to upload to.
+
+     Raises:
+         [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
+             If an upload failed for any reason
+         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+             If the server returns malformed responses
+         [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
+             If the LFS batch endpoint returned an HTTP error.
+     """
+     # Step 1: retrieve upload instructions from the LFS batch endpoint.
+     # Upload instructions are retrieved in chunks of 256 files to avoid reaching
+     # the payload limit.
+     batch_actions: List[Dict] = []
+     for chunk in chunk_iterable(additions, chunk_size=256):
+         batch_actions_chunk, batch_errors_chunk = post_lfs_batch_info(
+             upload_infos=[op.upload_info for op in chunk],
+             repo_id=repo_id,
+             repo_type=repo_type,
+             revision=revision,
+             endpoint=endpoint,
+             headers=headers,
+             token=None,  # already passed in 'headers'
+         )
+
+         # If at least 1 error, we do not retrieve information for other chunks
+         if batch_errors_chunk:
+             message = "\n".join(
+                 [
+                     f'Encountered error for file with OID {err.get("oid")}: `{err.get("error", {}).get("message")}`'
+                     for err in batch_errors_chunk
+                 ]
+             )
+             raise ValueError(f"LFS batch endpoint returned errors:\n{message}")
+
+         batch_actions += batch_actions_chunk
+     oid2addop = {add_op.upload_info.sha256.hex(): add_op for add_op in additions}
+
+     # Step 2: ignore files that have already been uploaded
+     filtered_actions = []
+     for action in batch_actions:
+         if action.get("actions") is None:
+             logger.debug(
+                 f"Content of file {oid2addop[action['oid']].path_in_repo} is already"
+                 " present upstream - skipping upload."
+             )
+         else:
+             filtered_actions.append(action)
+
+     if len(filtered_actions) == 0:
+         logger.debug("No LFS files to upload.")
+         return
+
+     # Step 3: upload files concurrently according to these instructions
+     def _wrapped_lfs_upload(batch_action) -> None:
+         try:
+             operation = oid2addop[batch_action["oid"]]
+             lfs_upload(operation=operation, lfs_batch_action=batch_action, headers=headers, endpoint=endpoint)
+         except Exception as exc:
+             raise RuntimeError(f"Error while uploading '{operation.path_in_repo}' to the Hub.") from exc
+
+     if constants.HF_HUB_ENABLE_HF_TRANSFER:
+         logger.debug(f"Uploading {len(filtered_actions)} LFS files to the Hub using `hf_transfer`.")
+         for action in hf_tqdm(filtered_actions, name="huggingface_hub.lfs_upload"):
+             _wrapped_lfs_upload(action)
+     elif len(filtered_actions) == 1:
+         logger.debug("Uploading 1 LFS file to the Hub")
+         _wrapped_lfs_upload(filtered_actions[0])
+     else:
+         logger.debug(
+             f"Uploading {len(filtered_actions)} LFS files to the Hub using up to {num_threads} threads concurrently"
+         )
+         thread_map(
+             _wrapped_lfs_upload,
+             filtered_actions,
+             desc=f"Upload {len(filtered_actions)} LFS files",
+             max_workers=num_threads,
+             tqdm_class=hf_tqdm,
+         )
+
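For orientation, a simplified sketch of one LFS batch exchange, with field names taken from the LFS Batch API spec linked above and all values made up:

```python
# Client -> server: one "upload" request listing objects by sha256 OID and size.
request = {
    "operation": "upload",
    "transfers": ["basic"],
    "objects": [{"oid": "a1b2...deadbeef", "size": 123456789}],
}
# Server -> client: one entry per object. An entry without "actions" means the
# blob already exists upstream, which is exactly what Step 2 above skips.
response = {
    "objects": [
        {"oid": "a1b2...deadbeef", "size": 123456789,
         "actions": {"upload": {"href": "https://..."}}},
    ]
}
```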
+
+ def _validate_preupload_info(preupload_info: dict):
+     files = preupload_info.get("files")
+     if not isinstance(files, list):
+         raise ValueError("preupload_info is improperly formatted")
+     for file_info in files:
+         if not (
+             isinstance(file_info, dict)
+             and isinstance(file_info.get("path"), str)
+             and isinstance(file_info.get("uploadMode"), str)
+             and (file_info["uploadMode"] in ("lfs", "regular"))
+         ):
+             raise ValueError("preupload_info is improperly formatted")
+     return preupload_info
+
+
+ @validate_hf_hub_args
+ def _fetch_upload_modes(
+     additions: Iterable[CommitOperationAdd],
+     repo_type: str,
+     repo_id: str,
+     headers: Dict[str, str],
+     revision: str,
+     endpoint: Optional[str] = None,
+     create_pr: bool = False,
+     gitignore_content: Optional[str] = None,
+ ) -> None:
+     """
+     Requests the Hub "preupload" endpoint to determine whether each input file should be uploaded as a regular git blob
+     or as git LFS blob. Input `additions` are mutated in-place with the upload mode.
+
+     Args:
+         additions (`Iterable` of :class:`CommitOperationAdd`):
+             Iterable of :class:`CommitOperationAdd` describing the files to
+             upload to the Hub.
+         repo_type (`str`):
+             Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
+         repo_id (`str`):
+             A namespace (user or an organization) and a repo name separated
+             by a `/`.
+         headers (`Dict[str, str]`):
+             Headers to use for the request, including authorization headers and user agent.
+         revision (`str`):
+             The git revision to upload the files to. Can be any valid git revision.
+         gitignore_content (`str`, *optional*):
+             The content of the `.gitignore` file to know which files should be ignored. The order of priority
+             is to first check if `gitignore_content` is passed, then check if the `.gitignore` file is present
+             in the list of files to commit and finally default to the `.gitignore` file already hosted on the Hub
+             (if any).
+
+     Raises:
+         [`~utils.HfHubHTTPError`]
+             If the Hub API returned an error.
+         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+             If the Hub API response is improperly formatted.
+     """
+     endpoint = endpoint if endpoint is not None else constants.ENDPOINT
+
+     # Fetch upload mode (LFS or regular) chunk by chunk.
+     upload_modes: Dict[str, UploadMode] = {}
+     should_ignore_info: Dict[str, bool] = {}
+     oid_info: Dict[str, Optional[str]] = {}
+
+     for chunk in chunk_iterable(additions, 256):
+         payload: Dict = {
+             "files": [
+                 {
+                     "path": op.path_in_repo,
+                     "sample": base64.b64encode(op.upload_info.sample).decode("ascii"),
+                     "size": op.upload_info.size,
+                 }
+                 for op in chunk
+             ]
+         }
+         if gitignore_content is not None:
+             payload["gitIgnore"] = gitignore_content
+
+         resp = get_session().post(
+             f"{endpoint}/api/{repo_type}s/{repo_id}/preupload/{revision}",
+             json=payload,
+             headers=headers,
+             params={"create_pr": "1"} if create_pr else None,
+         )
+         hf_raise_for_status(resp)
+         preupload_info = _validate_preupload_info(resp.json())
+         upload_modes.update(**{file["path"]: file["uploadMode"] for file in preupload_info["files"]})
+         should_ignore_info.update(**{file["path"]: file["shouldIgnore"] for file in preupload_info["files"]})
+         oid_info.update(**{file["path"]: file.get("oid") for file in preupload_info["files"]})
+
+     # Set upload mode for each addition operation
+     for addition in additions:
+         addition._upload_mode = upload_modes[addition.path_in_repo]
+         addition._should_ignore = should_ignore_info[addition.path_in_repo]
+         addition._remote_oid = oid_info[addition.path_in_repo]
+
+     # Empty files cannot be uploaded as LFS (S3 would fail with a 501 Not Implemented)
+     # => empty files are uploaded as "regular" to still allow users to commit them.
+     for addition in additions:
+         if addition.upload_info.size == 0:
+             addition._upload_mode = "regular"
+
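A sketch of the preupload exchange built by the function above, with made-up values (`sample` is a base64-encoded slice of the file, used server-side for content sniffing):

```python
payload = {
    "files": [
        {"path": "weights.bin", "sample": "AAAA...", "size": 503316480},
        {"path": "README.md", "sample": "IyBN...", "size": 1204},
    ]
}
# Expected response: one entry per file with its resolved upload mode,
# the fields consumed in the `.update(...)` calls above.
response = {
    "files": [
        {"path": "weights.bin", "uploadMode": "lfs", "shouldIgnore": False, "oid": None},
        {"path": "README.md", "uploadMode": "regular", "shouldIgnore": False, "oid": None},
    ]
}
```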
+
+ @validate_hf_hub_args
+ def _fetch_files_to_copy(
+     copies: Iterable[CommitOperationCopy],
+     repo_type: str,
+     repo_id: str,
+     headers: Dict[str, str],
+     revision: str,
+     endpoint: Optional[str] = None,
+ ) -> Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]]:
+     """
+     Fetch information about the files to copy.
+
+     For LFS files, we only need their metadata (file size and sha256) while for regular files
+     we need to download the raw content from the Hub.
+
+     Args:
+         copies (`Iterable` of :class:`CommitOperationCopy`):
+             Iterable of :class:`CommitOperationCopy` describing the files to
+             copy on the Hub.
+         repo_type (`str`):
+             Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
+         repo_id (`str`):
+             A namespace (user or an organization) and a repo name separated
+             by a `/`.
+         headers (`Dict[str, str]`):
+             Headers to use for the request, including authorization headers and user agent.
+         revision (`str`):
+             The git revision to upload the files to. Can be any valid git revision.
+
+     Returns: `Dict[Tuple[str, Optional[str]], Union[RepoFile, bytes]]`
+         Key is the file path and revision of the file to copy.
+         Value is the raw content as bytes (for regular files) or the file information as a RepoFile (for LFS files).
+
+     Raises:
+         [`~utils.HfHubHTTPError`]
+             If the Hub API returned an error.
+         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+             If the Hub API response is improperly formatted.
+     """
+     from .hf_api import HfApi, RepoFolder
+
+     hf_api = HfApi(endpoint=endpoint, headers=headers)
+     files_to_copy: Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]] = {}
+     for src_revision, operations in groupby(copies, key=lambda op: op.src_revision):
+         operations = list(operations)  # type: ignore
+         paths = [op.src_path_in_repo for op in operations]
+         for offset in range(0, len(paths), FETCH_LFS_BATCH_SIZE):
+             src_repo_files = hf_api.get_paths_info(
+                 repo_id=repo_id,
+                 paths=paths[offset : offset + FETCH_LFS_BATCH_SIZE],
+                 revision=src_revision or revision,
+                 repo_type=repo_type,
+             )
+             for src_repo_file in src_repo_files:
+                 if isinstance(src_repo_file, RepoFolder):
+                     raise NotImplementedError("Copying a folder is not implemented.")
+                 if src_repo_file.lfs:
+                     files_to_copy[(src_repo_file.path, src_revision)] = src_repo_file
+                 else:
+                     # TODO: (optimization) download regular files to copy concurrently
+                     url = hf_hub_url(
+                         endpoint=endpoint,
+                         repo_type=repo_type,
+                         repo_id=repo_id,
+                         revision=src_revision or revision,
+                         filename=src_repo_file.path,
+                     )
+                     response = get_session().get(url, headers=headers)
+                     hf_raise_for_status(response)
+                     files_to_copy[(src_repo_file.path, src_revision)] = response.content
+         for operation in operations:
+             if (operation.src_path_in_repo, src_revision) not in files_to_copy:
+                 raise EntryNotFoundError(
+                     f"Cannot copy {operation.src_path_in_repo} at revision "
+                     f"{src_revision or revision}: file is missing on repo."
+                 )
+     return files_to_copy
+
+
+ def _prepare_commit_payload(
+     operations: Iterable[CommitOperation],
+     files_to_copy: Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]],
+     commit_message: str,
+     commit_description: Optional[str] = None,
+     parent_commit: Optional[str] = None,
+ ) -> Iterable[Dict[str, Any]]:
+     """
+     Builds the payload to POST to the `/commit` API of the Hub.
+
+     Payload is returned as an iterator so that it can be streamed as a ndjson in the
+     POST request.
+
+     For more information, see:
+         - https://github.com/huggingface/huggingface_hub/issues/1085#issuecomment-1265208073
+         - http://ndjson.org/
+     """
+     commit_description = commit_description if commit_description is not None else ""
+
+     # 1. Send a header item with the commit metadata
+     header_value = {"summary": commit_message, "description": commit_description}
+     if parent_commit is not None:
+         header_value["parentCommit"] = parent_commit
+     yield {"key": "header", "value": header_value}
+
+     nb_ignored_files = 0
+
+     # 2. Send operations, one per line
+     for operation in operations:
+         # Skip ignored files
+         if isinstance(operation, CommitOperationAdd) and operation._should_ignore:
+             logger.debug(f"Skipping file '{operation.path_in_repo}' in commit (ignored by gitignore file).")
+             nb_ignored_files += 1
+             continue
+
+         # 2.a. Case adding a regular file
+         if isinstance(operation, CommitOperationAdd) and operation._upload_mode == "regular":
+             yield {
+                 "key": "file",
+                 "value": {
+                     "content": operation.b64content().decode(),
+                     "path": operation.path_in_repo,
+                     "encoding": "base64",
+                 },
+             }
+         # 2.b. Case adding an LFS file
+         elif isinstance(operation, CommitOperationAdd) and operation._upload_mode == "lfs":
+             yield {
+                 "key": "lfsFile",
+                 "value": {
+                     "path": operation.path_in_repo,
+                     "algo": "sha256",
+                     "oid": operation.upload_info.sha256.hex(),
+                     "size": operation.upload_info.size,
+                 },
+             }
+         # 2.c. Case deleting a file or folder
+         elif isinstance(operation, CommitOperationDelete):
+             yield {
+                 "key": "deletedFolder" if operation.is_folder else "deletedFile",
+                 "value": {"path": operation.path_in_repo},
+             }
+         # 2.d. Case copying a file or folder
+         elif isinstance(operation, CommitOperationCopy):
+             file_to_copy = files_to_copy[(operation.src_path_in_repo, operation.src_revision)]
+             if isinstance(file_to_copy, bytes):
+                 yield {
+                     "key": "file",
+                     "value": {
+                         "content": base64.b64encode(file_to_copy).decode(),
+                         "path": operation.path_in_repo,
+                         "encoding": "base64",
+                     },
+                 }
+             elif file_to_copy.lfs:
+                 yield {
+                     "key": "lfsFile",
+                     "value": {
+                         "path": operation.path_in_repo,
+                         "algo": "sha256",
+                         "oid": file_to_copy.lfs.sha256,
+                     },
+                 }
+             else:
+                 raise ValueError(
+                     "Malformed files_to_copy (should be raw file content as bytes or RepoFile objects with LFS info)."
+                 )
+         # 2.e. Never expected to happen
+         else:
+             raise ValueError(
+                 f"Unknown operation to commit. Operation: {operation}. Upload mode:"
+                 f" {getattr(operation, '_upload_mode', None)}"
+             )
+
+     if nb_ignored_files > 0:
+         logger.info(f"Skipped {nb_ignored_files} file(s) in commit (ignored by gitignore file).")
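To make the ndjson framing concrete, here is what the streamed body looks like for a small commit (one JSON object per line; values made up):

```python
import json

lines = [
    {"key": "header", "value": {"summary": "Upload weights", "description": ""}},
    {"key": "file", "value": {"content": "aGVsbG8=", "path": "hello.txt", "encoding": "base64"}},
    {"key": "deletedFile", "value": {"path": "old.txt"}},
]
ndjson_body = "\n".join(json.dumps(line) for line in lines)
```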
huggingface_hub/_commit_scheduler.py ADDED
@@ -0,0 +1,327 @@
+ import atexit
+ import logging
+ import os
+ import time
+ from concurrent.futures import Future
+ from dataclasses import dataclass
+ from io import SEEK_END, SEEK_SET, BytesIO
+ from pathlib import Path
+ from threading import Lock, Thread
+ from typing import Dict, List, Optional, Union
+
+ from .hf_api import DEFAULT_IGNORE_PATTERNS, CommitInfo, CommitOperationAdd, HfApi
+ from .utils import filter_repo_objects
+
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass(frozen=True)
+ class _FileToUpload:
+     """Temporary dataclass to store info about files to upload. Not meant to be used directly."""
+
+     local_path: Path
+     path_in_repo: str
+     size_limit: int
+     last_modified: float
+
+
+ class CommitScheduler:
+     """
+     Scheduler to upload a local folder to the Hub at regular intervals (e.g. push to hub every 5 minutes).
+
+     The scheduler is started when instantiated and runs indefinitely. At the end of your script, a last commit is
+     triggered. Check out the [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#scheduled-uploads)
+     to learn more about how to use it.
+
+     Args:
+         repo_id (`str`):
+             The id of the repo to commit to.
+         folder_path (`str` or `Path`):
+             Path to the local folder to upload regularly.
+         every (`int` or `float`, *optional*):
+             The number of minutes between each commit. Defaults to 5 minutes.
+         path_in_repo (`str`, *optional*):
+             Relative path of the directory in the repo, for example: `"checkpoints/"`. Defaults to the root folder
+             of the repository.
+         repo_type (`str`, *optional*):
+             The type of the repo to commit to. Defaults to `model`.
+         revision (`str`, *optional*):
+             The revision of the repo to commit to. Defaults to `main`.
+         private (`bool`, *optional*):
+             Whether to make the repo private. Defaults to `False`. This value is ignored if the repo already exists.
+         token (`str`, *optional*):
+             The token to use to commit to the repo. Defaults to the token saved on the machine.
+         allow_patterns (`List[str]` or `str`, *optional*):
+             If provided, only files matching at least one pattern are uploaded.
+         ignore_patterns (`List[str]` or `str`, *optional*):
+             If provided, files matching any of the patterns are not uploaded.
+         squash_history (`bool`, *optional*):
+             Whether to squash the history of the repo after each commit. Defaults to `False`. Squashing commits is
+             useful to avoid degraded performance on the repo when it grows too large.
+         hf_api (`HfApi`, *optional*):
+             The [`HfApi`] client to use to commit to the Hub. Can be set with custom settings (user agent, token,...).
+
+     Example:
+     ```py
+     >>> from pathlib import Path
+     >>> from huggingface_hub import CommitScheduler
+
+     # Scheduler uploads every 10 minutes
+     >>> csv_path = Path("watched_folder/data.csv")
+     >>> CommitScheduler(repo_id="test_scheduler", repo_type="dataset", folder_path=csv_path.parent, every=10)
+
+     >>> with csv_path.open("a") as f:
+     ...     f.write("first line")
+
+     # Some time later (...)
+     >>> with csv_path.open("a") as f:
+     ...     f.write("second line")
+     ```
+     """
+
+     def __init__(
+         self,
+         *,
+         repo_id: str,
+         folder_path: Union[str, Path],
+         every: Union[int, float] = 5,
+         path_in_repo: Optional[str] = None,
+         repo_type: Optional[str] = None,
+         revision: Optional[str] = None,
+         private: bool = False,
+         token: Optional[str] = None,
+         allow_patterns: Optional[Union[List[str], str]] = None,
+         ignore_patterns: Optional[Union[List[str], str]] = None,
+         squash_history: bool = False,
+         hf_api: Optional["HfApi"] = None,
+     ) -> None:
+         self.api = hf_api or HfApi(token=token)
+
+         # Folder
+         self.folder_path = Path(folder_path).expanduser().resolve()
+         self.path_in_repo = path_in_repo or ""
+         self.allow_patterns = allow_patterns
+
+         if ignore_patterns is None:
+             ignore_patterns = []
+         elif isinstance(ignore_patterns, str):
+             ignore_patterns = [ignore_patterns]
+         self.ignore_patterns = ignore_patterns + DEFAULT_IGNORE_PATTERNS
+
+         if self.folder_path.is_file():
+             raise ValueError(f"'folder_path' must be a directory, not a file: '{self.folder_path}'.")
+         self.folder_path.mkdir(parents=True, exist_ok=True)
+
+         # Repository
+         repo_url = self.api.create_repo(repo_id=repo_id, private=private, repo_type=repo_type, exist_ok=True)
+         self.repo_id = repo_url.repo_id
+         self.repo_type = repo_type
+         self.revision = revision
+         self.token = token
+
+         # Keep track of already uploaded files
+         self.last_uploaded: Dict[Path, float] = {}  # key is local path, value is timestamp
+
+         # Scheduler
+         if not every > 0:
+             raise ValueError(f"'every' must be a positive number, not '{every}'.")
+         self.lock = Lock()
+         self.every = every
+         self.squash_history = squash_history
+
+         logger.info(f"Scheduled job to push '{self.folder_path}' to '{self.repo_id}' every {self.every} minutes.")
+         self._scheduler_thread = Thread(target=self._run_scheduler, daemon=True)
+         self._scheduler_thread.start()
+         atexit.register(self._push_to_hub)
+
+         self.__stopped = False
+
+     def stop(self) -> None:
+         """Stop the scheduler.
+
+         A stopped scheduler cannot be restarted. Mostly for tests purposes.
+         """
+         self.__stopped = True
+
+     def _run_scheduler(self) -> None:
+         """Dumb thread waiting between each scheduled push to Hub."""
+         while True:
+             self.last_future = self.trigger()
+             time.sleep(self.every * 60)
+             if self.__stopped:
+                 break
+
+     def trigger(self) -> Future:
+         """Trigger a `push_to_hub` and return a future.
+
+         This method is automatically called every `every` minutes. You can also call it manually to trigger a commit
+         immediately, without waiting for the next scheduled commit.
+         """
+         return self.api.run_as_future(self._push_to_hub)
+
+     def _push_to_hub(self) -> Optional[CommitInfo]:
+         if self.__stopped:  # If stopped, already scheduled commits are ignored
+             return None
+
+         logger.info("(Background) scheduled commit triggered.")
+         try:
+             value = self.push_to_hub()
+             if self.squash_history:
+                 logger.info("(Background) squashing repo history.")
+                 self.api.super_squash_history(repo_id=self.repo_id, repo_type=self.repo_type, branch=self.revision)
+             return value
+         except Exception as e:
+             logger.error(f"Error while pushing to Hub: {e}")  # Depending on the setup, error might be silenced
+             raise
+
+     def push_to_hub(self) -> Optional[CommitInfo]:
+         """
+         Push folder to the Hub and return the commit info.
+
+         <Tip warning={true}>
+
+         This method is not meant to be called directly. It is run in the background by the scheduler, respecting a
+         queue mechanism to avoid concurrent commits. Making a direct call to the method might lead to concurrency
+         issues.
+
+         </Tip>
+
+         The default behavior of `push_to_hub` is to assume an append-only folder. It lists all files in the folder
+         and uploads only changed files. If no changes are found, the method returns without committing anything. If
+         you want to change this behavior, you can inherit from [`CommitScheduler`] and override this method. This can
+         be useful, for example, to compress data into a single file before committing. For more details and examples,
+         check out our [integration guide](https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#scheduled-uploads).
+         """
+         # Check files to upload (with lock)
+         with self.lock:
+             logger.debug("Listing files to upload for scheduled commit.")
+
+             # List files from folder (taken from `_prepare_upload_folder_additions`)
+             relpath_to_abspath = {
+                 path.relative_to(self.folder_path).as_posix(): path
+                 for path in sorted(self.folder_path.glob("**/*"))  # sorted to be deterministic
+                 if path.is_file()
+             }
+             prefix = f"{self.path_in_repo.strip('/')}/" if self.path_in_repo else ""
+
+             # Filter with pattern + filter out unchanged files + retrieve current file size
+             files_to_upload: List[_FileToUpload] = []
+             for relpath in filter_repo_objects(
+                 relpath_to_abspath.keys(), allow_patterns=self.allow_patterns, ignore_patterns=self.ignore_patterns
+             ):
+                 local_path = relpath_to_abspath[relpath]
+                 stat = local_path.stat()
+                 if self.last_uploaded.get(local_path) is None or self.last_uploaded[local_path] != stat.st_mtime:
+                     files_to_upload.append(
+                         _FileToUpload(
+                             local_path=local_path,
+                             path_in_repo=prefix + relpath,
+                             size_limit=stat.st_size,
+                             last_modified=stat.st_mtime,
+                         )
+                     )
+
+         # Return if nothing to upload
+         if len(files_to_upload) == 0:
+             logger.debug("Dropping schedule commit: no changed file to upload.")
+             return None
+
+         # Convert `_FileToUpload` items to `CommitOperationAdd` (=> compute file shas + limit to file size)
+         logger.debug("Removing unchanged files since previous scheduled commit.")
+         add_operations = [
+             CommitOperationAdd(
+                 # Cap the file at its current size, even if the user appends data to it while a scheduled commit is happening
+                 path_or_fileobj=PartialFileIO(file_to_upload.local_path, size_limit=file_to_upload.size_limit),
+                 path_in_repo=file_to_upload.path_in_repo,
+             )
+             for file_to_upload in files_to_upload
+         ]
+
+         # Upload files (append mode expected - no need for lock)
+         logger.debug("Uploading files for scheduled commit.")
+         commit_info = self.api.create_commit(
+             repo_id=self.repo_id,
+             repo_type=self.repo_type,
+             operations=add_operations,
+             commit_message="Scheduled Commit",
+             revision=self.revision,
+         )
+
+         # Successful commit: keep track of the latest "last_modified" for each file
+         for file in files_to_upload:
+             self.last_uploaded[file.local_path] = file.last_modified
+         return commit_info
+
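The docstring above invites overriding `push_to_hub`. A hedged sketch of that pattern, compressing the watched folder into one archive before committing (class name, archive layout and file name are made up):

```python
import tempfile
from pathlib import Path
from zipfile import ZipFile

from huggingface_hub import CommitScheduler

class ZipScheduler(CommitScheduler):
    def push_to_hub(self):
        # Zip the whole folder and upload a single file instead of many.
        with tempfile.TemporaryDirectory() as tmp:
            archive = Path(tmp) / "data.zip"
            with ZipFile(archive, "w") as zf:
                for path in self.folder_path.glob("**/*"):
                    if path.is_file():
                        zf.write(path, arcname=path.relative_to(self.folder_path))
            return self.api.upload_file(
                path_or_fileobj=archive,
                path_in_repo="data.zip",
                repo_id=self.repo_id,
                repo_type=self.repo_type,
                revision=self.revision,
            )
```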
+
+ class PartialFileIO(BytesIO):
+     """A file-like object that reads only the first part of a file.
+
+     Useful to upload a file to the Hub when the user might still be appending data to it. Only the first part of the
+     file is uploaded (i.e. the part that was available when the filesystem was first scanned).
+
+     In practice, only used internally by the CommitScheduler to regularly push a folder to the Hub with minimal
+     disturbance for the user. The object is passed to `CommitOperationAdd`.
+
+     Only supports `read`, `tell` and `seek` methods.
+
+     Args:
+         file_path (`str` or `Path`):
+             Path to the file to read.
+         size_limit (`int`):
+             The maximum number of bytes to read from the file. If the file is larger than this, only the first part
+             will be read (and uploaded).
+     """
+
+     def __init__(self, file_path: Union[str, Path], size_limit: int) -> None:
+         self._file_path = Path(file_path)
+         self._file = self._file_path.open("rb")
+         self._size_limit = min(size_limit, os.fstat(self._file.fileno()).st_size)
+
+     def __del__(self) -> None:
+         self._file.close()
+         return super().__del__()
+
+     def __repr__(self) -> str:
+         return f"<PartialFileIO file_path={self._file_path} size_limit={self._size_limit}>"
+
+     def __len__(self) -> int:
+         return self._size_limit
+
+     def __getattribute__(self, name: str):
+         if name.startswith("_") or name in ("read", "tell", "seek"):  # only 3 public methods supported
+             return super().__getattribute__(name)
+         raise NotImplementedError(f"PartialFileIO does not support '{name}'.")
+
+     def tell(self) -> int:
+         """Return the current file position."""
+         return self._file.tell()
+
+     def seek(self, __offset: int, __whence: int = SEEK_SET) -> int:
+         """Change the stream position to the given offset.
+
+         Behavior is the same as a regular file, except that the position is capped to the size limit.
+         """
+         if __whence == SEEK_END:
+             # SEEK_END => set from the truncated end
+             __offset = len(self) + __offset
+             __whence = SEEK_SET
+
+         pos = self._file.seek(__offset, __whence)
+         if pos > self._size_limit:
+             return self._file.seek(self._size_limit)
+         return pos
+
+     def read(self, __size: Optional[int] = -1) -> bytes:
+         """Read at most `__size` bytes from the file.
+
+         Behavior is the same as a regular file, except that it is capped to the size limit.
+         """
+         current = self._file.tell()
+         if __size is None or __size < 0:
+             # Read until file limit
+             truncated_size = self._size_limit - current
+         else:
+             # Read until file limit or __size
+             truncated_size = min(__size, self._size_limit - current)
+         return self._file.read(truncated_size)
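A quick behavior check for the size cap, read straight from the class above (file name is made up; `PartialFileIO` is an internal helper, not part of the public API):

```python
from pathlib import Path

Path("log.txt").write_bytes(b"0123456789")
f = PartialFileIO("log.txt", size_limit=4)
assert len(f) == 4
assert f.read() == b"0123"  # reads stop at the cap...
assert f.read() == b""      # ...even though the file holds more bytes
```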
huggingface_hub/_inference_endpoints.py ADDED
@@ -0,0 +1,396 @@
+ import time
+ from dataclasses import dataclass, field
+ from datetime import datetime
+ from enum import Enum
+ from typing import TYPE_CHECKING, Dict, Optional, Union
+
+ from huggingface_hub.errors import InferenceEndpointError, InferenceEndpointTimeoutError
+
+ from .inference._client import InferenceClient
+ from .inference._generated._async_client import AsyncInferenceClient
+ from .utils import get_session, logging, parse_datetime
+
+
+ if TYPE_CHECKING:
+     from .hf_api import HfApi
+
+
+ logger = logging.get_logger(__name__)
+
+
+ class InferenceEndpointStatus(str, Enum):
+     PENDING = "pending"
+     INITIALIZING = "initializing"
+     UPDATING = "updating"
+     UPDATE_FAILED = "updateFailed"
+     RUNNING = "running"
+     PAUSED = "paused"
+     FAILED = "failed"
+     SCALED_TO_ZERO = "scaledToZero"
+
+
+ class InferenceEndpointType(str, Enum):
+     PUBLIC = "public"
+     PROTECTED = "protected"
+     PRIVATE = "private"
+
+
+ @dataclass
+ class InferenceEndpoint:
+     """
+     Contains information about a deployed Inference Endpoint.
+
+     Args:
+         name (`str`):
+             The unique name of the Inference Endpoint.
+         namespace (`str`):
+             The namespace where the Inference Endpoint is located.
+         repository (`str`):
+             The name of the model repository deployed on this Inference Endpoint.
+         status ([`InferenceEndpointStatus`]):
+             The current status of the Inference Endpoint.
+         url (`str`, *optional*):
+             The URL of the Inference Endpoint, if available. Only a deployed Inference Endpoint will have a URL.
+         framework (`str`):
+             The machine learning framework used for the model.
+         revision (`str`):
+             The specific model revision deployed on the Inference Endpoint.
+         task (`str`):
+             The task associated with the deployed model.
+         created_at (`datetime.datetime`):
+             The timestamp when the Inference Endpoint was created.
+         updated_at (`datetime.datetime`):
+             The timestamp of the last update of the Inference Endpoint.
+         type ([`InferenceEndpointType`]):
+             The type of the Inference Endpoint (public, protected, private).
+         raw (`Dict`):
+             The raw dictionary data returned from the API.
+         token (`str` or `bool`, *optional*):
+             Authentication token for the Inference Endpoint, if set when requesting the API. Will default to the
+             locally saved token if not provided. Pass `token=False` if you don't want to send your token to the server.
+
+     Example:
+     ```python
+     >>> from huggingface_hub import get_inference_endpoint
+     >>> endpoint = get_inference_endpoint("my-text-to-image")
+     >>> endpoint
+     InferenceEndpoint(name='my-text-to-image', ...)
+
+     # Get status
+     >>> endpoint.status
+     'running'
+     >>> endpoint.url
+     'https://my-text-to-image.region.vendor.endpoints.huggingface.cloud'
+
+     # Run inference
+     >>> endpoint.client.text_to_image(...)
+
+     # Pause endpoint to save $$$
+     >>> endpoint.pause()
+
+     # ...
+     # Resume and wait for deployment
+     >>> endpoint.resume()
+     >>> endpoint.wait()
+     >>> endpoint.client.text_to_image(...)
+     ```
+     """
+
+     # Fields in __repr__
+     name: str = field(init=False)
+     namespace: str
+     repository: str = field(init=False)
+     status: InferenceEndpointStatus = field(init=False)
+     url: Optional[str] = field(init=False)
+
+     # Other fields
+     framework: str = field(repr=False, init=False)
+     revision: str = field(repr=False, init=False)
+     task: str = field(repr=False, init=False)
+     created_at: datetime = field(repr=False, init=False)
+     updated_at: datetime = field(repr=False, init=False)
+     type: InferenceEndpointType = field(repr=False, init=False)
+
+     # Raw dict from the API
+     raw: Dict = field(repr=False)
+
+     # Internal fields
+     _token: Union[str, bool, None] = field(repr=False, compare=False)
+     _api: "HfApi" = field(repr=False, compare=False)
+
+     @classmethod
+     def from_raw(
+         cls, raw: Dict, namespace: str, token: Union[str, bool, None] = None, api: Optional["HfApi"] = None
+     ) -> "InferenceEndpoint":
+         """Initialize object from raw dictionary."""
+         if api is None:
+             from .hf_api import HfApi
+
+             api = HfApi()
+         if token is None:
+             token = api.token
+
+         # All other fields are populated in __post_init__
+         return cls(raw=raw, namespace=namespace, _token=token, _api=api)
+
+     def __post_init__(self) -> None:
+         """Populate fields from raw dictionary."""
+         self._populate_from_raw()
+
+     @property
+     def client(self) -> InferenceClient:
+         """Returns a client to make predictions on this Inference Endpoint.
+
+         Returns:
+             [`InferenceClient`]: an inference client pointing to the deployed endpoint.
+
+         Raises:
+             [`InferenceEndpointError`]: If the Inference Endpoint is not yet deployed.
+         """
+         if self.url is None:
+             raise InferenceEndpointError(
+                 "Cannot create a client for this Inference Endpoint as it is not yet deployed. "
+                 "Please wait for the Inference Endpoint to be deployed using `endpoint.wait()` and try again."
+             )
+         return InferenceClient(model=self.url, token=self._token)
+
+     @property
+     def async_client(self) -> AsyncInferenceClient:
+         """Returns a client to make predictions on this Inference Endpoint.
+
+         Returns:
+             [`AsyncInferenceClient`]: an asyncio-compatible inference client pointing to the deployed endpoint.
+
+         Raises:
+             [`InferenceEndpointError`]: If the Inference Endpoint is not yet deployed.
+         """
+         if self.url is None:
+             raise InferenceEndpointError(
+                 "Cannot create a client for this Inference Endpoint as it is not yet deployed. "
+                 "Please wait for the Inference Endpoint to be deployed using `endpoint.wait()` and try again."
+             )
+         return AsyncInferenceClient(model=self.url, token=self._token)
+
+     def wait(self, timeout: Optional[int] = None, refresh_every: int = 5) -> "InferenceEndpoint":
+         """Wait for the Inference Endpoint to be deployed.
+
+         Information from the server is fetched every `refresh_every` seconds. If the Inference Endpoint is not
+         deployed after `timeout` seconds, an [`InferenceEndpointTimeoutError`] is raised. The [`InferenceEndpoint`]
+         will be mutated in place with the latest data.
+
+         Args:
+             timeout (`int`, *optional*):
+                 The maximum time to wait for the Inference Endpoint to be deployed, in seconds. If `None`, will wait
+                 indefinitely.
+             refresh_every (`int`, *optional*):
+                 The time to wait between each fetch of the Inference Endpoint status, in seconds. Defaults to 5s.
+
+         Returns:
+             [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data.
+
+         Raises:
+             [`InferenceEndpointError`]
+                 If the Inference Endpoint ended up in a failed state.
+             [`InferenceEndpointTimeoutError`]
+                 If the Inference Endpoint is not deployed after `timeout` seconds.
+         """
+         if timeout is not None and timeout < 0:
+             raise ValueError("`timeout` cannot be negative.")
+         if refresh_every <= 0:
+             raise ValueError("`refresh_every` must be positive.")
+
+         start = time.time()
+         while True:
+             if self.url is not None:
+                 # Means the URL is provisioned => check if the endpoint is reachable
+                 response = get_session().get(self.url, headers=self._api._build_hf_headers(token=self._token))
+                 if response.status_code == 200:
+                     logger.info("Inference Endpoint is ready to be used.")
+                     return self
+             if self.status == InferenceEndpointStatus.FAILED:
+                 raise InferenceEndpointError(
+                     f"Inference Endpoint {self.name} failed to deploy. Please check the logs for more information."
+                 )
+             if timeout is not None:
+                 if time.time() - start > timeout:
+                     raise InferenceEndpointTimeoutError("Timeout while waiting for Inference Endpoint to be deployed.")
+             logger.info(f"Inference Endpoint is not deployed yet ({self.status}). Waiting {refresh_every}s...")
+             time.sleep(refresh_every)
+             self.fetch()
+
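+     # Illustrative usage of `wait` (not part of the original diff; the endpoint
+     # name is made up). Poll every 10s for up to 10 minutes, then inspect the
+     # last known status on timeout:
+     #
+     #     endpoint = get_inference_endpoint("my-endpoint")
+     #     try:
+     #         endpoint.wait(timeout=600, refresh_every=10)
+     #     except InferenceEndpointTimeoutError:
+     #         print(f"Still not deployed, current status: {endpoint.status}")
+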
+     def fetch(self) -> "InferenceEndpoint":
+         """Fetch latest information about the Inference Endpoint.
+
+         Returns:
+             [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data.
+         """
+         obj = self._api.get_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token)  # type: ignore [arg-type]
+         self.raw = obj.raw
+         self._populate_from_raw()
+         return self
+
+     def update(
+         self,
+         *,
+         # Compute update
+         accelerator: Optional[str] = None,
+         instance_size: Optional[str] = None,
+         instance_type: Optional[str] = None,
+         min_replica: Optional[int] = None,
+         max_replica: Optional[int] = None,
+         scale_to_zero_timeout: Optional[int] = None,
+         # Model update
+         repository: Optional[str] = None,
+         framework: Optional[str] = None,
+         revision: Optional[str] = None,
+         task: Optional[str] = None,
+         custom_image: Optional[Dict] = None,
+         secrets: Optional[Dict[str, str]] = None,
+     ) -> "InferenceEndpoint":
+         """Update the Inference Endpoint.
+
+         This method allows the update of either the compute configuration, the deployed model, or both. All arguments
+         are optional but at least one must be provided.
+
+         This is an alias for [`HfApi.update_inference_endpoint`]. The current object is mutated in place with the
+         latest data from the server.
+
+         Args:
+             accelerator (`str`, *optional*):
+                 The hardware accelerator to be used for inference (e.g. `"cpu"`).
+             instance_size (`str`, *optional*):
+                 The size or type of the instance to be used for hosting the model (e.g. `"x4"`).
+             instance_type (`str`, *optional*):
+                 The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`).
+             min_replica (`int`, *optional*):
+                 The minimum number of replicas (instances) to keep running for the Inference Endpoint.
+             max_replica (`int`, *optional*):
+                 The maximum number of replicas (instances) to scale to for the Inference Endpoint.
+             scale_to_zero_timeout (`int`, *optional*):
+                 The duration in minutes before an inactive endpoint is scaled to zero.
+
+             repository (`str`, *optional*):
+                 The name of the model repository associated with the Inference Endpoint (e.g. `"gpt2"`).
+             framework (`str`, *optional*):
+                 The machine learning framework used for the model (e.g. `"custom"`).
+             revision (`str`, *optional*):
+                 The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`).
+             task (`str`, *optional*):
+                 The task on which to deploy the model (e.g. `"text-classification"`).
+             custom_image (`Dict`, *optional*):
+                 A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
+                 Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
+             secrets (`Dict[str, str]`, *optional*):
+                 Secret values to inject in the container environment.
+
+         Returns:
+             [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data.
+         """
+         # Make API call
+         obj = self._api.update_inference_endpoint(
+             name=self.name,
+             namespace=self.namespace,
+             accelerator=accelerator,
+             instance_size=instance_size,
+             instance_type=instance_type,
+             min_replica=min_replica,
+             max_replica=max_replica,
+             scale_to_zero_timeout=scale_to_zero_timeout,
+             repository=repository,
+             framework=framework,
+             revision=revision,
+             task=task,
+             custom_image=custom_image,
+             secrets=secrets,
+             token=self._token,  # type: ignore [arg-type]
+         )
+
+         # Mutate current object
+         self.raw = obj.raw
+         self._populate_from_raw()
+         return self
+
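+     # Illustrative usage of `update` (not part of the original diff; values are
+     # made up). Compute and model can be updated independently:
+     #
+     #     endpoint = endpoint.update(min_replica=0, max_replica=4, scale_to_zero_timeout=15)
+     #     endpoint = endpoint.update(repository="gpt2", revision="main")
+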
312
+ def pause(self) -> "InferenceEndpoint":
313
+ """Pause the Inference Endpoint.
314
+
315
+ A paused Inference Endpoint will not be charged. It can be resumed at any time using [`InferenceEndpoint.resume`].
316
+ This is different than scaling the Inference Endpoint to zero with [`InferenceEndpoint.scale_to_zero`], which
317
+ would be automatically restarted when a request is made to it.
318
+
319
+ This is an alias for [`HfApi.pause_inference_endpoint`]. The current object is mutated in place with the
320
+ latest data from the server.
321
+
322
+ Returns:
323
+ [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data.
324
+ """
325
+ obj = self._api.pause_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token) # type: ignore [arg-type]
326
+ self.raw = obj.raw
327
+ self._populate_from_raw()
328
+ return self
329
+
330
+ def resume(self, running_ok: bool = True) -> "InferenceEndpoint":
331
+ """Resume the Inference Endpoint.
332
+
333
+ This is an alias for [`HfApi.resume_inference_endpoint`]. The current object is mutated in place with the
334
+ latest data from the server.
335
+
336
+ Args:
337
+ running_ok (`bool`, *optional*):
338
+ If `True`, the method will not raise an error if the Inference Endpoint is already running. Defaults to
339
+ `True`.
340
+
341
+ Returns:
342
+ [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data.
343
+ """
344
+ obj = self._api.resume_inference_endpoint(
345
+ name=self.name, namespace=self.namespace, running_ok=running_ok, token=self._token
346
+ ) # type: ignore [arg-type]
347
+ self.raw = obj.raw
348
+ self._populate_from_raw()
349
+ return self
350
+
351
+ def scale_to_zero(self) -> "InferenceEndpoint":
352
+ """Scale Inference Endpoint to zero.
353
+
354
+ An Inference Endpoint scaled to zero will not be charged. It will be resume on the next request to it, with a
355
+ cold start delay. This is different than pausing the Inference Endpoint with [`InferenceEndpoint.pause`], which
356
+ would require a manual resume with [`InferenceEndpoint.resume`].
357
+
358
+ This is an alias for [`HfApi.scale_to_zero_inference_endpoint`]. The current object is mutated in place with the
359
+ latest data from the server.
360
+
361
+ Returns:
362
+ [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data.
363
+ """
364
+ obj = self._api.scale_to_zero_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token) # type: ignore [arg-type]
365
+ self.raw = obj.raw
366
+ self._populate_from_raw()
367
+ return self
368
+
369
+ def delete(self) -> None:
370
+ """Delete the Inference Endpoint.
371
+
372
+ This operation is not reversible. If you don't want to be charged for an Inference Endpoint, it is preferable
373
+ to pause it with [`InferenceEndpoint.pause`] or scale it to zero with [`InferenceEndpoint.scale_to_zero`].
374
+
375
+ This is an alias for [`HfApi.delete_inference_endpoint`].
376
+ """
377
+ self._api.delete_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token) # type: ignore [arg-type]
378
+
379
+ def _populate_from_raw(self) -> None:
380
+ """Populate fields from raw dictionary.
381
+
382
+ Called in __post_init__ + each time the Inference Endpoint is updated.
383
+ """
384
+ # Repr fields
385
+ self.name = self.raw["name"]
386
+ self.repository = self.raw["model"]["repository"]
387
+ self.status = self.raw["status"]["state"]
388
+ self.url = self.raw["status"].get("url")
389
+
390
+ # Other fields
391
+ self.framework = self.raw["model"]["framework"]
392
+ self.revision = self.raw["model"]["revision"]
393
+ self.task = self.raw["model"]["task"]
394
+ self.created_at = parse_datetime(self.raw["status"]["createdAt"])
395
+ self.updated_at = parse_datetime(self.raw["status"]["updatedAt"])
396
+ self.type = self.raw["type"]
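
To make the pause / scale-to-zero / delete trade-off above concrete, here is a minimal usage sketch (the endpoint name is hypothetical; `get_inference_endpoint` is the module-level accessor exposed by `huggingface_hub`):

```python
from huggingface_hub import get_inference_endpoint

endpoint = get_inference_endpoint("my-endpoint")  # hypothetical endpoint name

# pause(): stops billing; serving only restarts after an explicit resume().
endpoint.pause()
endpoint.resume()

# scale_to_zero(): stops billing too, but the endpoint restarts automatically
# (with a cold-start delay) on the next request.
endpoint.scale_to_zero()

# delete() is irreversible; prefer pause() or scale_to_zero() to stop charges.
# endpoint.delete()
```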
huggingface_hub/_local_folder.py ADDED
@@ -0,0 +1,425 @@
1
+ # coding=utf-8
2
+ # Copyright 2024-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Contains utilities to handle the `./.cache/huggingface` folder in local directories.
16
+
17
+ First discussed in https://github.com/huggingface/huggingface_hub/issues/1738 to store
18
+ download metadata when downloading files from the hub to a local directory (without
19
+ using the cache).
20
+
21
+ ./.cache/huggingface folder structure:
22
+ [4.0K] data
23
+ ├── [4.0K] .cache
24
+ │ └── [4.0K] huggingface
25
+ │ └── [4.0K] download
26
+ │ ├── [ 16] file.parquet.metadata
27
+ │ ├── [ 16] file.txt.metadata
28
+ │ └── [4.0K] folder
29
+ │ └── [ 16] file.parquet.metadata
30
+
31
+ ├── [6.5G] file.parquet
32
+ ├── [1.5K] file.txt
33
+ └── [4.0K] folder
34
+ └── [ 16] file.parquet
35
+
36
+
37
+ Download metadata file structure:
38
+ ```
39
+ # file.txt.metadata
40
+ 11c5a3d5811f50298f278a704980280950aedb10
41
+ a16a55fda99d2f2e7b69cce5cf93ff4ad3049930
42
+ 1712656091.123
43
+
44
+ # file.parquet.metadata
45
+ 11c5a3d5811f50298f278a704980280950aedb10
46
+ 7c5d3f4b8b76583b422fcb9189ad6c89d5d97a094541ce8932dce3ecabde1421
47
+ 1712656091.123
48
+
49
+ ```
50
+ """
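
As a quick illustration of the three-line layout documented above, a download metadata file can be parsed with a few lines of Python (a minimal sketch, independent of the helpers defined below):

```python
from pathlib import Path


def parse_download_metadata(path: Path) -> tuple[str, str, float]:
    """Parse a `*.metadata` file: commit hash, then etag, then a Unix timestamp."""
    commit_hash, etag, timestamp = path.read_text().splitlines()[:3]
    return commit_hash, etag, float(timestamp)
```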
51
+
52
+ import logging
53
+ import os
54
+ import time
55
+ from dataclasses import dataclass
56
+ from functools import lru_cache
57
+ from pathlib import Path
58
+ from typing import Optional
59
+
60
+ from .utils import WeakFileLock
61
+
62
+
63
+ logger = logging.getLogger(__name__)
64
+
65
+
66
+ @dataclass
67
+ class LocalDownloadFilePaths:
68
+ """
69
+ Paths to the files related to a download process in a local dir.
70
+
71
+ Returned by [`get_local_download_paths`].
72
+
73
+ Attributes:
74
+ file_path (`Path`):
75
+ Path where the file will be saved.
76
+ lock_path (`Path`):
77
+ Path to the lock file used to ensure atomicity when reading/writing metadata.
78
+ metadata_path (`Path`):
79
+ Path to the metadata file.
80
+ """
81
+
82
+ file_path: Path
83
+ lock_path: Path
84
+ metadata_path: Path
85
+
86
+ def incomplete_path(self, etag: str) -> Path:
87
+ """Return the path where a file will be temporarily downloaded before being moved to `file_path`."""
88
+ return self.metadata_path.with_suffix(f".{etag}.incomplete")
89
+
90
+
91
+ @dataclass(frozen=True)
92
+ class LocalUploadFilePaths:
93
+ """
94
+ Paths to the files related to an upload process in a local dir.
95
+
96
+ Returned by [`get_local_upload_paths`].
97
+
98
+ Attributes:
99
+ path_in_repo (`str`):
100
+ Path of the file in the repo.
101
+ file_path (`Path`):
102
+ Path where the file will be saved.
103
+ lock_path (`Path`):
104
+ Path to the lock file used to ensure atomicity when reading/writing metadata.
105
+ metadata_path (`Path`):
106
+ Path to the metadata file.
107
+ """
108
+
109
+ path_in_repo: str
110
+ file_path: Path
111
+ lock_path: Path
112
+ metadata_path: Path
113
+
114
+
115
+ @dataclass
116
+ class LocalDownloadFileMetadata:
117
+ """
118
+ Metadata about a file in the local directory related to a download process.
119
+
120
+ Attributes:
121
+ filename (`str`):
122
+ Path of the file in the repo.
123
+ commit_hash (`str`):
124
+ Commit hash of the file in the repo.
125
+ etag (`str`):
126
+ ETag of the file in the repo. Used to check if the file has changed.
127
+ For LFS files, this is the sha256 of the file. For regular files, it corresponds to the git hash.
128
+ timestamp (`float`):
129
+ Unix timestamp of when the metadata was saved i.e. when the metadata was accurate.
130
+ """
131
+
132
+ filename: str
133
+ commit_hash: str
134
+ etag: str
135
+ timestamp: float
136
+
137
+
138
+ @dataclass
139
+ class LocalUploadFileMetadata:
140
+ """
141
+ Metadata about a file in the local directory related to an upload process.
142
+ """
143
+
144
+ size: int
145
+
146
+ # Default values correspond to "we don't know yet"
147
+ timestamp: Optional[float] = None
148
+ should_ignore: Optional[bool] = None
149
+ sha256: Optional[str] = None
150
+ upload_mode: Optional[str] = None
151
+ is_uploaded: bool = False
152
+ is_committed: bool = False
153
+
154
+ def save(self, paths: LocalUploadFilePaths) -> None:
155
+ """Save the metadata to disk."""
156
+ with WeakFileLock(paths.lock_path):
157
+ with paths.metadata_path.open("w") as f:
158
+ new_timestamp = time.time()
159
+ f.write(str(new_timestamp) + "\n")
160
+
161
+ f.write(str(self.size)) # never None
162
+ f.write("\n")
163
+
164
+ if self.should_ignore is not None:
165
+ f.write(str(int(self.should_ignore)))
166
+ f.write("\n")
167
+
168
+ if self.sha256 is not None:
169
+ f.write(self.sha256)
170
+ f.write("\n")
171
+
172
+ if self.upload_mode is not None:
173
+ f.write(self.upload_mode)
174
+ f.write("\n")
175
+
176
+ f.write(str(int(self.is_uploaded)) + "\n")
177
+ f.write(str(int(self.is_committed)) + "\n")
178
+
179
+ self.timestamp = new_timestamp
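
For concreteness, a fully populated file written by `save` looks like the following (illustrative values; one field per line in the order timestamp, size, should_ignore, sha256, upload_mode, is_uploaded, is_committed):

```
1712656091.123
1048576
0
7c5d3f4b8b76583b422fcb9189ad6c89d5d97a094541ce8932dce3ecabde1421
lfs
1
0
```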
180
+
181
+
182
+ @lru_cache(maxsize=128) # ensure singleton
183
+ def get_local_download_paths(local_dir: Path, filename: str) -> LocalDownloadFilePaths:
184
+ """Compute paths to the files related to a download process.
185
+
186
+ Folders containing the paths are all guaranteed to exist.
187
+
188
+ Args:
189
+ local_dir (`Path`):
190
+ Path to the local directory in which files are downloaded.
191
+ filename (`str`):
192
+ Path of the file in the repo.
193
+
194
+ Return:
195
+ [`LocalDownloadFilePaths`]: the paths to the files (file_path, lock_path, metadata_path, incomplete_path).
196
+ """
197
+ # filename is the path in the Hub repository (separated by '/')
198
+ # make sure to have a cross platform transcription
199
+ sanitized_filename = os.path.join(*filename.split("/"))
200
+ if os.name == "nt":
201
+ if sanitized_filename.startswith("..\\") or "\\..\\" in sanitized_filename:
202
+ raise ValueError(
203
+ f"Invalid filename: cannot handle filename '{sanitized_filename}' on Windows. Please ask the repository"
204
+ " owner to rename this file."
205
+ )
206
+ file_path = local_dir / sanitized_filename
207
+ metadata_path = _huggingface_dir(local_dir) / "download" / f"{sanitized_filename}.metadata"
208
+ lock_path = metadata_path.with_suffix(".lock")
209
+
210
+ # Some Windows versions do not allow for paths longer than 255 characters.
211
+ # In this case, we must specify it as an extended path by using the "\\?\" prefix
212
+ if os.name == "nt":
213
+ if not str(local_dir).startswith("\\\\?\\") and len(os.path.abspath(lock_path)) > 255:
214
+ file_path = Path("\\\\?\\" + os.path.abspath(file_path))
215
+ lock_path = Path("\\\\?\\" + os.path.abspath(lock_path))
216
+ metadata_path = Path("\\\\?\\" + os.path.abspath(metadata_path))
217
+
218
+ file_path.parent.mkdir(parents=True, exist_ok=True)
219
+ metadata_path.parent.mkdir(parents=True, exist_ok=True)
220
+ return LocalDownloadFilePaths(file_path=file_path, lock_path=lock_path, metadata_path=metadata_path)
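
Assuming the folder layout from the module docstring, a hypothetical call resolves to paths like these (a sketch; `data/` is an arbitrary local directory, and the parent directories are created as a side effect):

```python
from pathlib import Path

# Private module shown in this diff; the import path may change between releases.
from huggingface_hub._local_folder import get_local_download_paths

paths = get_local_download_paths(Path("data"), "folder/file.parquet")
print(paths.file_path)      # data/folder/file.parquet
print(paths.metadata_path)  # data/.cache/huggingface/download/folder/file.parquet.metadata
print(paths.lock_path)      # data/.cache/huggingface/download/folder/file.parquet.lock
```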
221
+
222
+
223
+ @lru_cache(maxsize=128) # ensure singleton
224
+ def get_local_upload_paths(local_dir: Path, filename: str) -> LocalUploadFilePaths:
225
+ """Compute paths to the files related to an upload process.
226
+
227
+ Folders containing the paths are all guaranteed to exist.
228
+
229
+ Args:
230
+ local_dir (`Path`):
231
+ Path to the local directory that is uploaded.
232
+ filename (`str`):
233
+ Path of the file in the repo.
234
+
235
+ Return:
236
+ [`LocalUploadFilePaths`]: the paths to the files (file_path, lock_path, metadata_path).
237
+ """
238
+ # filename is the path in the Hub repository (separated by '/')
239
+ # make sure to have a cross platform transcription
240
+ sanitized_filename = os.path.join(*filename.split("/"))
241
+ if os.name == "nt":
242
+ if sanitized_filename.startswith("..\\") or "\\..\\" in sanitized_filename:
243
+ raise ValueError(
244
+ f"Invalid filename: cannot handle filename '{sanitized_filename}' on Windows. Please ask the repository"
245
+ " owner to rename this file."
246
+ )
247
+ file_path = local_dir / sanitized_filename
248
+ metadata_path = _huggingface_dir(local_dir) / "upload" / f"{sanitized_filename}.metadata"
249
+ lock_path = metadata_path.with_suffix(".lock")
250
+
251
+ # Some Windows versions do not allow for paths longer than 255 characters.
252
+ # In this case, we must specify it as an extended path by using the "\\?\" prefix
253
+ if os.name == "nt":
254
+ if not str(local_dir).startswith("\\\\?\\") and len(os.path.abspath(lock_path)) > 255:
255
+ file_path = Path("\\\\?\\" + os.path.abspath(file_path))
256
+ lock_path = Path("\\\\?\\" + os.path.abspath(lock_path))
257
+ metadata_path = Path("\\\\?\\" + os.path.abspath(metadata_path))
258
+
259
+ file_path.parent.mkdir(parents=True, exist_ok=True)
260
+ metadata_path.parent.mkdir(parents=True, exist_ok=True)
261
+ return LocalUploadFilePaths(
262
+ path_in_repo=filename, file_path=file_path, lock_path=lock_path, metadata_path=metadata_path
263
+ )
264
+
265
+
266
+ def read_download_metadata(local_dir: Path, filename: str) -> Optional[LocalDownloadFileMetadata]:
267
+ """Read metadata about a file in the local directory related to a download process.
268
+
269
+ Args:
270
+ local_dir (`Path`):
271
+ Path to the local directory in which files are downloaded.
272
+ filename (`str`):
273
+ Path of the file in the repo.
274
+
275
+ Return:
276
+ `[LocalDownloadFileMetadata]` or `None`: the metadata if it exists, `None` otherwise.
277
+ """
278
+ paths = get_local_download_paths(local_dir, filename)
279
+ with WeakFileLock(paths.lock_path):
280
+ if paths.metadata_path.exists():
281
+ try:
282
+ with paths.metadata_path.open() as f:
283
+ commit_hash = f.readline().strip()
284
+ etag = f.readline().strip()
285
+ timestamp = float(f.readline().strip())
286
+ metadata = LocalDownloadFileMetadata(
287
+ filename=filename,
288
+ commit_hash=commit_hash,
289
+ etag=etag,
290
+ timestamp=timestamp,
291
+ )
292
+ except Exception as e:
293
+ # remove the metadata file if it is corrupted / not the right format
294
+ logger.warning(
295
+ f"Invalid metadata file {paths.metadata_path}: {e}. Removing it from disk and continuing."
296
+ )
297
+ try:
298
+ paths.metadata_path.unlink()
299
+ except Exception as e:
300
+ logger.warning(f"Could not remove corrupted metadata file {paths.metadata_path}: {e}")
301
+ return None  # corrupted metadata => behave as if no metadata exists (avoids a NameError below)
+
302
+ try:
303
+ # check if the file exists and hasn't been modified since the metadata was saved
304
+ stat = paths.file_path.stat()
305
+ if (
306
+ stat.st_mtime - 1 <= metadata.timestamp
307
+ ): # allow 1s difference as stat.st_mtime might not be precise
308
+ return metadata
309
+ logger.info(f"Ignored metadata for '{filename}' (outdated). Will re-compute hash.")
310
+ except FileNotFoundError:
311
+ # file does not exist => metadata is outdated
312
+ return None
313
+ return None
314
+
315
+
316
+ def read_upload_metadata(local_dir: Path, filename: str) -> LocalUploadFileMetadata:
317
+ """Read metadata about a file in the local directory related to an upload process.
318
+
319
+ TODO: factorize logic with `read_download_metadata`.
320
+
321
+ Args:
322
+ local_dir (`Path`):
323
+ Path to the local directory that is uploaded.
324
+ filename (`str`):
325
+ Path of the file in the repo.
326
+
327
+ Return:
328
+ `[LocalUploadFileMetadata]`: the metadata read from disk if it exists and is up-to-date, otherwise a new object that only knows the file size.
329
+ """
330
+ paths = get_local_upload_paths(local_dir, filename)
331
+ with WeakFileLock(paths.lock_path):
332
+ if paths.metadata_path.exists():
333
+ try:
334
+ with paths.metadata_path.open() as f:
335
+ timestamp = float(f.readline().strip())
336
+
337
+ size = int(f.readline().strip()) # never None
338
+
339
+ _should_ignore = f.readline().strip()
340
+ should_ignore = None if _should_ignore == "" else bool(int(_should_ignore))
341
+
342
+ _sha256 = f.readline().strip()
343
+ sha256 = None if _sha256 == "" else _sha256
344
+
345
+ _upload_mode = f.readline().strip()
346
+ upload_mode = None if _upload_mode == "" else _upload_mode
347
+ if upload_mode not in (None, "regular", "lfs"):
348
+ raise ValueError(f"Invalid upload mode in metadata {paths.path_in_repo}: {upload_mode}")
349
+
350
+ is_uploaded = bool(int(f.readline().strip()))
351
+ is_committed = bool(int(f.readline().strip()))
352
+
353
+ metadata = LocalUploadFileMetadata(
354
+ timestamp=timestamp,
355
+ size=size,
356
+ should_ignore=should_ignore,
357
+ sha256=sha256,
358
+ upload_mode=upload_mode,
359
+ is_uploaded=is_uploaded,
360
+ is_committed=is_committed,
361
+ )
362
+ except Exception as e:
363
+ # remove the metadata file if it is corrupted / not the right format
364
+ logger.warning(
365
+ f"Invalid metadata file {paths.metadata_path}: {e}. Removing it from disk and continuing."
366
+ )
367
+ try:
368
+ paths.metadata_path.unlink()
369
+ except Exception as e:
370
+ logger.warning(f"Could not remove corrupted metadata file {paths.metadata_path}: {e}")
371
+ metadata = None  # corrupted metadata => fall back to a fresh object below (avoids a NameError)
+
372
+ # TODO: can we do better?
373
+ if (
374
+ metadata is not None
+ and metadata.timestamp is not None
375
+ and metadata.is_uploaded # file was uploaded
376
+ and not metadata.is_committed # but not committed
377
+ and time.time() - metadata.timestamp > 20 * 3600 # and it's been more than 20 hours
378
+ ): # => we consider it as garbage-collected by S3
379
+ metadata.is_uploaded = False
380
+
381
+ # check if the file exists and hasn't been modified since the metadata was saved
382
+ try:
383
+ if metadata is not None and metadata.timestamp is not None and paths.file_path.stat().st_mtime <= metadata.timestamp:
384
+ return metadata
385
+ logger.info(f"Ignored metadata for '{filename}' (outdated). Will re-compute hash.")
386
+ except FileNotFoundError:
387
+ # file does not exist => metadata is outdated
388
+ pass
389
+
390
+ # empty metadata => we don't know anything except its size
391
+ return LocalUploadFileMetadata(size=paths.file_path.stat().st_size)
392
+
393
+
394
+ def write_download_metadata(local_dir: Path, filename: str, commit_hash: str, etag: str) -> None:
395
+ """Write metadata about a file in the local directory related to a download process.
396
+
397
+ Args:
398
+ local_dir (`Path`):
399
+ Path to the local directory in which files are downloaded.
400
+ """
401
+ paths = get_local_download_paths(local_dir, filename)
402
+ with WeakFileLock(paths.lock_path):
403
+ with paths.metadata_path.open("w") as f:
404
+ f.write(f"{commit_hash}\n{etag}\n{time.time()}\n")
405
+
406
+
407
+ @lru_cache()
408
+ def _huggingface_dir(local_dir: Path) -> Path:
409
+ """Return the path to the `.cache/huggingface` directory in a local directory."""
410
+ # Wrap in lru_cache to avoid overwriting the .gitignore file if called multiple times
411
+ path = local_dir / ".cache" / "huggingface"
412
+ path.mkdir(exist_ok=True, parents=True)
413
+
414
+ # Create a .gitignore file in the .cache/huggingface directory if it doesn't exist
415
+ # Should be thread-safe enough like this.
416
+ gitignore = path / ".gitignore"
417
+ gitignore_lock = path / ".gitignore.lock"
418
+ if not gitignore.exists():
419
+ try:
420
+ with WeakFileLock(gitignore_lock):
421
+ gitignore.write_text("*")
422
+ gitignore_lock.unlink()
423
+ except OSError: # FileNotFoundError, PermissionError, etc.
424
+ pass
425
+ return path
huggingface_hub/_login.py ADDED
@@ -0,0 +1,397 @@
1
+ # Copyright 2020 The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Contains methods to log in to the Hub."""
15
+
16
+ import os
17
+ import subprocess
18
+ from functools import partial
19
+ from getpass import getpass
20
+ from pathlib import Path
21
+ from typing import Optional
22
+
23
+ from . import constants
24
+ from .commands._cli_utils import ANSI
25
+ from .utils import (
26
+ capture_output,
27
+ get_token,
28
+ is_google_colab,
29
+ is_notebook,
30
+ list_credential_helpers,
31
+ logging,
32
+ run_subprocess,
33
+ set_git_credential,
34
+ unset_git_credential,
35
+ )
36
+ from .utils._token import _get_token_from_environment, _get_token_from_google_colab
37
+
38
+
39
+ logger = logging.get_logger(__name__)
40
+
41
+ _HF_LOGO_ASCII = """
42
+ _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_|
43
+ _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|
44
+ _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_|
45
+ _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|
46
+ _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_|
47
+ """
48
+
49
+
50
+ def login(
51
+ token: Optional[str] = None,
52
+ add_to_git_credential: bool = False,
53
+ new_session: bool = True,
54
+ write_permission: bool = False,
55
+ ) -> None:
56
+ """Login the machine to access the Hub.
57
+
58
+ The `token` is persisted in cache and set as a git credential. Once done, the machine
59
+ is logged in and the access token will be available across all `huggingface_hub`
60
+ components. If `token` is not provided, the user will be prompted for it, either with
61
+ a widget (in a notebook) or via the terminal.
62
+
63
+ To log in from outside of a script, one can also use `huggingface-cli login` which is
64
+ a cli command that wraps [`login`].
65
+
66
+ <Tip>
67
+
68
+ [`login`] is a drop-in replacement method for [`notebook_login`] as it wraps and
69
+ extends its capabilities.
70
+
71
+ </Tip>
72
+
73
+ <Tip>
74
+
75
+ When the token is not passed, [`login`] will automatically detect if the script runs
76
+ in a notebook or not. However, this detection might not be accurate due to the
77
+ variety of notebooks that exist nowadays. If that is the case, you can always force
78
+ the UI by using [`notebook_login`] or [`interpreter_login`].
79
+
80
+ </Tip>
81
+
82
+ Args:
83
+ token (`str`, *optional*):
84
+ User access token to generate from https://huggingface.co/settings/token.
85
+ add_to_git_credential (`bool`, defaults to `False`):
86
+ If `True`, token will be set as git credential. If no git credential helper
87
+ is configured, a warning will be displayed to the user. If `token` is `None`,
88
+ the value of `add_to_git_credential` is ignored and will be prompted again
89
+ to the end user.
90
+ new_session (`bool`, defaults to `True`):
91
+ If `True`, will request a token even if one is already saved on the machine.
92
+ write_permission (`bool`, defaults to `False`):
93
+ If `True`, requires a token with write permission.
94
+ Raises:
95
+ [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
96
+ If an organization token is passed. Only personal account tokens are valid
97
+ to log in.
98
+ [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
99
+ If token is invalid.
100
+ [`ImportError`](https://docs.python.org/3/library/exceptions.html#ImportError)
101
+ If running in a notebook but `ipywidgets` is not installed.
102
+ """
103
+ if token is not None:
104
+ if not add_to_git_credential:
105
+ print(
106
+ "The token has not been saved to the git credentials helper. Pass "
107
+ "`add_to_git_credential=True` in this function directly or "
108
+ "`--add-to-git-credential` when using `huggingface-cli`, if "
109
+ "you want to set the git credential as well."
110
+ )
111
+ _login(token, add_to_git_credential=add_to_git_credential, write_permission=write_permission)
112
+ elif is_notebook():
113
+ notebook_login(new_session=new_session, write_permission=write_permission)
114
+ else:
115
+ interpreter_login(new_session=new_session, write_permission=write_permission)
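
A minimal sketch of the non-interactive path, e.g. in CI (the token value is a placeholder):

```python
from huggingface_hub import login, logout

# Log in programmatically; the token string below is a placeholder.
login(token="hf_xxx", add_to_git_credential=True)

# ... authenticated calls to the Hub ...

# Remove the stored token again. Note that tokens set through environment
# variables cannot be removed this way and will raise an EnvironmentError.
logout()
```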
116
+
117
+
118
+ def logout() -> None:
119
+ """Logout the machine from the Hub.
120
+
121
+ Token is deleted from the machine and removed from git credential.
122
+ """
123
+ if get_token() is None:
124
+ print("Not logged in!")
125
+ return
126
+
127
+ # Delete token from git credentials
128
+ unset_git_credential()
129
+
130
+ # Delete token file
131
+ try:
132
+ Path(constants.HF_TOKEN_PATH).unlink()
133
+ except FileNotFoundError:
134
+ pass
135
+
136
+ # Check if still logged in
137
+ if _get_token_from_google_colab() is not None:
138
+ raise EnvironmentError(
139
+ "You are automatically logged in using a Google Colab secret.\n"
140
+ "To log out, you must unset the `HF_TOKEN` secret in your Colab settings."
141
+ )
142
+ if _get_token_from_environment() is not None:
143
+ raise EnvironmentError(
144
+ "Token has been deleted from your machine but you are still logged in.\n"
145
+ "To log out, you must clear out both `HF_TOKEN` and `HUGGING_FACE_HUB_TOKEN` environment variables."
146
+ )
147
+
148
+ print("Successfully logged out.")
149
+
150
+
151
+ ###
152
+ # Interpreter-based login (text)
153
+ ###
154
+
155
+
156
+ def interpreter_login(new_session: bool = True, write_permission: bool = False) -> None:
157
+ """
158
+ Displays a prompt to log in to the HF website and store the token.
159
+
160
+ This is equivalent to [`login`] without passing a token when not run in a notebook.
161
+ [`interpreter_login`] is useful if you want to force the use of the terminal prompt
162
+ instead of a notebook widget.
163
+
164
+ For more details, see [`login`].
165
+
166
+ Args:
167
+ new_session (`bool`, defaults to `True`):
168
+ If `True`, will request a token even if one is already saved on the machine.
169
+ write_permission (`bool`, defaults to `False`):
170
+ If `True`, requires a token with write permission.
171
+
172
+ """
173
+ if not new_session and _current_token_okay(write_permission=write_permission):
174
+ print("User is already logged in.")
175
+ return
176
+
177
+ from .commands.delete_cache import _ask_for_confirmation_no_tui
178
+
179
+ print(_HF_LOGO_ASCII)
180
+ if get_token() is not None:
181
+ print(
182
+ " A token is already saved on your machine. Run `huggingface-cli"
183
+ " whoami` to get more information or `huggingface-cli logout` if you want"
184
+ " to log out."
185
+ )
186
+ print(" Setting a new token will erase the existing one.")
187
+
188
+ print(" To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .")
189
+ if os.name == "nt":
190
+ print("Token can be pasted using 'Right-Click'.")
191
+ token = getpass("Enter your token (input will not be visible): ")
192
+ add_to_git_credential = _ask_for_confirmation_no_tui("Add token as git credential?")
193
+
194
+ _login(token=token, add_to_git_credential=add_to_git_credential, write_permission=write_permission)
195
+
196
+
197
+ ###
198
+ # Notebook-based login (widget)
199
+ ###
200
+
201
+ NOTEBOOK_LOGIN_PASSWORD_HTML = """<center> <img
202
+ src=https://huggingface.co/front/assets/huggingface_logo-noborder.svg
203
+ alt='Hugging Face'> <br> Immediately click login after typing your password or
204
+ it might be stored in plain text in this notebook file. </center>"""
205
+
206
+
207
+ NOTEBOOK_LOGIN_TOKEN_HTML_START = """<center> <img
208
+ src=https://huggingface.co/front/assets/huggingface_logo-noborder.svg
209
+ alt='Hugging Face'> <br> Copy a token from <a
210
+ href="https://huggingface.co/settings/tokens" target="_blank">your Hugging Face
211
+ tokens page</a> and paste it below. <br> Immediately click login after copying
212
+ your token or it might be stored in plain text in this notebook file. </center>"""
213
+
214
+
215
+ NOTEBOOK_LOGIN_TOKEN_HTML_END = """
216
+ <b>Pro Tip:</b> If you don't already have one, you can create a dedicated
217
+ 'notebooks' token with 'write' access, that you can then easily reuse for all
218
+ notebooks. </center>"""
219
+
220
+
221
+ def notebook_login(new_session: bool = True, write_permission: bool = False) -> None:
222
+ """
223
+ Displays a widget to log in to the HF website and store the token.
224
+
225
+ This is equivalent to [`login`] without passing a token when run in a notebook.
226
+ [`notebook_login`] is useful if you want to force the use of the notebook widget
227
+ instead of a prompt in the terminal.
228
+
229
+ For more details, see [`login`].
230
+
231
+ Args:
232
+ new_session (`bool`, defaults to `True`):
233
+ If `True`, will request a token even if one is already saved on the machine.
234
+ write_permission (`bool`, defaults to `False`):
235
+ If `True`, requires a token with write permission.
236
+ """
237
+ try:
238
+ import ipywidgets.widgets as widgets # type: ignore
239
+ from IPython.display import display # type: ignore
240
+ except ImportError:
241
+ raise ImportError(
242
+ "The `notebook_login` function can only be used in a notebook (Jupyter or"
243
+ " Colab) and you need the `ipywidgets` module: `pip install ipywidgets`."
244
+ )
245
+ if not new_session and _current_token_okay(write_permission=write_permission):
246
+ print("User is already logged in.")
247
+ return
248
+
249
+ box_layout = widgets.Layout(display="flex", flex_flow="column", align_items="center", width="50%")
250
+
251
+ token_widget = widgets.Password(description="Token:")
252
+ git_checkbox_widget = widgets.Checkbox(value=True, description="Add token as git credential?")
253
+ token_finish_button = widgets.Button(description="Login")
254
+
255
+ login_token_widget = widgets.VBox(
256
+ [
257
+ widgets.HTML(NOTEBOOK_LOGIN_TOKEN_HTML_START),
258
+ token_widget,
259
+ git_checkbox_widget,
260
+ token_finish_button,
261
+ widgets.HTML(NOTEBOOK_LOGIN_TOKEN_HTML_END),
262
+ ],
263
+ layout=box_layout,
264
+ )
265
+ display(login_token_widget)
266
+
267
+ # On click events
268
+ def login_token_event(t, write_permission: bool = False):
269
+ """
270
+ Event handler for the login button.
271
+
272
+ Args:
273
+ write_permission (`bool`, defaults to `False`):
274
+ If `True`, requires a token with write permission.
275
+ """
276
+ token = token_widget.value
277
+ add_to_git_credential = git_checkbox_widget.value
278
+ # Erase token and clear value to make sure it's not saved in the notebook.
279
+ token_widget.value = ""
280
+ # Hide inputs
281
+ login_token_widget.children = [widgets.Label("Connecting...")]
282
+ try:
283
+ with capture_output() as captured:
284
+ _login(token, add_to_git_credential=add_to_git_credential, write_permission=write_permission)
285
+ message = captured.getvalue()
286
+ except Exception as error:
287
+ message = str(error)
288
+ # Print result (success message or error)
289
+ login_token_widget.children = [widgets.Label(line) for line in message.split("\n") if line.strip()]
290
+
291
+ token_finish_button.on_click(partial(login_token_event, write_permission=write_permission))
292
+
293
+
294
+ ###
295
+ # Login private helpers
296
+ ###
297
+
298
+
299
+ def _login(token: str, add_to_git_credential: bool, write_permission: bool = False) -> None:
300
+ from .hf_api import get_token_permission # avoid circular import
301
+
302
+ if token.startswith("api_org"):
303
+ raise ValueError("You must use your personal account token, not an organization token.")
304
+
305
+ permission = get_token_permission(token)
306
+ if permission is None:
307
+ raise ValueError("Invalid token passed!")
308
+ elif write_permission and permission != "write":
309
+ raise ValueError(
310
+ "Token is valid but is 'read-only' and a 'write' token is required.\nPlease provide a new token with"
311
+ " correct permission."
312
+ )
313
+ print(f"Token is valid (permission: {permission}).")
314
+
315
+ if add_to_git_credential:
316
+ if _is_git_credential_helper_configured():
317
+ set_git_credential(token)
318
+ print(
319
+ "Your token has been saved in your configured git credential helpers"
320
+ + f" ({','.join(list_credential_helpers())})."
321
+ )
322
+ else:
323
+ print("Token has not been saved to git credential helper.")
324
+
325
+ # Save token
326
+ path = Path(constants.HF_TOKEN_PATH)
327
+ path.parent.mkdir(parents=True, exist_ok=True)
328
+ path.write_text(token)
329
+ print(f"Your token has been saved to {constants.HF_TOKEN_PATH}")
330
+ print("Login successful")
331
+
332
+
333
+ def _current_token_okay(write_permission: bool = False):
334
+ """Check if the current token is valid.
335
+
336
+ Args:
337
+ write_permission (`bool`, defaults to `False`):
338
+ If `True`, requires a token with write permission.
339
+
340
+ Returns:
341
+ `bool`: `True` if the current token is valid, `False` otherwise.
342
+ """
343
+ from .hf_api import get_token_permission # avoid circular import
344
+
345
+ permission = get_token_permission()
346
+ if permission is None or (write_permission and permission != "write"):
347
+ return False
348
+ return True
349
+
350
+
351
+ def _is_git_credential_helper_configured() -> bool:
352
+ """Check if a git credential helper is configured.
353
+
354
+ Warns user if not the case (except for Google Colab where "store" is set by default
355
+ by `huggingface_hub`).
356
+ """
357
+ helpers = list_credential_helpers()
358
+ if len(helpers) > 0:
359
+ return True # Do not warn: at least 1 helper is set
360
+
361
+ # Only in Google Colab to avoid the warning message
362
+ # See https://github.com/huggingface/huggingface_hub/issues/1043#issuecomment-1247010710
363
+ if is_google_colab():
364
+ _set_store_as_git_credential_helper_globally()
365
+ return True # Do not warn: "store" is used by default in Google Colab
366
+
367
+ # Otherwise, warn user
368
+ print(
369
+ ANSI.red(
370
+ "Cannot authenticate through git-credential as no helper is defined on your"
371
+ " machine.\nYou might have to re-authenticate when pushing to the Hugging"
372
+ " Face Hub.\nRun the following command in your terminal in case you want to"
373
+ " set the 'store' credential helper as default.\n\ngit config --global"
374
+ " credential.helper store\n\nRead"
375
+ " https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more"
376
+ " details."
377
+ )
378
+ )
379
+ return False
380
+
381
+
382
+ def _set_store_as_git_credential_helper_globally() -> None:
383
+ """Set globally the credential.helper to `store`.
384
+
385
+ To be used only in Google Colab as we assume the user doesn't care about the git
386
+ credential config. It is the only particular case where we don't want to display the
387
+ warning message in [`notebook_login()`].
388
+
389
+ Related:
390
+ - https://github.com/huggingface/huggingface_hub/issues/1043
391
+ - https://github.com/huggingface/huggingface_hub/issues/1051
392
+ - https://git-scm.com/docs/git-credential-store
393
+ """
394
+ try:
395
+ run_subprocess("git config --global credential.helper store")
396
+ except subprocess.CalledProcessError as exc:
397
+ raise EnvironmentError(exc.stderr)
huggingface_hub/_multi_commits.py ADDED
@@ -0,0 +1,306 @@
1
+ # coding=utf-8
2
+ # Copyright 2023-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Contains utilities for multi-commits (i.e. pushing changes iteratively on a PR)."""
16
+
17
+ import re
18
+ from dataclasses import dataclass, field
19
+ from typing import TYPE_CHECKING, Iterable, List, Optional, Set, Tuple, Union
20
+
21
+ from ._commit_api import CommitOperationAdd, CommitOperationDelete
22
+ from .community import DiscussionWithDetails
23
+ from .utils import experimental
24
+ from .utils._cache_manager import _format_size
25
+ from .utils.insecure_hashlib import sha256
26
+
27
+
28
+ if TYPE_CHECKING:
29
+ from .hf_api import HfApi
30
+
31
+
32
+ class MultiCommitException(Exception):
33
+ """Base exception for any exception happening while doing a multi-commit."""
34
+
35
+
36
+ MULTI_COMMIT_PR_DESCRIPTION_TEMPLATE = """
37
+ ## {commit_message}
38
+
39
+ {commit_description}
40
+
41
+ **Multi commit ID:** {multi_commit_id}
42
+
43
+ Scheduled commits:
44
+
45
+ {multi_commit_strategy}
46
+
47
+ _This is a PR opened using the `huggingface_hub` library in the context of a multi-commit. The PR can be commented on as a usual PR. However, please be aware that manually updating the PR description, changing the PR status, or pushing new commits is not recommended as it might corrupt the commit process. Learn more about multi-commits [in this guide](https://huggingface.co/docs/huggingface_hub/main/guides/upload)._
48
+ """
49
+
50
+ MULTI_COMMIT_PR_COMPLETION_COMMENT_TEMPLATE = """
51
+ Multi-commit is now completed! You can ping the repo owner to review the changes. This PR can now be commented on or modified without risk of corrupting it.
52
+
53
+ _This is a comment posted using the `huggingface_hub` library in the context of a multi-commit. Learn more about multi-commits [in this guide](https://huggingface.co/docs/huggingface_hub/main/guides/upload)._
54
+ """
55
+
56
+ MULTI_COMMIT_PR_CLOSING_COMMENT_TEMPLATE = """
57
+ `create_pr=False` has been passed so the PR is automatically merged.
58
+
59
+ _This is a comment posted using the `huggingface_hub` library in the context of a multi-commit. Learn more about multi-commits [in this guide](https://huggingface.co/docs/huggingface_hub/main/guides/upload)._
60
+ """
61
+
62
+ MULTI_COMMIT_PR_CLOSE_COMMENT_FAILURE_NO_CHANGES_TEMPLATE = """
63
+ Cannot merge the Pull Request as no changes are associated with it. This PR will be closed automatically.
64
+
65
+ _This is a comment posted using the `huggingface_hub` library in the context of a multi-commit. Learn more about multi-commits [in this guide](https://huggingface.co/docs/huggingface_hub/main/guides/upload)._
66
+ """
67
+
68
+ MULTI_COMMIT_PR_CLOSE_COMMENT_FAILURE_BAD_REQUEST_TEMPLATE = """
69
+ An error occurred while trying to merge the Pull Request: `{error_message}`.
70
+
71
+ _This is a comment posted using the `huggingface_hub` library in the context of a multi-commit. Learn more about multi-commits [in this guide](https://huggingface.co/docs/huggingface_hub/main/guides/upload)._
72
+ """
73
+
74
+
75
+ STEP_ID_REGEX = re.compile(r"- \[(?P<completed>[ |x])\].*(?P<step_id>[a-fA-F0-9]{64})", flags=re.MULTILINE)
76
+
77
+
78
+ @experimental
79
+ def plan_multi_commits(
80
+ operations: Iterable[Union[CommitOperationAdd, CommitOperationDelete]],
81
+ max_operations_per_commit: int = 50,
82
+ max_upload_size_per_commit: int = 2 * 1024 * 1024 * 1024,
83
+ ) -> Tuple[List[List[CommitOperationAdd]], List[List[CommitOperationDelete]]]:
84
+ """Split a list of operations in a list of commits to perform.
85
+
86
+ Implementation follows a sub-optimal (yet simple) algorithm:
87
+ 1. Delete operations are grouped together by commits of maximum `max_operations_per_commits` operations.
88
+ 2. All additions exceeding `max_upload_size_per_commit` are committed 1 by 1.
89
+ 3. All remaining additions are grouped together and split each time the `max_operations_per_commit` or the
90
+ `max_upload_size_per_commit` limit is reached.
91
+
92
+ We do not try to optimize the splitting to get the lowest number of commits as this is a NP-hard problem (see
93
+ [bin packing problem](https://en.wikipedia.org/wiki/Bin_packing_problem)). For our use case, it is not problematic
94
+ to use a sub-optimal solution so we favored an easy-to-explain implementation.
95
+
96
+ Args:
97
+ operations (`List` of [`~hf_api.CommitOperation`]):
98
+ The list of operations to split into commits.
99
+ max_operations_per_commit (`int`):
100
+ Maximum number of operations in a single commit. Defaults to 50.
101
+ max_upload_size_per_commit (`int`):
102
+ Maximum size to upload (in bytes) in a single commit. Defaults to 2GB. Files bigger than this limit are
103
+ uploaded, 1 per commit.
104
+
105
+ Returns:
106
+ `Tuple[List[List[CommitOperationAdd]], List[List[CommitOperationDelete]]]`: a tuple. First item is a list of
107
+ lists of [`CommitOperationAdd`] representing the addition commits to push. The second item is a list of lists
108
+ of [`CommitOperationDelete`] representing the deletion commits.
109
+
110
+ <Tip warning={true}>
111
+
112
+ `plan_multi_commits` is experimental. Its API and behavior is subject to change in the future without prior notice.
113
+
114
+ </Tip>
115
+
116
+ Example:
117
+ ```python
118
+ >>> from huggingface_hub import HfApi, plan_multi_commits
119
+ >>> addition_commits, deletion_commits = plan_multi_commits(
120
+ ... operations=[
121
+ ... CommitOperationAdd(...),
122
+ ... CommitOperationAdd(...),
123
+ ... CommitOperationDelete(...),
124
+ ... CommitOperationDelete(...),
125
+ ... CommitOperationAdd(...),
126
+ ... ],
127
+ ... )
128
+ >>> HfApi().create_commits_on_pr(
129
+ ... repo_id="my-cool-model",
130
+ ... addition_commits=addition_commits,
131
+ ... deletion_commits=deletion_commits,
132
+ ... (...)
133
+ ... verbose=True,
134
+ ... )
135
+ ```
136
+
137
+ <Tip warning={true}>
138
+
139
+ The initial order of the operations is not guaranteed! All deletions will be performed before additions. If you are
140
+ not updating multiple times the same file, you are fine.
141
+
142
+ </Tip>
143
+ """
144
+ addition_commits: List[List[CommitOperationAdd]] = []
145
+ deletion_commits: List[List[CommitOperationDelete]] = []
146
+
147
+ additions: List[CommitOperationAdd] = []
148
+ additions_size = 0
149
+ deletions: List[CommitOperationDelete] = []
150
+ for op in operations:
151
+ if isinstance(op, CommitOperationDelete):
152
+ # Group delete operations together
153
+ deletions.append(op)
154
+ if len(deletions) >= max_operations_per_commit:
155
+ deletion_commits.append(deletions)
156
+ deletions = []
157
+
158
+ elif op.upload_info.size >= max_upload_size_per_commit:
159
+ # Upload huge files 1 by 1
160
+ addition_commits.append([op])
161
+
162
+ elif additions_size + op.upload_info.size < max_upload_size_per_commit:
163
+ # Group other additions and split if size limit is reached (either max_nb_files or max_upload_size)
164
+ additions.append(op)
165
+ additions_size += op.upload_info.size
166
+
167
+ else:
168
+ addition_commits.append(additions)
169
+ additions = [op]
170
+ additions_size = op.upload_info.size
171
+
172
+ if len(additions) >= max_operations_per_commit:
173
+ addition_commits.append(additions)
174
+ additions = []
175
+ additions_size = 0
176
+
177
+ if len(additions) > 0:
178
+ addition_commits.append(additions)
179
+ if len(deletions) > 0:
180
+ deletion_commits.append(deletions)
181
+
182
+ return addition_commits, deletion_commits
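
A small runnable sketch of the grouping behavior, forcing one operation per commit (file names and contents are made up):

```python
import tempfile
from pathlib import Path

from huggingface_hub import CommitOperationAdd, CommitOperationDelete, plan_multi_commits

tmp = Path(tempfile.mkdtemp())
(tmp / "a.txt").write_text("hello")
(tmp / "b.txt").write_text("world")

addition_commits, deletion_commits = plan_multi_commits(
    operations=[
        CommitOperationAdd("a.txt", str(tmp / "a.txt")),
        CommitOperationAdd("b.txt", str(tmp / "b.txt")),
        CommitOperationDelete("old.txt"),
    ],
    max_operations_per_commit=1,  # force one operation per commit
)
print(len(addition_commits), len(deletion_commits))  # 2 1
```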
183
+
184
+
185
+ @dataclass
186
+ class MultiCommitStep:
187
+ """Dataclass containing a list of CommitOperation to commit at once.
188
+
189
+ A [`MultiCommitStep`] is one atomic part of a [`MultiCommitStrategy`]. Each step is identified by its own
190
+ deterministic ID based on the list of commit operations (hexadecimal sha256). ID is persistent between re-runs if
191
+ the list of commits is kept the same.
192
+ """
193
+
194
+ operations: List[Union[CommitOperationAdd, CommitOperationDelete]]
195
+
196
+ id: str = field(init=False)
197
+ completed: bool = False
198
+
199
+ def __post_init__(self) -> None:
200
+ if len(self.operations) == 0:
201
+ raise ValueError("A MultiCommitStep must have at least 1 commit operation, got 0.")
202
+
203
+ # Generate commit id
204
+ sha = sha256()
205
+ for op in self.operations:
206
+ if isinstance(op, CommitOperationAdd):
207
+ sha.update(b"ADD")
208
+ sha.update(op.path_in_repo.encode())
209
+ sha.update(op.upload_info.sha256)
210
+ elif isinstance(op, CommitOperationDelete):
211
+ sha.update(b"DELETE")
212
+ sha.update(op.path_in_repo.encode())
213
+ sha.update(str(op.is_folder).encode())
214
+ else:
215
+ raise NotImplementedError()
216
+ self.id = sha.hexdigest()
217
+
218
+ def __str__(self) -> str:
219
+ """Format a step for PR description.
220
+
221
+ Formatting can be changed in the future as long as it is single line, starts with `- [ ]`/`- [x]` and contains
222
+ `self.id`. Must be able to match `STEP_ID_REGEX`.
223
+ """
224
+ additions = [op for op in self.operations if isinstance(op, CommitOperationAdd)]
225
+ file_deletions = [op for op in self.operations if isinstance(op, CommitOperationDelete) and not op.is_folder]
226
+ folder_deletions = [op for op in self.operations if isinstance(op, CommitOperationDelete) and op.is_folder]
227
+ if len(additions) > 0:
228
+ return (
229
+ f"- [{'x' if self.completed else ' '}] Upload {len(additions)} file(s) "
230
+ f"totalling {_format_size(sum(add.upload_info.size for add in additions))}"
231
+ f" ({self.id})"
232
+ )
233
+ else:
234
+ return (
235
+ f"- [{'x' if self.completed else ' '}] Delete {len(file_deletions)} file(s) and"
236
+ f" {len(folder_deletions)} folder(s) ({self.id})"
237
+ )
238
+
239
+
240
+ @dataclass
241
+ class MultiCommitStrategy:
242
+ """Dataclass containing a list of [`MultiCommitStep`] to commit iteratively.
243
+
244
+ A strategy is identified by its own deterministic ID based on the list of its steps (hexadecimal sha256). ID is
245
+ persistent between re-runs if the list of commits is kept the same.
246
+ """
247
+
248
+ addition_commits: List[MultiCommitStep]
249
+ deletion_commits: List[MultiCommitStep]
250
+
251
+ id: str = field(init=False)
252
+ all_steps: Set[str] = field(init=False)
253
+
254
+ def __post_init__(self) -> None:
255
+ self.all_steps = {step.id for step in self.addition_commits + self.deletion_commits}
256
+ if len(self.all_steps) < len(self.addition_commits) + len(self.deletion_commits):
257
+ raise ValueError("Got duplicate commits in MultiCommitStrategy. All commits must be unique.")
258
+
259
+ if len(self.all_steps) == 0:
260
+ raise ValueError("A MultiCommitStrategy must have at least 1 commit, got 0.")
261
+
262
+ # Generate strategy id
263
+ sha = sha256()
264
+ for step in self.addition_commits + self.deletion_commits:
265
+ sha.update("new step".encode())
266
+ sha.update(step.id.encode())
267
+ self.id = sha.hexdigest()
268
+
269
+
270
+ def multi_commit_create_pull_request(
271
+ api: "HfApi",
272
+ repo_id: str,
273
+ commit_message: str,
274
+ commit_description: Optional[str],
275
+ strategy: MultiCommitStrategy,
276
+ repo_type: Optional[str],
277
+ token: Union[str, bool, None] = None,
278
+ ) -> DiscussionWithDetails:
279
+ return api.create_pull_request(
280
+ repo_id=repo_id,
281
+ title=f"[WIP] {commit_message} (multi-commit {strategy.id})",
282
+ description=multi_commit_generate_comment(
283
+ commit_message=commit_message, commit_description=commit_description, strategy=strategy
284
+ ),
285
+ token=token,
286
+ repo_type=repo_type,
287
+ )
288
+
289
+
290
+ def multi_commit_generate_comment(
291
+ commit_message: str,
292
+ commit_description: Optional[str],
293
+ strategy: MultiCommitStrategy,
294
+ ) -> str:
295
+ return MULTI_COMMIT_PR_DESCRIPTION_TEMPLATE.format(
296
+ commit_message=commit_message,
297
+ commit_description=commit_description or "",
298
+ multi_commit_id=strategy.id,
299
+ multi_commit_strategy="\n".join(
300
+ str(commit) for commit in strategy.deletion_commits + strategy.addition_commits
301
+ ),
302
+ )
303
+
304
+
305
+ def multi_commit_parse_pr_description(description: str) -> Set[str]:
306
+ return {match[1] for match in STEP_ID_REGEX.findall(description)}
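
A short sketch of the round-trip between the step formatting above and `STEP_ID_REGEX` (step ids are made up; the parser is a private helper, imported here only for illustration):

```python
from huggingface_hub._multi_commits import multi_commit_parse_pr_description

step_a, step_b = "a" * 64, "b" * 64  # made-up 64-hex step ids
description = f"""
Scheduled commits:

- [x] Upload 3 file(s) totalling 1.2G ({step_a})
- [ ] Delete 2 file(s) and 0 folder(s) ({step_b})
"""

# Both step ids are recovered, whether the step is completed or not.
print(multi_commit_parse_pr_description(description) == {step_a, step_b})  # True
```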
huggingface_hub/_snapshot_download.py ADDED
@@ -0,0 +1,304 @@
1
+ import os
2
+ from pathlib import Path
3
+ from typing import Dict, List, Literal, Optional, Union
4
+
5
+ import requests
6
+ from tqdm.auto import tqdm as base_tqdm
7
+ from tqdm.contrib.concurrent import thread_map
8
+
9
+ from . import constants
10
+ from .errors import GatedRepoError, LocalEntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError
11
+ from .file_download import REGEX_COMMIT_HASH, hf_hub_download, repo_folder_name
12
+ from .hf_api import DatasetInfo, HfApi, ModelInfo, SpaceInfo
13
+ from .utils import OfflineModeIsEnabled, filter_repo_objects, logging, validate_hf_hub_args
14
+ from .utils import tqdm as hf_tqdm
15
+
16
+
17
+ logger = logging.get_logger(__name__)
18
+
19
+
20
+ @validate_hf_hub_args
21
+ def snapshot_download(
22
+ repo_id: str,
23
+ *,
24
+ repo_type: Optional[str] = None,
25
+ revision: Optional[str] = None,
26
+ cache_dir: Union[str, Path, None] = None,
27
+ local_dir: Union[str, Path, None] = None,
28
+ library_name: Optional[str] = None,
29
+ library_version: Optional[str] = None,
30
+ user_agent: Optional[Union[Dict, str]] = None,
31
+ proxies: Optional[Dict] = None,
32
+ etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
33
+ force_download: bool = False,
34
+ token: Optional[Union[bool, str]] = None,
35
+ local_files_only: bool = False,
36
+ allow_patterns: Optional[Union[List[str], str]] = None,
37
+ ignore_patterns: Optional[Union[List[str], str]] = None,
38
+ max_workers: int = 8,
39
+ tqdm_class: Optional[base_tqdm] = None,
40
+ headers: Optional[Dict[str, str]] = None,
41
+ endpoint: Optional[str] = None,
42
+ # Deprecated args
43
+ local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto",
44
+ resume_download: Optional[bool] = None,
45
+ ) -> str:
46
+ """Download repo files.
47
+
48
+ Download a whole snapshot of a repo's files at the specified revision. This is useful when you want all files from
49
+ a repo, because you don't know which ones you will need a priori. All files are nested inside a folder in order
50
+ to keep their actual filename relative to that folder. You can also filter which files to download using
51
+ `allow_patterns` and `ignore_patterns`.
52
+
53
+ If `local_dir` is provided, the file structure from the repo will be replicated in this location. When using this
54
+ option, the `cache_dir` will not be used and a `.cache/huggingface/` folder will be created at the root of `local_dir`
55
+ to store some metadata related to the downloaded files. While this mechanism is not as robust as the main
56
+ cache-system, it's optimized for regularly pulling the latest version of a repository.
57
+
58
+ An alternative would be to clone the repo but this requires git and git-lfs to be installed and properly
59
+ configured. It is also not possible to filter which files to download when cloning a repository using git.
60
+
61
+ Args:
62
+ repo_id (`str`):
63
+ A user or an organization name and a repo name separated by a `/`.
64
+ repo_type (`str`, *optional*):
65
+ Set to `"dataset"` or `"space"` if downloading from a dataset or space,
66
+ `None` or `"model"` if downloading from a model. Default is `None`.
67
+ revision (`str`, *optional*):
68
+ An optional Git revision id which can be a branch name, a tag, or a
69
+ commit hash.
70
+ cache_dir (`str`, `Path`, *optional*):
71
+ Path to the folder where cached files are stored.
72
+ local_dir (`str` or `Path`, *optional*):
73
+ If provided, the downloaded files will be placed under this directory.
74
+ library_name (`str`, *optional*):
75
+ The name of the library to which the object corresponds.
76
+ library_version (`str`, *optional*):
77
+ The version of the library.
78
+ user_agent (`str`, `dict`, *optional*):
79
+ The user-agent info in the form of a dictionary or a string.
80
+ proxies (`dict`, *optional*):
81
+ Dictionary mapping protocol to the URL of the proxy passed to
82
+ `requests.request`.
83
+ etag_timeout (`float`, *optional*, defaults to `10`):
84
+ When fetching ETag, how many seconds to wait for the server to send
85
+ data before giving up which is passed to `requests.request`.
86
+ force_download (`bool`, *optional*, defaults to `False`):
87
+ Whether the file should be downloaded even if it already exists in the local cache.
88
+ token (`str`, `bool`, *optional*):
89
+ A token to be used for the download.
90
+ - If `True`, the token is read from the HuggingFace config
91
+ folder.
92
+ - If a string, it's used as the authentication token.
93
+ headers (`dict`, *optional*):
94
+ Additional headers to include in the request. Those headers take precedence over the others.
95
+ local_files_only (`bool`, *optional*, defaults to `False`):
96
+ If `True`, avoid downloading the file and return the path to the
97
+ local cached file if it exists.
98
+ allow_patterns (`List[str]` or `str`, *optional*):
99
+ If provided, only files matching at least one pattern are downloaded.
100
+ ignore_patterns (`List[str]` or `str`, *optional*):
101
+ If provided, files matching any of the patterns are not downloaded.
102
+ max_workers (`int`, *optional*):
103
+ Number of concurrent threads to download files (1 thread = 1 file download).
104
+ Defaults to 8.
105
+ tqdm_class (`tqdm`, *optional*):
106
+ If provided, overwrites the default behavior for the progress bar. Passed
107
+ argument must inherit from `tqdm.auto.tqdm` or at least mimic its behavior.
108
+ Note that the `tqdm_class` is not passed to each individual download.
109
+ Defaults to the custom HF progress bar that can be disabled by setting
110
+ `HF_HUB_DISABLE_PROGRESS_BARS` environment variable.
111
+
112
+ Returns:
113
+ `str`: folder path of the repo snapshot.
114
+
115
+ Raises:
116
+ [`~utils.RepositoryNotFoundError`]
117
+ If the repository to download from cannot be found. This may be because it doesn't exist,
118
+ or because it is set to `private` and you do not have access.
119
+ [`~utils.RevisionNotFoundError`]
120
+ If the revision to download from cannot be found.
121
+ [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
122
+ If `token=True` and the token cannot be found.
123
+ [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) if
124
+ ETag cannot be determined.
125
+ [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
126
+ if some parameter value is invalid.
127
+ """
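
A minimal usage sketch (the repo id and patterns are illustrative):

```python
from huggingface_hub import snapshot_download

# Fetch only config and weight files of a repo into ./model.
path = snapshot_download(
    repo_id="user/my-model",  # illustrative repo id
    allow_patterns=["*.json", "*.safetensors"],
    local_dir="model",
)
print(path)  # local folder containing the filtered snapshot
```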
128
+ if cache_dir is None:
129
+ cache_dir = constants.HF_HUB_CACHE
130
+ if revision is None:
131
+ revision = constants.DEFAULT_REVISION
132
+ if isinstance(cache_dir, Path):
133
+ cache_dir = str(cache_dir)
134
+
135
+ if repo_type is None:
136
+ repo_type = "model"
137
+ if repo_type not in constants.REPO_TYPES:
138
+ raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES)}")
139
+
140
+ storage_folder = os.path.join(cache_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type))
141
+
142
+ repo_info: Union[ModelInfo, DatasetInfo, SpaceInfo, None] = None
143
+ api_call_error: Optional[Exception] = None
144
+ if not local_files_only:
145
+ # try/except logic to handle different errors => taken from `hf_hub_download`
146
+ try:
147
+ # if we have internet connection we want to list files to download
148
+ api = HfApi(
149
+ library_name=library_name,
150
+ library_version=library_version,
151
+ user_agent=user_agent,
152
+ endpoint=endpoint,
153
+ headers=headers,
154
+ )
155
+ repo_info = api.repo_info(repo_id=repo_id, repo_type=repo_type, revision=revision, token=token)
156
+ except (requests.exceptions.SSLError, requests.exceptions.ProxyError):
157
+ # Actually raise for those subclasses of ConnectionError
158
+ raise
159
+ except (
160
+ requests.exceptions.ConnectionError,
161
+ requests.exceptions.Timeout,
162
+ OfflineModeIsEnabled,
163
+ ) as error:
164
+ # Internet connection is down
165
+ # => will try to use local files only
166
+ api_call_error = error
167
+ pass
168
+ except RevisionNotFoundError:
169
+ # The repo was found but the revision doesn't exist on the Hub (never existed or got deleted)
170
+ raise
171
+ except requests.HTTPError as error:
172
+ # Multiple reasons for an http error:
173
+ # - Repository is private and invalid/missing token sent
174
+ # - Repository is gated and invalid/missing token sent
175
+ # - Hub is down (error 500 or 504)
176
+ # => let's switch to 'local_files_only=True' to check if the files are already cached.
177
+ # (if it's not the case, the error will be re-raised)
178
+ api_call_error = error
179
+ pass
180
+
181
+ # At this stage, if `repo_info` is None it means either:
182
+ # - internet connection is down
183
+ # - internet connection is deactivated (local_files_only=True or HF_HUB_OFFLINE=True)
184
+ # - repo is private/gated and invalid/missing token sent
185
+ # - Hub is down
186
+ # => let's look if we can find the appropriate folder in the cache:
187
+ # - if the specified revision is a commit hash, look inside "snapshots".
188
+ # - if the specified revision is a branch or tag, look inside "refs".
+     # => if local_dir is not None, we will return the path to the local folder if it exists.
+     if repo_info is None:
+         # Try to get which commit hash corresponds to the specified revision
+         commit_hash = None
+         if REGEX_COMMIT_HASH.match(revision):
+             commit_hash = revision
+         else:
+             ref_path = os.path.join(storage_folder, "refs", revision)
+             if os.path.exists(ref_path):
+                 # retrieve commit_hash from refs file
+                 with open(ref_path) as f:
+                     commit_hash = f.read()
+
+         # Try to locate snapshot folder for this commit hash
+         if commit_hash is not None:
+             snapshot_folder = os.path.join(storage_folder, "snapshots", commit_hash)
+             if os.path.exists(snapshot_folder):
+                 # Snapshot folder exists => let's return it
+                 # (but we can't check if all the files are actually there)
+                 return snapshot_folder
+         # If local_dir is not None, return it if it exists and is not empty
+         if local_dir is not None:
+             local_dir = Path(local_dir)
+             if local_dir.is_dir() and any(local_dir.iterdir()):
+                 logger.warning(
+                     f"Returning existing local_dir `{local_dir}` as remote repo cannot be accessed in `snapshot_download` ({api_call_error})."
+                 )
+                 return str(local_dir.resolve())
+         # If we couldn't find the appropriate folder on disk, raise an error.
+         if local_files_only:
+             raise LocalEntryNotFoundError(
+                 "Cannot find an appropriate cached snapshot folder for the specified revision on the local disk and "
+                 "outgoing traffic has been disabled. To enable repo look-ups and downloads online, pass "
+                 "'local_files_only=False' as input."
+             )
+         elif isinstance(api_call_error, OfflineModeIsEnabled):
+             raise LocalEntryNotFoundError(
+                 "Cannot find an appropriate cached snapshot folder for the specified revision on the local disk and "
+                 "outgoing traffic has been disabled. To enable repo look-ups and downloads online, set "
+                 "'HF_HUB_OFFLINE=0' as environment variable."
+             ) from api_call_error
+         elif isinstance(api_call_error, RepositoryNotFoundError) or isinstance(api_call_error, GatedRepoError):
+             # Repo not found => let's raise the actual error
+             raise api_call_error
+         else:
+             # Otherwise: most likely a connection issue or Hub downtime => let's warn the user
+             raise LocalEntryNotFoundError(
+                 "An error happened while trying to locate the files on the Hub and we cannot find the appropriate"
+                 " snapshot folder for the specified revision on the local disk. Please check your internet connection"
+                 " and try again."
+             ) from api_call_error
+
+     # At this stage, internet connection is up and running
+     # => let's download the files!
+     assert repo_info.sha is not None, "Repo info returned from server must have a revision sha."
+     assert repo_info.siblings is not None, "Repo info returned from server must have a siblings list."
+     filtered_repo_files = list(
+         filter_repo_objects(
+             items=[f.rfilename for f in repo_info.siblings],
+             allow_patterns=allow_patterns,
+             ignore_patterns=ignore_patterns,
+         )
+     )
+     commit_hash = repo_info.sha
+     snapshot_folder = os.path.join(storage_folder, "snapshots", commit_hash)
+     # if passed revision is not identical to commit_hash
+     # then revision has to be a branch name or tag name.
+     # In that case store a ref.
+     if revision != commit_hash:
+         ref_path = os.path.join(storage_folder, "refs", revision)
+         os.makedirs(os.path.dirname(ref_path), exist_ok=True)
+         with open(ref_path, "w") as f:
+             f.write(commit_hash)
+
+     # we pass the commit_hash to hf_hub_download
+     # so no network call happens if we already
+     # have the file locally.
+     def _inner_hf_hub_download(repo_file: str):
+         return hf_hub_download(
+             repo_id,
+             filename=repo_file,
+             repo_type=repo_type,
+             revision=commit_hash,
+             endpoint=endpoint,
+             cache_dir=cache_dir,
+             local_dir=local_dir,
+             local_dir_use_symlinks=local_dir_use_symlinks,
+             library_name=library_name,
+             library_version=library_version,
+             user_agent=user_agent,
+             proxies=proxies,
+             etag_timeout=etag_timeout,
+             resume_download=resume_download,
+             force_download=force_download,
+             token=token,
+             headers=headers,
+         )
+
+     if constants.HF_HUB_ENABLE_HF_TRANSFER:
+         # when using hf_transfer we don't want extra parallelism
+         # from the one hf_transfer provides
+         for file in filtered_repo_files:
+             _inner_hf_hub_download(file)
+     else:
+         thread_map(
+             _inner_hf_hub_download,
+             filtered_repo_files,
+             desc=f"Fetching {len(filtered_repo_files)} files",
+             max_workers=max_workers,
+             # User can use its own tqdm class or the default one from `huggingface_hub.utils`
+             tqdm_class=tqdm_class or hf_tqdm,
+         )
+
+     if local_dir is not None:
+         return str(os.path.realpath(local_dir))
+     return snapshot_folder
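
For context, a minimal usage sketch of the function above; the repo id and patterns are illustrative placeholders, not values from this commit:

```python
from huggingface_hub import snapshot_download

# Resolves the revision to a commit hash, reuses the cached snapshot when
# possible, and otherwise downloads the matching files in parallel threads.
local_folder = snapshot_download(
    repo_id="gpt2",                     # placeholder repo id
    revision="main",
    allow_patterns=["*.json", "*.txt"],  # only fetch matching files
)
print(local_folder)  # .../snapshots/<commit_hash>
```

Because the resolved commit hash is passed down to `hf_hub_download`, a re-run with a warm cache makes no per-file network calls, per the comment in the code above.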
huggingface_hub/_space_api.py ADDED
@@ -0,0 +1,160 @@
+ # coding=utf-8
+ # Copyright 2019-present, the HuggingFace Inc. team.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ from dataclasses import dataclass
+ from datetime import datetime
+ from enum import Enum
+ from typing import Dict, Optional
+
+ from huggingface_hub.utils import parse_datetime
+
+
+ class SpaceStage(str, Enum):
+     """
+     Enumeration of possible stages of a Space on the Hub.
+
+     Value can be compared to a string:
+     ```py
+     assert SpaceStage.BUILDING == "BUILDING"
+     ```
+
+     Taken from https://github.com/huggingface/moon-landing/blob/main/server/repo_types/SpaceInfo.ts#L61 (private url).
+     """
+
+     # Copied from moon-landing > server > repo_types > SpaceInfo.ts (private repo)
+     NO_APP_FILE = "NO_APP_FILE"
+     CONFIG_ERROR = "CONFIG_ERROR"
+     BUILDING = "BUILDING"
+     BUILD_ERROR = "BUILD_ERROR"
+     RUNNING = "RUNNING"
+     RUNNING_BUILDING = "RUNNING_BUILDING"
+     RUNTIME_ERROR = "RUNTIME_ERROR"
+     DELETING = "DELETING"
+     STOPPED = "STOPPED"
+     PAUSED = "PAUSED"
+
+
+ class SpaceHardware(str, Enum):
+     """
+     Enumeration of hardware options available to run your Space on the Hub.
+
+     Value can be compared to a string:
+     ```py
+     assert SpaceHardware.CPU_BASIC == "cpu-basic"
+     ```
+
+     Taken from https://github.com/huggingface/moon-landing/blob/main/server/repo_types/SpaceInfo.ts#L73 (private url).
+     """
+
+     CPU_BASIC = "cpu-basic"
+     CPU_UPGRADE = "cpu-upgrade"
+     T4_SMALL = "t4-small"
+     T4_MEDIUM = "t4-medium"
+     L4X1 = "l4x1"
+     L4X4 = "l4x4"
+     ZERO_A10G = "zero-a10g"
+     A10G_SMALL = "a10g-small"
+     A10G_LARGE = "a10g-large"
+     A10G_LARGEX2 = "a10g-largex2"
+     A10G_LARGEX4 = "a10g-largex4"
+     A100_LARGE = "a100-large"
+     V5E_1X1 = "v5e-1x1"
+     V5E_2X2 = "v5e-2x2"
+     V5E_2X4 = "v5e-2x4"
+
+
+ class SpaceStorage(str, Enum):
+     """
+     Enumeration of persistent storage available for your Space on the Hub.
+
+     Value can be compared to a string:
+     ```py
+     assert SpaceStorage.SMALL == "small"
+     ```
+
+     Taken from https://github.com/huggingface/moon-landing/blob/main/server/repo_types/SpaceHardwareFlavor.ts#L24 (private url).
+     """
+
+     SMALL = "small"
+     MEDIUM = "medium"
+     LARGE = "large"
+
+
+ @dataclass
+ class SpaceRuntime:
+     """
+     Contains information about the current runtime of a Space.
+
+     Args:
+         stage (`str`):
+             Current stage of the space. Example: RUNNING.
+         hardware (`str` or `None`):
+             Current hardware of the space. Example: "cpu-basic". Can be `None` if Space
+             is `BUILDING` for the first time.
+         requested_hardware (`str` or `None`):
+             Requested hardware. Can be different than `hardware` especially if the request
+             has just been made. Example: "t4-medium". Can be `None` if no hardware has
+             been requested yet.
+         sleep_time (`int` or `None`):
+             Number of seconds the Space will be kept alive after the last request. By default (if value is `None`), the
+             Space will never go to sleep if it's running on an upgraded hardware, while it will go to sleep after 48
+             hours on a free 'cpu-basic' hardware. For more details, see https://huggingface.co/docs/hub/spaces-gpus#sleep-time.
+         raw (`dict`):
+             Raw response from the server. Contains more information about the Space
+             runtime like number of replicas, number of cpu, memory size,...
+     """
+
+     stage: SpaceStage
+     hardware: Optional[SpaceHardware]
+     requested_hardware: Optional[SpaceHardware]
+     sleep_time: Optional[int]
+     storage: Optional[SpaceStorage]
+     raw: Dict
+
+     def __init__(self, data: Dict) -> None:
+         self.stage = data["stage"]
+         self.hardware = data.get("hardware", {}).get("current")
+         self.requested_hardware = data.get("hardware", {}).get("requested")
+         self.sleep_time = data.get("gcTimeout")
+         self.storage = data.get("storage")
+         self.raw = data
+
+
+ @dataclass
+ class SpaceVariable:
+     """
+     Contains information about the current variables of a Space.
+
+     Args:
+         key (`str`):
+             Variable key. Example: `"MODEL_REPO_ID"`
+         value (`str`):
+             Variable value. Example: `"the_model_repo_id"`.
+         description (`str` or None):
+             Description of the variable. Example: `"Model Repo ID of the implemented model"`.
+         updated_at (`datetime` or None):
+             datetime of the last update of the variable (if the variable has been updated at least once).
+     """
+
+     key: str
+     value: str
+     description: Optional[str]
+     updated_at: Optional[datetime]
+
+     def __init__(self, key: str, values: Dict) -> None:
+         self.key = key
+         self.value = values["value"]
+         self.description = values.get("description")
+         updated_at = values.get("updatedAt")
+         self.updated_at = parse_datetime(updated_at) if updated_at is not None else None
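
To illustrate how `SpaceRuntime` consumes the raw server payload above, a small hand-written sketch (the payload values are invented):

```python
raw = {
    "stage": "RUNNING",
    "hardware": {"current": "t4-small", "requested": "t4-medium"},
    "gcTimeout": 3600,
    "storage": "small",
}
runtime = SpaceRuntime(raw)

# The str-based enums compare equal to their plain string values.
assert runtime.stage == SpaceStage.RUNNING
assert runtime.hardware == SpaceHardware.T4_SMALL
assert runtime.sleep_time == 3600
```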
huggingface_hub/_tensorboard_logger.py ADDED
@@ -0,0 +1,195 @@
+ # Copyright 2023 The HuggingFace Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains a logger to push training logs to the Hub, using Tensorboard."""
+
+ from pathlib import Path
+ from typing import TYPE_CHECKING, List, Optional, Union
+
+ from ._commit_scheduler import CommitScheduler
+ from .errors import EntryNotFoundError
+ from .repocard import ModelCard
+ from .utils import experimental
+
+
+ # Depending on user's setup, SummaryWriter can come either from 'tensorboardX'
+ # or from 'torch.utils.tensorboard'. Both are compatible so let's try to load
+ # from either of them.
+ try:
+     from tensorboardX import SummaryWriter
+
+     is_summary_writer_available = True
+
+ except ImportError:
+     try:
+         from torch.utils.tensorboard import SummaryWriter
+
+         is_summary_writer_available = True
+     except ImportError:
+         # Dummy class to avoid failing at import. Will raise on instance creation.
+         SummaryWriter = object
+         is_summary_writer_available = False
+
+ if TYPE_CHECKING:
+     from tensorboardX import SummaryWriter
+
+
+ class HFSummaryWriter(SummaryWriter):
+     """
+     Wrapper around the tensorboard's `SummaryWriter` to push training logs to the Hub.
+
+     Data is logged locally and then pushed to the Hub asynchronously. Pushing data to the Hub is done in a separate
+     thread to avoid blocking the training script. In particular, if the upload fails for any reason (e.g. a connection
+     issue), the main script will not be interrupted. Data is automatically pushed to the Hub every `commit_every`
+     minutes (defaults to every 5 minutes).
+
+     <Tip warning={true}>
+
+     `HFSummaryWriter` is experimental. Its API is subject to change in the future without prior notice.
+
+     </Tip>
+
+     Args:
+         repo_id (`str`):
+             The id of the repo to which the logs will be pushed.
+         logdir (`str`, *optional*):
+             The directory where the logs will be written. If not specified, a local directory will be created by the
+             underlying `SummaryWriter` object.
+         commit_every (`int` or `float`, *optional*):
+             The frequency (in minutes) at which the logs will be pushed to the Hub. Defaults to 5 minutes.
+         squash_history (`bool`, *optional*):
+             Whether to squash the history of the repo after each commit. Defaults to `False`. Squashing commits is
+             useful to avoid degraded performance on the repo when it grows too large.
+         repo_type (`str`, *optional*):
+             The type of the repo to which the logs will be pushed. Defaults to "model".
+         repo_revision (`str`, *optional*):
+             The revision of the repo to which the logs will be pushed. Defaults to "main".
+         repo_private (`bool`, *optional*):
+             Whether to create a private repo or not. Defaults to False. This argument is ignored if the repo already
+             exists.
+         path_in_repo (`str`, *optional*):
+             The path to the folder in the repo where the logs will be pushed. Defaults to "tensorboard/".
+         repo_allow_patterns (`List[str]` or `str`, *optional*):
+             A list of patterns to include in the upload. Defaults to `"*.tfevents.*"`. Check out the
+             [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder) for more details.
+         repo_ignore_patterns (`List[str]` or `str`, *optional*):
+             A list of patterns to exclude in the upload. Check out the
+             [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder) for more details.
+         token (`str`, *optional*):
+             Authentication token. Will default to the stored token. See https://huggingface.co/settings/token for more
+             details
+         kwargs:
+             Additional keyword arguments passed to `SummaryWriter`.
+
+     Examples:
+     ```diff
+     # Taken from https://pytorch.org/docs/stable/tensorboard.html
+     - from torch.utils.tensorboard import SummaryWriter
+     + from huggingface_hub import HFSummaryWriter
+
+     import numpy as np
+
+     - writer = SummaryWriter()
+     + writer = HFSummaryWriter(repo_id="username/my-trained-model")
+
+     for n_iter in range(100):
+         writer.add_scalar('Loss/train', np.random.random(), n_iter)
+         writer.add_scalar('Loss/test', np.random.random(), n_iter)
+         writer.add_scalar('Accuracy/train', np.random.random(), n_iter)
+         writer.add_scalar('Accuracy/test', np.random.random(), n_iter)
+     ```
+
+     ```py
+     >>> from huggingface_hub import HFSummaryWriter
+
+     # Logs are automatically pushed every 15 minutes (5 by default) + when exiting the context manager
+     >>> with HFSummaryWriter(repo_id="test_hf_logger", commit_every=15) as logger:
+     ...     logger.add_scalar("a", 1)
+     ...     logger.add_scalar("b", 2)
+     ```
+     """
+
+     @experimental
+     def __new__(cls, *args, **kwargs) -> "HFSummaryWriter":
+         if not is_summary_writer_available:
+             raise ImportError(
+                 "You must have `tensorboard` installed to use `HFSummaryWriter`. Please run `pip install --upgrade"
+                 " tensorboardX` first."
+             )
+         return super().__new__(cls)
+
+     def __init__(
+         self,
+         repo_id: str,
+         *,
+         logdir: Optional[str] = None,
+         commit_every: Union[int, float] = 5,
+         squash_history: bool = False,
+         repo_type: Optional[str] = None,
+         repo_revision: Optional[str] = None,
+         repo_private: bool = False,
+         path_in_repo: Optional[str] = "tensorboard",
+         repo_allow_patterns: Optional[Union[List[str], str]] = "*.tfevents.*",
+         repo_ignore_patterns: Optional[Union[List[str], str]] = None,
+         token: Optional[str] = None,
+         **kwargs,
+     ):
+         # Initialize SummaryWriter
+         super().__init__(logdir=logdir, **kwargs)
+
+         # Check logdir has been correctly initialized and fail early otherwise. In practice, SummaryWriter takes care of it.
+         if not isinstance(self.logdir, str):
+             raise ValueError(f"`self.logdir` must be a string. Got '{self.logdir}' of type {type(self.logdir)}.")
+
+         # Append logdir name to `path_in_repo`
+         if path_in_repo is None or path_in_repo == "":
+             path_in_repo = Path(self.logdir).name
+         else:
+             path_in_repo = path_in_repo.strip("/") + "/" + Path(self.logdir).name
+
+         # Initialize scheduler
+         self.scheduler = CommitScheduler(
+             folder_path=self.logdir,
+             path_in_repo=path_in_repo,
+             repo_id=repo_id,
+             repo_type=repo_type,
+             revision=repo_revision,
+             private=repo_private,
+             token=token,
+             allow_patterns=repo_allow_patterns,
+             ignore_patterns=repo_ignore_patterns,
+             every=commit_every,
+             squash_history=squash_history,
+         )
+
+         # Exposing some high-level info at root level
+         self.repo_id = self.scheduler.repo_id
+         self.repo_type = self.scheduler.repo_type
+         self.repo_revision = self.scheduler.revision
+
+         # Add `hf-summary-writer` tag to the model card metadata
+         try:
+             card = ModelCard.load(repo_id_or_path=self.repo_id, repo_type=self.repo_type)
+         except EntryNotFoundError:
+             card = ModelCard("")
+         tags = card.data.get("tags", [])
+         if "hf-summary-writer" not in tags:
+             tags.append("hf-summary-writer")
+         card.data["tags"] = tags
+         card.push_to_hub(repo_id=self.repo_id, repo_type=self.repo_type)
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         """Push to hub in a non-blocking way when exiting the logger's context manager."""
+         super().__exit__(exc_type, exc_val, exc_tb)
+         future = self.scheduler.trigger()
+         future.result()
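
A short sketch of the context-manager behaviour implemented by `__exit__` above (the repo id is a placeholder): because `trigger()` returns a future and `future.result()` blocks, everything logged inside the `with` block is pushed before the script moves on.

```python
from huggingface_hub import HFSummaryWriter

with HFSummaryWriter(repo_id="username/test-logs", commit_every=5) as writer:
    for step in range(10):
        writer.add_scalar("loss", 1.0 / (step + 1), step)
# Here the final commit has completed (or raised), thanks to future.result().
```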
huggingface_hub/_upload_large_folder.py ADDED
@@ -0,0 +1,621 @@
+ # coding=utf-8
+ # Copyright 2024-present, the HuggingFace Inc. team.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ import enum
+ import logging
+ import os
+ import queue
+ import shutil
+ import sys
+ import threading
+ import time
+ import traceback
+ from datetime import datetime
+ from pathlib import Path
+ from threading import Lock
+ from typing import TYPE_CHECKING, List, Optional, Tuple, Union
+
+ from . import constants
+ from ._commit_api import CommitOperationAdd, UploadInfo, _fetch_upload_modes
+ from ._local_folder import LocalUploadFileMetadata, LocalUploadFilePaths, get_local_upload_paths, read_upload_metadata
+ from .constants import DEFAULT_REVISION, REPO_TYPES
+ from .utils import DEFAULT_IGNORE_PATTERNS, filter_repo_objects, tqdm
+ from .utils._cache_manager import _format_size
+ from .utils.sha import sha_fileobj
+
+
+ if TYPE_CHECKING:
+     from .hf_api import HfApi
+
+ logger = logging.getLogger(__name__)
+
+ WAITING_TIME_IF_NO_TASKS = 10  # seconds
+ MAX_NB_REGULAR_FILES_PER_COMMIT = 75
+ MAX_NB_LFS_FILES_PER_COMMIT = 150
+
+
+ def upload_large_folder_internal(
+     api: "HfApi",
+     repo_id: str,
+     folder_path: Union[str, Path],
+     *,
+     repo_type: str,  # Repo type is required!
+     revision: Optional[str] = None,
+     private: bool = False,
+     allow_patterns: Optional[Union[List[str], str]] = None,
+     ignore_patterns: Optional[Union[List[str], str]] = None,
+     num_workers: Optional[int] = None,
+     print_report: bool = True,
+     print_report_every: int = 60,
+ ):
+     """Upload a large folder to the Hub in the most resilient way possible.
+
+     See [`HfApi.upload_large_folder`] for the full documentation.
+     """
+     # 1. Check args and setup
+     if repo_type is None:
+         raise ValueError(
+             "For large uploads, `repo_type` is explicitly required. Please set it to `model`, `dataset` or `space`."
+             " If you are using the CLI, pass it as `--repo-type=model`."
+         )
+     if repo_type not in REPO_TYPES:
+         raise ValueError(f"Invalid repo type, must be one of {REPO_TYPES}")
+     if revision is None:
+         revision = DEFAULT_REVISION
+
+     folder_path = Path(folder_path).expanduser().resolve()
+     if not folder_path.is_dir():
+         raise ValueError(f"Provided path: '{folder_path}' is not a directory")
+
+     if ignore_patterns is None:
+         ignore_patterns = []
+     elif isinstance(ignore_patterns, str):
+         ignore_patterns = [ignore_patterns]
+     ignore_patterns += DEFAULT_IGNORE_PATTERNS
+
+     if num_workers is None:
+         nb_cores = os.cpu_count() or 1
+         num_workers = max(nb_cores - 2, 2)  # Use all but 2 cores, or at least 2 cores
+
+     # 2. Create repo if missing
+     repo_url = api.create_repo(repo_id=repo_id, repo_type=repo_type, private=private, exist_ok=True)
+     logger.info(f"Repo created: {repo_url}")
+     repo_id = repo_url.repo_id
+
+     # 3. List files to upload
+     filtered_paths_list = filter_repo_objects(
+         (path.relative_to(folder_path).as_posix() for path in folder_path.glob("**/*") if path.is_file()),
+         allow_patterns=allow_patterns,
+         ignore_patterns=ignore_patterns,
+     )
+     paths_list = [get_local_upload_paths(folder_path, relpath) for relpath in filtered_paths_list]
+     logger.info(f"Found {len(paths_list)} candidate files to upload")
+
+     # Read metadata for each file
+     items = [
+         (paths, read_upload_metadata(folder_path, paths.path_in_repo))
+         for paths in tqdm(paths_list, desc="Recovering from metadata files")
+     ]
+
+     # 4. Start workers
+     status = LargeUploadStatus(items)
+     threads = [
+         threading.Thread(
+             target=_worker_job,
+             kwargs={
+                 "status": status,
+                 "api": api,
+                 "repo_id": repo_id,
+                 "repo_type": repo_type,
+                 "revision": revision,
+             },
+         )
+         for _ in range(num_workers)
+     ]
+
+     for thread in threads:
+         thread.start()
+
+     # 5. Print regular reports
+     if print_report:
+         print("\n\n" + status.current_report())
+     last_report_ts = time.time()
+     while True:
+         time.sleep(1)
+         if time.time() - last_report_ts >= print_report_every:
+             if print_report:
+                 _print_overwrite(status.current_report())
+             last_report_ts = time.time()
+         if status.is_done():
+             logger.info("Is done: exiting main loop")
+             break
+
+     for thread in threads:
+         thread.join()
+
+     logger.info(status.current_report())
+     logger.info("Upload is complete!")
+
+
+ ####################
+ # Logic to manage workers and synchronize tasks
+ ####################
+
+
+ class WorkerJob(enum.Enum):
+     SHA256 = enum.auto()
+     GET_UPLOAD_MODE = enum.auto()
+     PREUPLOAD_LFS = enum.auto()
+     COMMIT = enum.auto()
+     WAIT = enum.auto()  # if no tasks are available but we don't want to exit
+
+
+ JOB_ITEM_T = Tuple[LocalUploadFilePaths, LocalUploadFileMetadata]
+
+
+ class LargeUploadStatus:
+     """Contains information, queues and tasks for a large upload process."""
+
+     def __init__(self, items: List[JOB_ITEM_T]):
+         self.items = items
+         self.queue_sha256: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
+         self.queue_get_upload_mode: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
+         self.queue_preupload_lfs: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
+         self.queue_commit: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
+         self.lock = Lock()
+
+         self.nb_workers_sha256: int = 0
+         self.nb_workers_get_upload_mode: int = 0
+         self.nb_workers_preupload_lfs: int = 0
+         self.nb_workers_commit: int = 0
+         self.nb_workers_waiting: int = 0
+         self.last_commit_attempt: Optional[float] = None
+
+         self._started_at = datetime.now()
+
+         # Setup queues
+         for item in self.items:
+             paths, metadata = item
+             if metadata.sha256 is None:
+                 self.queue_sha256.put(item)
+             elif metadata.upload_mode is None:
+                 self.queue_get_upload_mode.put(item)
+             elif metadata.upload_mode == "lfs" and not metadata.is_uploaded:
+                 self.queue_preupload_lfs.put(item)
+             elif not metadata.is_committed:
+                 self.queue_commit.put(item)
+             else:
+                 logger.debug(f"Skipping file {paths.path_in_repo} (already uploaded and committed)")
+
+     def current_report(self) -> str:
+         """Generate a report of the current status of the large upload."""
+         nb_hashed = 0
+         size_hashed = 0
+         nb_preuploaded = 0
+         nb_lfs = 0
+         nb_lfs_unsure = 0
+         size_preuploaded = 0
+         nb_committed = 0
+         size_committed = 0
+         total_size = 0
+         ignored_files = 0
+         total_files = 0
+
+         with self.lock:
+             for _, metadata in self.items:
+                 if metadata.should_ignore:
+                     ignored_files += 1
+                     continue
+                 total_size += metadata.size
+                 total_files += 1
+                 if metadata.sha256 is not None:
+                     nb_hashed += 1
+                     size_hashed += metadata.size
+                 if metadata.upload_mode == "lfs":
+                     nb_lfs += 1
+                 if metadata.upload_mode is None:
+                     nb_lfs_unsure += 1
+                 if metadata.is_uploaded:
+                     nb_preuploaded += 1
+                     size_preuploaded += metadata.size
+                 if metadata.is_committed:
+                     nb_committed += 1
+                     size_committed += metadata.size
+         total_size_str = _format_size(total_size)
+
+         now = datetime.now()
+         now_str = now.strftime("%Y-%m-%d %H:%M:%S")
+         elapsed = now - self._started_at
+         elapsed_str = str(elapsed).split(".")[0]  # remove milliseconds
+
+         message = "\n" + "-" * 10
+         message += f" {now_str} ({elapsed_str}) "
+         message += "-" * 10 + "\n"
+
+         message += "Files: "
+         message += f"hashed {nb_hashed}/{total_files} ({_format_size(size_hashed)}/{total_size_str}) | "
+         message += f"pre-uploaded: {nb_preuploaded}/{nb_lfs} ({_format_size(size_preuploaded)}/{total_size_str})"
+         if nb_lfs_unsure > 0:
+             message += f" (+{nb_lfs_unsure} unsure)"
+         message += f" | committed: {nb_committed}/{total_files} ({_format_size(size_committed)}/{total_size_str})"
+         message += f" | ignored: {ignored_files}\n"
+
+         message += "Workers: "
+         message += f"hashing: {self.nb_workers_sha256} | "
+         message += f"get upload mode: {self.nb_workers_get_upload_mode} | "
+         message += f"pre-uploading: {self.nb_workers_preupload_lfs} | "
+         message += f"committing: {self.nb_workers_commit} | "
+         message += f"waiting: {self.nb_workers_waiting}\n"
+         message += "-" * 51
+
+         return message
+
+     def is_done(self) -> bool:
+         with self.lock:
+             return all(metadata.is_committed or metadata.should_ignore for _, metadata in self.items)
+
+
+ def _worker_job(
+     status: LargeUploadStatus,
+     api: "HfApi",
+     repo_id: str,
+     repo_type: str,
+     revision: str,
+ ):
+     """
+     Main process for a worker. The worker will perform tasks based on the priority list until all files are uploaded
+     and committed. If no tasks are available, the worker will wait for 10 seconds before checking again.
+
+     If a task fails for any reason, the item(s) are put back in the queue for another worker to pick up.
+
+     Read `upload_large_folder` docstring for more information on how tasks are prioritized.
+     """
+     while True:
+         next_job: Optional[Tuple[WorkerJob, List[JOB_ITEM_T]]] = None
+
+         # Determine next task
+         next_job = _determine_next_job(status)
+         if next_job is None:
+             return
+         job, items = next_job
+
+         # Perform task
+         if job == WorkerJob.SHA256:
+             item = items[0]  # single item
+             try:
+                 _compute_sha256(item)
+                 status.queue_get_upload_mode.put(item)
+             except KeyboardInterrupt:
+                 raise
+             except Exception as e:
+                 logger.error(f"Failed to compute sha256: {e}")
+                 logger.error(traceback.format_exc())
+                 status.queue_sha256.put(item)
+
+             with status.lock:
+                 status.nb_workers_sha256 -= 1
+
+         elif job == WorkerJob.GET_UPLOAD_MODE:
+             try:
+                 _get_upload_mode(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
+             except KeyboardInterrupt:
+                 raise
+             except Exception as e:
+                 logger.error(f"Failed to get upload mode: {e}")
+                 logger.error(traceback.format_exc())
+
+             # Items are either:
+             # - dropped (if should_ignore)
+             # - put in LFS queue (if LFS)
+             # - put in commit queue (if regular)
+             # - or put back (if error occurred).
+             for item in items:
+                 _, metadata = item
+                 if metadata.should_ignore:
+                     continue
+                 if metadata.upload_mode == "lfs":
+                     status.queue_preupload_lfs.put(item)
+                 elif metadata.upload_mode == "regular":
+                     status.queue_commit.put(item)
+                 else:
+                     status.queue_get_upload_mode.put(item)
+
+             with status.lock:
+                 status.nb_workers_get_upload_mode -= 1
+
+         elif job == WorkerJob.PREUPLOAD_LFS:
+             item = items[0]  # single item
+             try:
+                 _preupload_lfs(item, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
+                 status.queue_commit.put(item)
+             except KeyboardInterrupt:
+                 raise
+             except Exception as e:
+                 logger.error(f"Failed to preupload LFS: {e}")
+                 logger.error(traceback.format_exc())
+                 status.queue_preupload_lfs.put(item)
+
+             with status.lock:
+                 status.nb_workers_preupload_lfs -= 1
+
+         elif job == WorkerJob.COMMIT:
+             try:
+                 _commit(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
+             except KeyboardInterrupt:
+                 raise
+             except Exception as e:
+                 logger.error(f"Failed to commit: {e}")
+                 logger.error(traceback.format_exc())
+                 for item in items:
+                     status.queue_commit.put(item)
+             with status.lock:
+                 status.last_commit_attempt = time.time()
+                 status.nb_workers_commit -= 1
+
+         elif job == WorkerJob.WAIT:
+             time.sleep(WAITING_TIME_IF_NO_TASKS)
+             with status.lock:
+                 status.nb_workers_waiting -= 1
+
+
+ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob, List[JOB_ITEM_T]]]:
+     with status.lock:
+         # 1. Commit if more than 5 minutes since last commit attempt (and at least 1 file)
+         if (
+             status.nb_workers_commit == 0
+             and status.queue_commit.qsize() > 0
+             and status.last_commit_attempt is not None
+             and time.time() - status.last_commit_attempt > 5 * 60
+         ):
+             status.nb_workers_commit += 1
+             logger.debug("Job: commit (more than 5 minutes since last commit attempt)")
+             return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+
+         # 2. Commit if at least 150 files are ready to commit
+         elif status.nb_workers_commit == 0 and status.queue_commit.qsize() >= 150:
+             status.nb_workers_commit += 1
+             logger.debug("Job: commit (>=150 files ready)")
+             return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+
+         # 3. Get upload mode if at least 10 files
+         elif status.queue_get_upload_mode.qsize() >= 10:
+             status.nb_workers_get_upload_mode += 1
+             logger.debug("Job: get upload mode (>10 files ready)")
+             return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+
+         # 4. Preupload LFS file if at least 1 file and no worker is preuploading LFS
+         elif status.queue_preupload_lfs.qsize() > 0 and status.nb_workers_preupload_lfs == 0:
+             status.nb_workers_preupload_lfs += 1
+             logger.debug("Job: preupload LFS (no other worker preuploading LFS)")
+             return (WorkerJob.PREUPLOAD_LFS, _get_one(status.queue_preupload_lfs))
+
+         # 5. Compute sha256 if at least 1 file and no worker is computing sha256
+         elif status.queue_sha256.qsize() > 0 and status.nb_workers_sha256 == 0:
+             status.nb_workers_sha256 += 1
+             logger.debug("Job: sha256 (no other worker computing sha256)")
+             return (WorkerJob.SHA256, _get_one(status.queue_sha256))
+
+         # 6. Get upload mode if at least 1 file and no worker is getting upload mode
+         elif status.queue_get_upload_mode.qsize() > 0 and status.nb_workers_get_upload_mode == 0:
+             status.nb_workers_get_upload_mode += 1
+             logger.debug("Job: get upload mode (no other worker getting upload mode)")
+             return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+
+         # 7. Preupload LFS file if at least 1 file
+         #    Skip if hf_transfer is enabled and there is already a worker preuploading LFS
+         elif status.queue_preupload_lfs.qsize() > 0 and (
+             status.nb_workers_preupload_lfs == 0 or not constants.HF_HUB_ENABLE_HF_TRANSFER
+         ):
+             status.nb_workers_preupload_lfs += 1
+             logger.debug("Job: preupload LFS")
+             return (WorkerJob.PREUPLOAD_LFS, _get_one(status.queue_preupload_lfs))
+
+         # 8. Compute sha256 if at least 1 file
+         elif status.queue_sha256.qsize() > 0:
+             status.nb_workers_sha256 += 1
+             logger.debug("Job: sha256")
+             return (WorkerJob.SHA256, _get_one(status.queue_sha256))
+
+         # 9. Get upload mode if at least 1 file
+         elif status.queue_get_upload_mode.qsize() > 0:
+             status.nb_workers_get_upload_mode += 1
+             logger.debug("Job: get upload mode")
+             return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+
+         # 10. Commit if at least 1 file and 1 min since last commit attempt
+         elif (
+             status.nb_workers_commit == 0
+             and status.queue_commit.qsize() > 0
+             and status.last_commit_attempt is not None
+             and time.time() - status.last_commit_attempt > 1 * 60
+         ):
+             status.nb_workers_commit += 1
+             logger.debug("Job: commit (1 min since last commit attempt)")
+             return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+
+         # 11. Commit if at least 1 file, all other queues are empty, and all workers are waiting
+         #     e.g. when it's the last commit
+         elif (
+             status.nb_workers_commit == 0
+             and status.queue_commit.qsize() > 0
+             and status.queue_sha256.qsize() == 0
+             and status.queue_get_upload_mode.qsize() == 0
+             and status.queue_preupload_lfs.qsize() == 0
+             and status.nb_workers_sha256 == 0
+             and status.nb_workers_get_upload_mode == 0
+             and status.nb_workers_preupload_lfs == 0
+         ):
+             status.nb_workers_commit += 1
+             logger.debug("Job: commit")
+             return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+
+         # 12. If all queues are empty, exit
+         elif all(metadata.is_committed or metadata.should_ignore for _, metadata in status.items):
+             logger.info("All files have been processed! Exiting worker.")
+             return None
+
+         # 13. If no task is available, wait
+         else:
+             status.nb_workers_waiting += 1
+             logger.debug(f"No task available, waiting... ({WAITING_TIME_IF_NO_TASKS}s)")
+             return (WorkerJob.WAIT, [])
+
+
+ ####################
+ # Atomic jobs (sha256, get_upload_mode, preupload_lfs, commit)
+ ####################
+
+
+ def _compute_sha256(item: JOB_ITEM_T) -> None:
+     """Compute sha256 of a file and save it in metadata."""
+     paths, metadata = item
+     if metadata.sha256 is None:
+         with paths.file_path.open("rb") as f:
+             metadata.sha256 = sha_fileobj(f).hex()
+     metadata.save(paths)
+
+
+ def _get_upload_mode(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+     """Get upload mode for each file and update metadata.
+
+     Also receive info if the file should be ignored.
+     """
+     additions = [_build_hacky_operation(item) for item in items]
+     _fetch_upload_modes(
+         additions=additions,
+         repo_type=repo_type,
+         repo_id=repo_id,
+         headers=api._build_hf_headers(),
+         revision=revision,
+     )
+     for item, addition in zip(items, additions):
+         paths, metadata = item
+         metadata.upload_mode = addition._upload_mode
+         metadata.should_ignore = addition._should_ignore
+         metadata.save(paths)
+
+
+ def _preupload_lfs(item: JOB_ITEM_T, api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+     """Preupload LFS file and update metadata."""
+     paths, metadata = item
+     addition = _build_hacky_operation(item)
+     api.preupload_lfs_files(
+         repo_id=repo_id,
+         repo_type=repo_type,
+         revision=revision,
+         additions=[addition],
+     )
+
+     metadata.is_uploaded = True
+     metadata.save(paths)
+
+
+ def _commit(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+     """Commit files to the repo."""
+     additions = [_build_hacky_operation(item) for item in items]
+     api.create_commit(
+         repo_id=repo_id,
+         repo_type=repo_type,
+         revision=revision,
+         operations=additions,
+         commit_message="Add files using upload-large-folder tool",
+     )
+     for paths, metadata in items:
+         metadata.is_committed = True
+         metadata.save(paths)
+
+
+ ####################
+ # Hacks with CommitOperationAdd to bypass checks/sha256 calculation
+ ####################
+
+
+ class HackyCommitOperationAdd(CommitOperationAdd):
+     def __post_init__(self) -> None:
+         if isinstance(self.path_or_fileobj, Path):
+             self.path_or_fileobj = str(self.path_or_fileobj)
+
+
+ def _build_hacky_operation(item: JOB_ITEM_T) -> HackyCommitOperationAdd:
+     paths, metadata = item
+     operation = HackyCommitOperationAdd(path_in_repo=paths.path_in_repo, path_or_fileobj=paths.file_path)
+     with paths.file_path.open("rb") as file:
+         sample = file.peek(512)[:512]
+     if metadata.sha256 is None:
+         raise ValueError("sha256 must have been computed by now!")
+     operation.upload_info = UploadInfo(sha256=bytes.fromhex(metadata.sha256), size=metadata.size, sample=sample)
+     return operation
+
+
+ ####################
+ # Misc helpers
+ ####################
+
+
+ def _get_one(queue: "queue.Queue[JOB_ITEM_T]") -> List[JOB_ITEM_T]:
+     return [queue.get()]
+
+
+ def _get_n(queue: "queue.Queue[JOB_ITEM_T]", n: int) -> List[JOB_ITEM_T]:
+     return [queue.get() for _ in range(min(queue.qsize(), n))]
+
+
+ def _get_items_to_commit(queue: "queue.Queue[JOB_ITEM_T]") -> List[JOB_ITEM_T]:
+     """Special case for commit job: the number of items to commit depends on the type of files."""
+     # Can take at most 75 regular files and/or 150 LFS files in a single commit
+     items: List[JOB_ITEM_T] = []
+     nb_lfs, nb_regular = 0, 0
+     while True:
+         # If empty queue => commit everything
+         if queue.qsize() == 0:
+             return items
+
+         # If we have enough items => commit them
+         if nb_lfs >= MAX_NB_LFS_FILES_PER_COMMIT or nb_regular >= MAX_NB_REGULAR_FILES_PER_COMMIT:
+             return items
+
+         # Else, get a new item and increase counter
+         item = queue.get()
+         items.append(item)
+         _, metadata = item
+         if metadata.upload_mode == "lfs":
+             nb_lfs += 1
+         else:
+             nb_regular += 1
+
+
+ def _print_overwrite(report: str) -> None:
+     """Print a report, overwriting the previous lines.
+
+     Since tqdm is using `sys.stderr` to (re-)write progress bars, we need to use `sys.stdout`
+     to print the report.
+
+     Note: works well only if no other process is writing to `sys.stdout`!
+     """
+     report += "\n"
+     # Get terminal width
+     terminal_width = shutil.get_terminal_size().columns
+
+     # Count number of lines that should be cleared
+     nb_lines = sum(len(line) // terminal_width + 1 for line in report.splitlines())
+
+     # Clear previous lines based on the number of lines in the report
+     for _ in range(nb_lines):
+         sys.stdout.write("\r\033[K")  # Clear line
+         sys.stdout.write("\033[F")  # Move cursor up one line
+
+     # Print the new report, filling remaining space with whitespace
+     sys.stdout.write(report)
+     sys.stdout.write(" " * (terminal_width - len(report.splitlines()[-1])))
+     sys.stdout.flush()
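
The internal entry point above is normally reached through `HfApi.upload_large_folder`; a hedged sketch (the repo id and folder path are placeholders):

```python
from huggingface_hub import HfApi

api = HfApi()
# repo_type is mandatory here: upload_large_folder_internal raises a
# ValueError if it is not set explicitly.
api.upload_large_folder(
    repo_id="username/my-large-dataset",  # placeholder
    folder_path="./data",                 # placeholder
    repo_type="dataset",
)
```

The call is resumable by design: per-file metadata (sha256, upload mode, uploaded/committed flags) is persisted via `metadata.save(paths)`, so an interrupted run picks up where it left off.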
huggingface_hub/_webhooks_payload.py ADDED
@@ -0,0 +1,137 @@
+ # coding=utf-8
+ # Copyright 2023-present, the HuggingFace Inc. team.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains data structures to parse the webhooks payload."""
+
+ from typing import List, Literal, Optional
+
+ from .utils import is_pydantic_available
+
+
+ if is_pydantic_available():
+     from pydantic import BaseModel
+ else:
+     # Define a dummy BaseModel to avoid import errors when pydantic is not installed
+     # Import error will be raised when trying to use the class
+
+     class BaseModel:  # type: ignore [no-redef]
+         def __init__(self, *args, **kwargs) -> None:
+             raise ImportError(
+                 "You must have `pydantic` installed to use `WebhookPayload`. This is an optional dependency that"
+                 " should be installed separately. Please run `pip install --upgrade pydantic` and retry."
+             )
+
+
+ # This is an adaptation of the ReportV3 interface implemented in moon-landing. V0, V1 and V2 have been ignored as they
+ # are not in use anymore. To keep in sync when the format is updated in
+ # https://github.com/huggingface/moon-landing/blob/main/server/lib/HFWebhooks.ts (internal link).
+
+
+ WebhookEvent_T = Literal[
+     "create",
+     "delete",
+     "move",
+     "update",
+ ]
+ RepoChangeEvent_T = Literal[
+     "add",
+     "move",
+     "remove",
+     "update",
+ ]
+ RepoType_T = Literal[
+     "dataset",
+     "model",
+     "space",
+ ]
+ DiscussionStatus_T = Literal[
+     "closed",
+     "draft",
+     "open",
+     "merged",
+ ]
+ SupportedWebhookVersion = Literal[3]
+
+
+ class ObjectId(BaseModel):
+     id: str
+
+
+ class WebhookPayloadUrl(BaseModel):
+     web: str
+     api: Optional[str] = None
+
+
+ class WebhookPayloadMovedTo(BaseModel):
+     name: str
+     owner: ObjectId
+
+
+ class WebhookPayloadWebhook(ObjectId):
+     version: SupportedWebhookVersion
+
+
+ class WebhookPayloadEvent(BaseModel):
+     action: WebhookEvent_T
+     scope: str
+
+
+ class WebhookPayloadDiscussionChanges(BaseModel):
+     base: str
+     mergeCommitId: Optional[str] = None
+
+
+ class WebhookPayloadComment(ObjectId):
+     author: ObjectId
+     hidden: bool
+     content: Optional[str] = None
+     url: WebhookPayloadUrl
+
+
+ class WebhookPayloadDiscussion(ObjectId):
+     num: int
+     author: ObjectId
+     url: WebhookPayloadUrl
+     title: str
+     isPullRequest: bool
+     status: DiscussionStatus_T
+     changes: Optional[WebhookPayloadDiscussionChanges] = None
+     pinned: Optional[bool] = None
+
+
+ class WebhookPayloadRepo(ObjectId):
+     owner: ObjectId
+     head_sha: Optional[str] = None
+     name: str
+     private: bool
+     subdomain: Optional[str] = None
+     tags: Optional[List[str]] = None
+     type: Literal["dataset", "model", "space"]
+     url: WebhookPayloadUrl
+
+
+ class WebhookPayloadUpdatedRef(BaseModel):
+     ref: str
+     oldSha: Optional[str] = None
+     newSha: Optional[str] = None
+
+
+ class WebhookPayload(BaseModel):
+     event: WebhookPayloadEvent
+     repo: WebhookPayloadRepo
+     discussion: Optional[WebhookPayloadDiscussion] = None
+     comment: Optional[WebhookPayloadComment] = None
+     webhook: WebhookPayloadWebhook
+     movedTo: Optional[WebhookPayloadMovedTo] = None
+     updatedRefs: Optional[List[WebhookPayloadUpdatedRef]] = None
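
A minimal sketch of validating an incoming webhook body with the models above; the payload dict is hand-written and deliberately minimal, with field names taken from the classes in this file:

```python
payload_dict = {
    "event": {"action": "update", "scope": "repo.content"},
    "repo": {
        "id": "abc123",
        "owner": {"id": "def456"},
        "name": "username/my-model",
        "private": False,
        "type": "model",
        "url": {"web": "https://huggingface.co/username/my-model"},
    },
    "webhook": {"id": "wh-1", "version": 3},
}

# pydantic v1 API; on pydantic v2 the equivalent is WebhookPayload.model_validate(...)
payload = WebhookPayload.parse_obj(payload_dict)
assert payload.event.action == "update"
```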
huggingface_hub/_webhooks_server.py ADDED
@@ -0,0 +1,386 @@
+ # coding=utf-8
+ # Copyright 2023-present, the HuggingFace Inc. team.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains `WebhooksServer` and `webhook_endpoint` to create a webhook server easily."""
+
+ import atexit
+ import inspect
+ import os
+ from functools import wraps
+ from typing import TYPE_CHECKING, Any, Callable, Dict, Optional
+
+ from .utils import experimental, is_fastapi_available, is_gradio_available
+
+
+ if TYPE_CHECKING:
+     import gradio as gr
+     from fastapi import Request
+
+ if is_fastapi_available():
+     from fastapi import FastAPI, Request
+     from fastapi.responses import JSONResponse
+ else:
+     # Will fail at runtime if FastAPI is not available
+     FastAPI = Request = JSONResponse = None  # type: ignore [misc, assignment]
+
+
+ _global_app: Optional["WebhooksServer"] = None
+ _is_local = os.environ.get("SPACE_ID") is None
+
+
+ @experimental
+ class WebhooksServer:
+     """
+     The [`WebhooksServer`] class lets you create an instance of a Gradio app that can receive Huggingface webhooks.
+     These webhooks can be registered using the [`~WebhooksServer.add_webhook`] decorator. Webhook endpoints are added to
+     the app as a POST endpoint to the FastAPI router. Once all the webhooks are registered, the `launch` method has to be
+     called to start the app.
+
+     It is recommended to accept [`WebhookPayload`] as the first argument of the webhook function. It is a Pydantic
+     model that contains all the information about the webhook event. The data will be parsed automatically for you.
+
+     Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to setup your
+     WebhooksServer and deploy it on a Space.
+
+     <Tip warning={true}>
+
+     `WebhooksServer` is experimental. Its API is subject to change in the future.
+
+     </Tip>
+
+     <Tip warning={true}>
+
+     You must have `gradio` installed to use `WebhooksServer` (`pip install --upgrade gradio`).
+
+     </Tip>
+
+     Args:
+         ui (`gradio.Blocks`, optional):
+             A Gradio UI instance to be used as the Space landing page. If `None`, a UI displaying instructions
+             about the configured webhooks is created.
+         webhook_secret (`str`, optional):
+             A secret key to verify incoming webhook requests. You can set this value to any secret you want as long as
+             you also configure it in your [webhooks settings panel](https://huggingface.co/settings/webhooks). You
+             can also set this value as the `WEBHOOK_SECRET` environment variable. If no secret is provided, the
+             webhook endpoints are opened without any security.
+
+     Example:
+
+     ```python
+     import gradio as gr
+     from huggingface_hub import WebhooksServer, WebhookPayload
+
+     with gr.Blocks() as ui:
+         ...
+
+     app = WebhooksServer(ui=ui, webhook_secret="my_secret_key")
+
+     @app.add_webhook("/say_hello")
+     async def hello(payload: WebhookPayload):
+         return {"message": "hello"}
+
+     app.launch()
+     ```
+     """
+
+     def __new__(cls, *args, **kwargs) -> "WebhooksServer":
+         if not is_gradio_available():
+             raise ImportError(
+                 "You must have `gradio` installed to use `WebhooksServer`. Please run `pip install --upgrade gradio`"
+                 " first."
+             )
+         if not is_fastapi_available():
+             raise ImportError(
+                 "You must have `fastapi` installed to use `WebhooksServer`. Please run `pip install --upgrade fastapi`"
+                 " first."
+             )
+         return super().__new__(cls)
+
+     def __init__(
+         self,
+         ui: Optional["gr.Blocks"] = None,
+         webhook_secret: Optional[str] = None,
+     ) -> None:
+         self._ui = ui
+
+         self.webhook_secret = webhook_secret or os.getenv("WEBHOOK_SECRET")
+         self.registered_webhooks: Dict[str, Callable] = {}
+         _warn_on_empty_secret(self.webhook_secret)
+
+     def add_webhook(self, path: Optional[str] = None) -> Callable:
+         """
+         Decorator to add a webhook to the [`WebhooksServer`] server.
+
+         Args:
+             path (`str`, optional):
+                 The URL path to register the webhook function. If not provided, the function name will be used as the
+                 path. In any case, all webhooks are registered under `/webhooks`.
+
+         Raises:
+             ValueError: If the provided path is already registered as a webhook.
+
+         Example:
+             ```python
+             from huggingface_hub import WebhooksServer, WebhookPayload
+
+             app = WebhooksServer()
+
+             @app.add_webhook
+             async def trigger_training(payload: WebhookPayload):
+                 if payload.repo.type == "dataset" and payload.event.action == "update":
+                     # Trigger a training job if a dataset is updated
+                     ...
+
+             app.launch()
+             ```
+         """
+         # Usage: directly as decorator. Example: `@app.add_webhook`
+         if callable(path):
+             # If path is a function, it means it was used as a decorator without arguments
+             return self.add_webhook()(path)
+
+         # Usage: provide a path. Example: `@app.add_webhook(...)`
+         @wraps(FastAPI.post)
+         def _inner_post(*args, **kwargs):
+             func = args[0]
+             abs_path = f"/webhooks/{(path or func.__name__).strip('/')}"
+             if abs_path in self.registered_webhooks:
+                 raise ValueError(f"Webhook {abs_path} already exists.")
+             self.registered_webhooks[abs_path] = func
+
+         return _inner_post
+
+     def launch(self, prevent_thread_lock: bool = False, **launch_kwargs: Any) -> None:
+         """Launch the Gradio app and register webhooks to the underlying FastAPI server.
+
+         Input parameters are forwarded to Gradio when launching the app.
+         """
+         ui = self._ui or self._get_default_ui()
+
+         # Start Gradio App
+         #   - as non-blocking so that webhooks can be added afterwards
+         #   - as shared if launch locally (to debug webhooks)
+         launch_kwargs.setdefault("share", _is_local)
+         self.fastapi_app, _, _ = ui.launch(prevent_thread_lock=True, **launch_kwargs)
+
+         # Register webhooks to FastAPI app
+         for path, func in self.registered_webhooks.items():
+             # Add secret check if required
+             if self.webhook_secret is not None:
+                 func = _wrap_webhook_to_check_secret(func, webhook_secret=self.webhook_secret)
+
+             # Add route to FastAPI app
+             self.fastapi_app.post(path)(func)
+
+         # Print instructions and block main thread
+         space_host = os.environ.get("SPACE_HOST")
+         url = "https://" + space_host if space_host is not None else (ui.share_url or ui.local_url)
+         url = url.strip("/")
+         message = "\nWebhooks are correctly setup and ready to use:"
+         message += "\n" + "\n".join(f" - POST {url}{webhook}" for webhook in self.registered_webhooks)
+         message += "\nGo to https://huggingface.co/settings/webhooks to setup your webhooks."
+         print(message)
+
+         if not prevent_thread_lock:
+             ui.block_thread()
+
+     def _get_default_ui(self) -> "gr.Blocks":
+         """Default UI if not provided (lists webhooks and provides basic instructions)."""
+         import gradio as gr
+
+         with gr.Blocks() as ui:
+             gr.Markdown("# This is an app to process 🤗 Webhooks")
+             gr.Markdown(
+                 "Webhooks are a foundation for MLOps-related features. They allow you to listen for new changes on"
+                 " specific repos or to all repos belonging to a particular set of users/organizations (not just your"
+                 " repos, but any repo). Check out this [guide](https://huggingface.co/docs/hub/webhooks) to get to"
+                 " know more about webhooks on the Huggingface Hub."
+             )
+             gr.Markdown(
+                 f"{len(self.registered_webhooks)} webhook(s) are registered:"
+                 + "\n\n"
+                 + "\n ".join(
+                     f"- [{webhook_path}]({_get_webhook_doc_url(webhook.__name__, webhook_path)})"
+                     for webhook_path, webhook in self.registered_webhooks.items()
+                 )
+             )
+             gr.Markdown(
+                 "Go to https://huggingface.co/settings/webhooks to setup your webhooks."
+                 + "\nYour app is running locally. Please look at the logs to check the full URL you need to set."
221
+ if _is_local
222
+ else (
223
+ "\nThis app is running on a Space. You can find the corresponding URL in the options menu"
224
+ " (top-right) > 'Embed the Space'. The URL looks like 'https://{username}-{repo_name}.hf.space'."
225
+ )
226
+ )
227
+ return ui
228
+
229
+
230
+ @experimental
+ def webhook_endpoint(path: Optional[str] = None) -> Callable:
+ """Decorator to start a [`WebhooksServer`] and register the decorated function as a webhook endpoint.
+
+ This is a helper to get started quickly. If you need more flexibility (custom landing page or webhook secret),
+ you can use [`WebhooksServer`] directly. You can register multiple webhook endpoints (to the same server) by using
+ this decorator multiple times.
+
+ Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to set up your
+ server and deploy it on a Space.
+
+ <Tip warning={true}>
+
+ `webhook_endpoint` is experimental. Its API is subject to change in the future.
+
+ </Tip>
+
+ <Tip warning={true}>
+
+ You must have `gradio` installed to use `webhook_endpoint` (`pip install --upgrade gradio`).
+
+ </Tip>
+
+ Args:
+ path (`str`, optional):
+ The URL path to register the webhook function. If not provided, the function name will be used as the path.
+ In any case, all webhooks are registered under `/webhooks`.
+
+ Examples:
+ The default usage is to register a function as a webhook endpoint. The function name will be used as the path.
+ The server will be started automatically at exit (i.e. at the end of the script).
+
+ ```python
+ from huggingface_hub import webhook_endpoint, WebhookPayload
+
+ @webhook_endpoint
+ async def trigger_training(payload: WebhookPayload):
+ if payload.repo.type == "dataset" and payload.event.action == "update":
+ # Trigger a training job if a dataset is updated
+ ...
+
+ # Server is automatically started at the end of the script.
+ ```
+
+ Advanced usage: register a function as a webhook endpoint and start the server manually. This is useful if you
+ are running it in a notebook.
+
+ ```python
+ from huggingface_hub import webhook_endpoint, WebhookPayload
+
+ @webhook_endpoint
+ async def trigger_training(payload: WebhookPayload):
+ if payload.repo.type == "dataset" and payload.event.action == "update":
+ # Trigger a training job if a dataset is updated
+ ...
+
+ # Start the server manually
+ trigger_training.launch()
+ ```
+ """
+ if callable(path):
+ # If path is a function, it means it was used as a decorator without arguments
+ return webhook_endpoint()(path)
+
+ @wraps(WebhooksServer.add_webhook)
+ def _inner(func: Callable) -> Callable:
+ app = _get_global_app()
+ app.add_webhook(path)(func)
+ if len(app.registered_webhooks) == 1:
+ # Register `app.launch` to run at exit (only once)
+ atexit.register(app.launch)
+
+ @wraps(app.launch)
+ def _launch_now():
+ # Run the app directly (without waiting for atexit)
+ atexit.unregister(app.launch)
+ app.launch()
+
+ func.launch = _launch_now # type: ignore
+ return func
+
+ return _inner
+
+
+ def _get_global_app() -> WebhooksServer:
+ global _global_app
+ if _global_app is None:
+ _global_app = WebhooksServer()
+ return _global_app
+
+
+ def _warn_on_empty_secret(webhook_secret: Optional[str]) -> None:
+ if webhook_secret is None:
+ print("Webhook secret is not defined. This means your webhook endpoints will be open to everyone.")
+ print(
+ "To add a secret, set `WEBHOOK_SECRET` as an environment variable or pass it at initialization: "
+ "\n\t`app = WebhooksServer(webhook_secret='my_secret', ...)`"
+ )
+ print(
+ "For more details about webhook secrets, please refer to"
+ " https://huggingface.co/docs/hub/webhooks#webhook-secret."
+ )
+ else:
+ print("Webhook secret is correctly defined.")
+
+
+ def _get_webhook_doc_url(webhook_name: str, webhook_path: str) -> str:
+ """Returns the anchor to a given webhook in the docs (experimental)"""
+ return "/docs#/default/" + webhook_name + webhook_path.replace("/", "_") + "_post"
+
+
+ def _wrap_webhook_to_check_secret(func: Callable, webhook_secret: str) -> Callable:
+ """Wraps a webhook function to check the webhook secret before calling the function.
+
+ This is a hacky way to add the `request` parameter to the function signature. Since FastAPI bases itself on route
+ parameters to inject values into the function, we need to hack the function signature to retrieve the `Request`
+ object (and hence the headers). A far cleaner solution would be to use a middleware. However, since
+ `fastapi==0.90.1`, a middleware cannot be added once the app has started. And since the FastAPI app is started by
+ Gradio internals (and not by us), we cannot add a middleware.
+
+ This method is called only when a secret has been defined by the user. If a request is sent without the
+ "x-webhook-secret", the function will return a 401 error (unauthorized). If the header is sent but is incorrect,
+ the function will return a 403 error (forbidden).
+
+ Inspired by https://stackoverflow.com/a/33112180.
+ """
+ initial_sig = inspect.signature(func)
+
+ @wraps(func)
+ async def _protected_func(request: Request, **kwargs):
+ request_secret = request.headers.get("x-webhook-secret")
+ if request_secret is None:
+ return JSONResponse({"error": "x-webhook-secret header not set."}, status_code=401)
+ if request_secret != webhook_secret:
+ return JSONResponse({"error": "Invalid webhook secret."}, status_code=403)
+
+ # Inject `request` in kwargs if required
+ if "request" in initial_sig.parameters:
+ kwargs["request"] = request
+
+ # Handle both sync and async routes
+ if inspect.iscoroutinefunction(func):
+ return await func(**kwargs)
+ else:
+ return func(**kwargs)
+
+ # Update signature to include request
+ if "request" not in initial_sig.parameters:
+ _protected_func.__signature__ = initial_sig.replace( # type: ignore
+ parameters=(
+ inspect.Parameter(name="request", kind=inspect.Parameter.POSITIONAL_OR_KEYWORD, annotation=Request),
+ )
+ + tuple(initial_sig.parameters.values())
+ )
+
+ # Return protected route
+ return _protected_func
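Note that FastAPI validates the request body before this wrapper runs, so the 401/403 behaviour assumes a well-formed payload. A hedged client-side sketch (URL, secret, and body are placeholders):

```python
# Sketch: calling a secret-protected endpoint. The URL, secret, and body are assumptions.
import requests

url = "http://127.0.0.1:7860/webhooks/trigger_training"
body = {}  # placeholder: a real call carries the Hub's WebhookPayload JSON

# Missing `x-webhook-secret` header -> 401 (unauthorized).
print(requests.post(url, json=body).status_code)

# Wrong secret -> 403 (forbidden).
print(requests.post(url, json=body, headers={"x-webhook-secret": "wrong"}).status_code)

# Correct secret -> the wrapped webhook function finally runs.
print(requests.post(url, json=body, headers={"x-webhook-secret": "my_secret"}).status_code)
```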
huggingface_hub/commands/__init__.py ADDED
@@ -0,0 +1,27 @@
+ # Copyright 2020 The HuggingFace Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from abc import ABC, abstractmethod
+ from argparse import _SubParsersAction
+
+
+ class BaseHuggingfaceCLICommand(ABC):
+ @staticmethod
+ @abstractmethod
+ def register_subcommand(parser: _SubParsersAction):
+ raise NotImplementedError()
+
+ @abstractmethod
+ def run(self):
+ raise NotImplementedError()
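The two abstract methods above define the whole CLI extension protocol: `register_subcommand` wires an argparse subparser and points its `func` default at the command class, and `run` executes it (see `main()` further down in this diff). A hypothetical minimal subclass:

```python
# Sketch: a hypothetical "hello" subcommand following the BaseHuggingfaceCLICommand protocol.
from argparse import Namespace, _SubParsersAction

from huggingface_hub.commands import BaseHuggingfaceCLICommand


class HelloCommand(BaseHuggingfaceCLICommand):
    @staticmethod
    def register_subcommand(parser: _SubParsersAction):
        hello_parser = parser.add_parser("hello", help="Print a greeting.")
        hello_parser.add_argument("--name", type=str, default="world")
        # The CLI entry point dispatches on `args.func`, so point it at this class.
        hello_parser.set_defaults(func=HelloCommand)

    def __init__(self, args: Namespace) -> None:
        self.name = args.name

    def run(self):
        print(f"Hello, {self.name}!")
```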
huggingface_hub/commands/_cli_utils.py ADDED
@@ -0,0 +1,69 @@
+ # Copyright 2022 The HuggingFace Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains a utility for good-looking prints."""
+
+ import os
+ from typing import List, Union
+
+
+ class ANSI:
+ """
+ Helper for en.wikipedia.org/wiki/ANSI_escape_code
+ """
+
+ _bold = "\u001b[1m"
+ _gray = "\u001b[90m"
+ _red = "\u001b[31m"
+ _reset = "\u001b[0m"
+ _yellow = "\u001b[33m"
+
+ @classmethod
+ def bold(cls, s: str) -> str:
+ return cls._format(s, cls._bold)
+
+ @classmethod
+ def gray(cls, s: str) -> str:
+ return cls._format(s, cls._gray)
+
+ @classmethod
+ def red(cls, s: str) -> str:
+ return cls._format(s, cls._bold + cls._red)
+
+ @classmethod
+ def yellow(cls, s: str) -> str:
+ return cls._format(s, cls._yellow)
+
+ @classmethod
+ def _format(cls, s: str, code: str) -> str:
+ if os.environ.get("NO_COLOR"):
+ # See https://no-color.org/
+ return s
+ return f"{code}{s}{cls._reset}"
+
+
+ def tabulate(rows: List[List[Union[str, int]]], headers: List[str]) -> str:
+ """
+ Inspired by:
+
+ - stackoverflow.com/a/8356620/593036
+ - stackoverflow.com/questions/9535954/printing-lists-as-tabular-data
+ """
+ col_widths = [max(len(str(x)) for x in col) for col in zip(*rows, headers)]
+ row_format = ("{{:{}}} " * len(headers)).format(*col_widths)
+ lines = []
+ lines.append(row_format.format(*headers))
+ lines.append(row_format.format(*["-" * w for w in col_widths]))
+ for row in rows:
+ lines.append(row_format.format(*row))
+ return "\n".join(lines)
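A quick usage sketch of these two helpers; the example rows are made up, and column widths come from the widest cell in each column (headers included):

```python
# Sketch: using ANSI and tabulate from huggingface_hub.commands._cli_utils.
from huggingface_hub.commands._cli_utils import ANSI, tabulate

print(ANSI.bold("repos"))  # bold, unless the NO_COLOR environment variable is set

print(tabulate(
    rows=[["gpt2", "model", 12], ["squad", "dataset", 3]],
    headers=["REPO", "TYPE", "FILES"],
))
# REPO  TYPE    FILES
# ----- ------- -----
# gpt2  model      12
# squad dataset     3
```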
huggingface_hub/commands/delete_cache.py ADDED
@@ -0,0 +1,428 @@
+ # coding=utf-8
+ # Copyright 2022-present, the HuggingFace Inc. team.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains a command to delete some revisions from the HF cache directory.
+
+ Usage:
+ huggingface-cli delete-cache
+ huggingface-cli delete-cache --disable-tui
+ huggingface-cli delete-cache --dir ~/.cache/huggingface/hub
+
+ NOTE:
+ This command is based on `InquirerPy` to build the multiselect menu in the terminal.
+ This dependency has to be installed with `pip install huggingface_hub[cli]`. Since
+ we want to avoid cross-platform issues as much as possible, I chose a library that
+ is built on top of `python-prompt-toolkit`, which seems to be a reference for terminal
+ GUIs (actively maintained on both Unix and Windows, 7.9k stars).
+
+ For the moment, the TUI feature is in beta.
+
+ See:
+ - https://github.com/kazhala/InquirerPy
+ - https://inquirerpy.readthedocs.io/en/latest/
+ - https://github.com/prompt-toolkit/python-prompt-toolkit
+
+ Other solutions could have been:
+ - `simple_term_menu`: would be good as well for our use case but some issues suggest
+ that Windows is less supported.
+ See: https://github.com/IngoMeyer441/simple-term-menu
+ - `PyInquirer`: very similar to `InquirerPy` but older and not maintained anymore.
+ In particular, no support for Python 3.10.
+ See: https://github.com/CITGuru/PyInquirer
+ - `pick` (or `pickpack`): easy to use and flexible but built on top of Python's
+ standard library `curses`, which is specific to Unix (not implemented on Windows).
+ See https://github.com/wong2/pick and https://github.com/anafvana/pickpack.
+ - `inquirer`: a lot of traction (700 stars) but explicitly states "experimental
+ support of Windows". Not built on top of `python-prompt-toolkit`.
+ See https://github.com/magmax/python-inquirer
+
+ TODO: add support for `huggingface-cli delete-cache aaaaaa bbbbbb cccccc (...)`?
+ TODO: add "--keep-last" arg to delete revisions that are not on the `main` ref
+ TODO: add "--filter" arg to filter repositories by name?
+ TODO: add "--sort" arg to sort by size?
+ TODO: add "--limit" arg to limit to X repos?
+ TODO: add "-y" arg for immediate deletion?
+ See discussions in https://github.com/huggingface/huggingface_hub/issues/1025.
+ """
+
+ import os
+ from argparse import Namespace, _SubParsersAction
+ from functools import wraps
+ from tempfile import mkstemp
+ from typing import Any, Callable, Iterable, List, Optional, Union
+
+ from ..utils import CachedRepoInfo, CachedRevisionInfo, HFCacheInfo, scan_cache_dir
+ from . import BaseHuggingfaceCLICommand
+ from ._cli_utils import ANSI
+
+
+ try:
+ from InquirerPy import inquirer
+ from InquirerPy.base.control import Choice
+ from InquirerPy.separator import Separator
+
+ _inquirer_py_available = True
+ except ImportError:
+ _inquirer_py_available = False
+
+
+ def require_inquirer_py(fn: Callable) -> Callable:
+ """Decorator to flag methods that require `InquirerPy`."""
+
+ # TODO: refactor this + imports in a unified pattern across codebase
+ @wraps(fn)
+ def _inner(*args, **kwargs):
+ if not _inquirer_py_available:
+ raise ImportError(
+ "The `delete-cache` command requires extra dependencies to work with"
+ " the TUI.\nPlease run `pip install huggingface_hub[cli]` to install"
+ " them.\nOtherwise, disable TUI using the `--disable-tui` flag."
+ )
+
+ return fn(*args, **kwargs)
+
+ return _inner
+
+
+ # Possibility for the user to cancel deletion
+ _CANCEL_DELETION_STR = "CANCEL_DELETION"
+
+
+ class DeleteCacheCommand(BaseHuggingfaceCLICommand):
+ @staticmethod
+ def register_subcommand(parser: _SubParsersAction):
+ delete_cache_parser = parser.add_parser("delete-cache", help="Delete revisions from the cache directory.")
+
+ delete_cache_parser.add_argument(
+ "--dir",
+ type=str,
+ default=None,
+ help="Cache directory (optional). Defaults to the default Hugging Face cache.",
+ )
+
+ delete_cache_parser.add_argument(
+ "--disable-tui",
+ action="store_true",
+ help=(
+ "Disable Terminal User Interface (TUI) mode. Useful if your"
+ " platform/terminal doesn't support the multiselect menu."
+ ),
+ )
+
+ delete_cache_parser.set_defaults(func=DeleteCacheCommand)
+
+ def __init__(self, args: Namespace) -> None:
+ self.cache_dir: Optional[str] = args.dir
+ self.disable_tui: bool = args.disable_tui
+
+ def run(self):
+ """Run `delete-cache` command with or without TUI."""
+ # Scan cache directory
+ hf_cache_info = scan_cache_dir(self.cache_dir)
+
+ # Manual review from the user
+ if self.disable_tui:
+ selected_hashes = _manual_review_no_tui(hf_cache_info, preselected=[])
+ else:
+ selected_hashes = _manual_review_tui(hf_cache_info, preselected=[])
+
+ # If deletion is not cancelled
+ if len(selected_hashes) > 0 and _CANCEL_DELETION_STR not in selected_hashes:
+ confirm_message = _get_expectations_str(hf_cache_info, selected_hashes) + " Confirm deletion?"
+
+ # Confirm deletion
+ if self.disable_tui:
+ confirmed = _ask_for_confirmation_no_tui(confirm_message)
+ else:
+ confirmed = _ask_for_confirmation_tui(confirm_message)
+
+ # Deletion is confirmed
+ if confirmed:
+ strategy = hf_cache_info.delete_revisions(*selected_hashes)
+ print("Start deletion.")
+ strategy.execute()
+ print(
+ f"Done. Deleted {len(strategy.repos)} repo(s) and"
+ f" {len(strategy.snapshots)} revision(s) for a total of"
+ f" {strategy.expected_freed_size_str}."
+ )
+ return
+
+ # Deletion is cancelled
+ print("Deletion is cancelled. Do nothing.")
+
+
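The TUI path of `run()` hands control to `InquirerPy`'s checkbox prompt; a stripped-down sketch of that interaction pattern, with placeholder hashes (requires `pip install huggingface_hub[cli]`):

```python
# Sketch: the InquirerPy multiselect pattern used by _manual_review_tui below.
from InquirerPy import inquirer
from InquirerPy.base.control import Choice

selected = inquirer.checkbox(
    message="Select revisions to delete:",
    choices=[
        Choice("hash_aaaa", name="hash_aaaa: main # modified 2 days ago"),
        Choice("hash_bbbb", name="hash_bbbb: (detached) # modified 3 weeks ago", enabled=True),
    ],
).execute()
print(selected)  # values of the selected choices, e.g. ["hash_bbbb"]
```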
+ @require_inquirer_py
+ def _manual_review_tui(hf_cache_info: HFCacheInfo, preselected: List[str]) -> List[str]:
+ """Ask the user for a manual review of the revisions to delete.
+
+ Displays a multi-select menu in the terminal (TUI).
+ """
+ # Define multiselect list
+ choices = _get_tui_choices_from_scan(repos=hf_cache_info.repos, preselected=preselected)
+ checkbox = inquirer.checkbox(
+ message="Select revisions to delete:",
+ choices=choices, # List of revisions with some pre-selection
+ cycle=False, # No loop between top and bottom
+ height=100, # Large list if possible
+ # We use the instruction to display to the user the expected effect of the
+ # deletion.
+ instruction=_get_expectations_str(
+ hf_cache_info,
+ selected_hashes=[c.value for c in choices if isinstance(c, Choice) and c.enabled],
+ ),
+ # We use the long instruction to show keybinding instructions to the user
+ long_instruction="Press <space> to select, <enter> to validate and <ctrl+c> to quit without modification.",
+ # Message that is displayed once the user validates their selection.
+ transformer=lambda result: f"{len(result)} revision(s) selected.",
+ )
+
+ # Add a callback to update the information line when a revision is
+ # selected/unselected
+ def _update_expectations(_) -> None:
+ # Hacky way to dynamically set an instruction message to the checkbox when
+ # a revision hash is selected/unselected.
+ checkbox._instruction = _get_expectations_str(
+ hf_cache_info,
+ selected_hashes=[choice["value"] for choice in checkbox.content_control.choices if choice["enabled"]],
+ )
+
+ checkbox.kb_func_lookup["toggle"].append({"func": _update_expectations})
+
+ # Finally display the form to the user.
+ try:
+ return checkbox.execute()
+ except KeyboardInterrupt:
+ return [] # Quit without deletion
+
+
+ @require_inquirer_py
+ def _ask_for_confirmation_tui(message: str, default: bool = True) -> bool:
+ """Ask for confirmation using Inquirer."""
+ return inquirer.confirm(message, default=default).execute()
+
+
+ def _get_tui_choices_from_scan(repos: Iterable[CachedRepoInfo], preselected: List[str]) -> List:
+ """Build a list of choices from the scanned repos.
+
+ Args:
+ repos (*Iterable[`CachedRepoInfo`]*):
+ List of scanned repos on which we want to delete revisions.
+ preselected (*List[`str`]*):
+ List of revision hashes that will be preselected.
+
+ Return:
+ The list of choices to pass to `inquirer.checkbox`.
+ """
+ choices: List[Union[Choice, Separator]] = []
+
+ # First choice is to cancel the deletion. If selected, nothing will be deleted,
+ # no matter the other selected items.
+ choices.append(
+ Choice(
+ _CANCEL_DELETION_STR,
+ name="None of the following (if selected, nothing will be deleted).",
+ enabled=False,
+ )
+ )
+
+ # Display a separator per repo and a Choice for each revision of the repo
+ for repo in sorted(repos, key=_repo_sorting_order):
+ # Repo as separator
+ choices.append(
+ Separator(
+ f"\n{repo.repo_type.capitalize()} {repo.repo_id} ({repo.size_on_disk_str},"
+ f" used {repo.last_accessed_str})"
+ )
+ )
+ for revision in sorted(repo.revisions, key=_revision_sorting_order):
+ # Revision as choice
+ choices.append(
+ Choice(
+ revision.commit_hash,
+ name=(
+ f"{revision.commit_hash[:8]}:"
+ f" {', '.join(sorted(revision.refs)) or '(detached)'} #"
+ f" modified {revision.last_modified_str}"
+ ),
+ enabled=revision.commit_hash in preselected,
+ )
+ )
+
+ # Return choices
+ return choices
+
+
+ def _manual_review_no_tui(hf_cache_info: HFCacheInfo, preselected: List[str]) -> List[str]:
+ """Ask the user for a manual review of the revisions to delete.
+
+ Used when TUI is disabled. Manual review happens in a separate tmp file that the
+ user can manually edit.
+ """
+ # 1. Generate temporary file with delete commands.
+ fd, tmp_path = mkstemp(suffix=".txt") # suffix to make it easier to find by editors
+ os.close(fd)
+
+ lines = []
+ for repo in sorted(hf_cache_info.repos, key=_repo_sorting_order):
+ lines.append(
+ f"\n# {repo.repo_type.capitalize()} {repo.repo_id} ({repo.size_on_disk_str},"
+ f" used {repo.last_accessed_str})"
+ )
+ for revision in sorted(repo.revisions, key=_revision_sorting_order):
+ lines.append(
+ # Deselect by prepending a '#'
+ f"{'' if revision.commit_hash in preselected else '#'} "
+ f" {revision.commit_hash} # Refs:"
+ # Print `refs` as comment on same line
+ f" {', '.join(sorted(revision.refs)) or '(detached)'} # modified"
+ # Print `last_modified` as comment on same line
+ f" {revision.last_modified_str}"
+ )
+
+ with open(tmp_path, "w") as f:
+ f.write(_MANUAL_REVIEW_NO_TUI_INSTRUCTIONS)
+ f.write("\n".join(lines))
+
+ # 2. Prompt instructions to user.
+ instructions = f"""
+ TUI is disabled. In order to select which revisions you want to delete, please edit
+ the following file using the text editor of your choice. Instructions for manual
+ editing are located at the beginning of the file. Edit the file, save it and confirm
+ to continue.
+ File to edit: {ANSI.bold(tmp_path)}
+ """
+ print("\n".join(line.strip() for line in instructions.strip().split("\n")))
+
+ # 3. Wait for user confirmation.
+ while True:
+ selected_hashes = _read_manual_review_tmp_file(tmp_path)
+ if _ask_for_confirmation_no_tui(
+ _get_expectations_str(hf_cache_info, selected_hashes) + " Continue?",
+ default=False,
+ ):
+ break
+
+ # 4. Return selected_hashes
+ os.remove(tmp_path)
+ return selected_hashes
+
+
+ def _ask_for_confirmation_no_tui(message: str, default: bool = True) -> bool:
+ """Ask for confirmation using pure Python."""
+ YES = ("y", "yes", "1")
+ NO = ("n", "no", "0")
+ DEFAULT = ""
+ ALL = YES + NO + (DEFAULT,)
+ full_message = message + (" (Y/n) " if default else " (y/N) ")
+ while True:
+ answer = input(full_message).lower()
+ if answer == DEFAULT:
+ return default
+ if answer in YES:
+ return True
+ if answer in NO:
+ return False
+ print(f"Invalid input. Must be one of {ALL}")
+
+
+ def _get_expectations_str(hf_cache_info: HFCacheInfo, selected_hashes: List[str]) -> str:
+ """Format a string to display to the user how much space would be saved.
+
+ Example:
+ ```
+ >>> _get_expectations_str(hf_cache_info, selected_hashes)
+ '7 revisions selected counting for 4.3G.'
+ ```
+ """
+ if _CANCEL_DELETION_STR in selected_hashes:
+ return "Nothing will be deleted."
+ strategy = hf_cache_info.delete_revisions(*selected_hashes)
+ return f"{len(selected_hashes)} revisions selected counting for {strategy.expected_freed_size_str}."
+
+
+ def _read_manual_review_tmp_file(tmp_path: str) -> List[str]:
+ """Read the manually reviewed instruction file and return a list of revision hashes.
+
+ Example:
+ ```txt
+ # This is the tmp file content
+ ###
+
+ # Commented out line
+ 123456789 # revision hash
+
+ # Something else
+ # a_newer_hash # 2 days ago
+ an_older_hash # 3 days ago
+ ```
+
+ ```py
+ >>> _read_manual_review_tmp_file(tmp_path)
+ ['123456789', 'an_older_hash']
+ ```
+ """
+ with open(tmp_path) as f:
+ content = f.read()
+
+ # Split lines
+ lines = [line.strip() for line in content.split("\n")]
+
+ # Filter commented lines
+ selected_lines = [line for line in lines if not line.startswith("#")]
+
+ # Select only before comment
+ selected_hashes = [line.split("#")[0].strip() for line in selected_lines]
+
+ # Return revision hashes
+ return [hash for hash in selected_hashes if len(hash) > 0]
+
+
+ _MANUAL_REVIEW_NO_TUI_INSTRUCTIONS = f"""
+ # INSTRUCTIONS
+ # ------------
+ # This is a temporary file created by running `huggingface-cli delete-cache` with the
+ # `--disable-tui` option. It contains a set of revisions that can be deleted from your
+ # local cache directory.
+ #
+ # Please manually review the revisions you want to delete:
+ # - Revision hashes can be commented out with '#'.
+ # - Only non-commented revisions in this file will be deleted.
+ # - Revision hashes that are removed from this file are ignored as well.
+ # - If the `{_CANCEL_DELETION_STR}` line is uncommented, the whole cache deletion is cancelled and
+ # no changes will be applied.
+ #
+ # Once you've manually reviewed this file, please confirm deletion in the terminal. This
+ # file will be automatically removed once done.
+ # ------------
+
+ # KILL SWITCH
+ # ------------
+ # Un-comment the following line to completely cancel the deletion process
+ # {_CANCEL_DELETION_STR}
+ # ------------
+
+ # REVISIONS
+ # ------------
+ """.strip()
+
+
+ def _repo_sorting_order(repo: CachedRepoInfo) -> Any:
+ # First split by Dataset/Model, then sort by last accessed (oldest first)
+ return (repo.repo_type, repo.last_accessed)
+
+
+ def _revision_sorting_order(revision: CachedRevisionInfo) -> Any:
+ # Sort by last modified (oldest first)
+ return revision.last_modified
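The command is a thin interactive layer over the cache utilities it imports; a hedged sketch of the same scan-and-delete flow done programmatically (the selection rule is a placeholder):

```python
# Sketch: programmatic equivalent of `delete-cache`, using the same utilities.
from huggingface_hub import scan_cache_dir

hf_cache_info = scan_cache_dir()  # scan the default cache directory

# Placeholder selection rule: every revision not reachable from the `main` ref.
selected_hashes = [
    revision.commit_hash
    for repo in hf_cache_info.repos
    for revision in repo.revisions
    if "main" not in revision.refs
]

strategy = hf_cache_info.delete_revisions(*selected_hashes)
print(f"Would free {strategy.expected_freed_size_str}.")
strategy.execute()  # actually delete the selected snapshots
```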
huggingface_hub/commands/download.py ADDED
@@ -0,0 +1,200 @@
+ # coding=utf-8
+ # Copyright 2023-present, the HuggingFace Inc. team.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains a command to download files from the Hub with the CLI.
+
+ Usage:
+ huggingface-cli download --help
+
+ # Download file
+ huggingface-cli download gpt2 config.json
+
+ # Download entire repo
+ huggingface-cli download fffiloni/zeroscope --repo-type=space --revision=refs/pr/78
+
+ # Download repo with filters
+ huggingface-cli download gpt2 --include="*.safetensors"
+
+ # Download with token
+ huggingface-cli download Wauplin/private-model --token=hf_***
+
+ # Download quietly (no progress bar, no warnings, only the returned path)
+ huggingface-cli download gpt2 config.json --quiet
+
+ # Download to local dir
+ huggingface-cli download gpt2 --local-dir=./models/gpt2
+ """
+
+ import warnings
+ from argparse import Namespace, _SubParsersAction
+ from typing import List, Optional
+
+ from huggingface_hub import logging
+ from huggingface_hub._snapshot_download import snapshot_download
+ from huggingface_hub.commands import BaseHuggingfaceCLICommand
+ from huggingface_hub.file_download import hf_hub_download
+ from huggingface_hub.utils import disable_progress_bars, enable_progress_bars
+
+
+ logger = logging.get_logger(__name__)
+
+
+ class DownloadCommand(BaseHuggingfaceCLICommand):
+ @staticmethod
+ def register_subcommand(parser: _SubParsersAction):
+ download_parser = parser.add_parser("download", help="Download files from the Hub")
+ download_parser.add_argument(
+ "repo_id", type=str, help="ID of the repo to download from (e.g. `username/repo-name`)."
+ )
+ download_parser.add_argument(
+ "filenames", type=str, nargs="*", help="Files to download (e.g. `config.json`, `data/metadata.jsonl`)."
+ )
+ download_parser.add_argument(
+ "--repo-type",
+ choices=["model", "dataset", "space"],
+ default="model",
+ help="Type of repo to download from (defaults to 'model').",
+ )
+ download_parser.add_argument(
+ "--revision",
+ type=str,
+ help="An optional Git revision id which can be a branch name, a tag, or a commit hash.",
+ )
+ download_parser.add_argument(
+ "--include", nargs="*", type=str, help="Glob patterns to match files to download."
+ )
+ download_parser.add_argument(
+ "--exclude", nargs="*", type=str, help="Glob patterns to exclude from files to download."
+ )
+ download_parser.add_argument(
+ "--cache-dir", type=str, help="Path to the directory where downloaded files are saved."
+ )
+ download_parser.add_argument(
+ "--local-dir",
+ type=str,
+ help=(
+ "If set, the downloaded file will be placed under this directory. Check out"
+ " https://huggingface.co/docs/huggingface_hub/guides/download#download-files-to-local-folder for more"
+ " details."
+ ),
+ )
+ download_parser.add_argument(
+ "--local-dir-use-symlinks",
+ choices=["auto", "True", "False"],
+ help=("Deprecated and ignored. Downloading to a local directory does not use symlinks anymore."),
+ )
+ download_parser.add_argument(
+ "--force-download",
+ action="store_true",
+ help="If True, the files will be downloaded even if they are already cached.",
+ )
+ download_parser.add_argument(
+ "--resume-download",
+ action="store_true",
+ help="Deprecated and ignored. Downloading a file to local dir always attempts to resume previously interrupted downloads (unless hf-transfer is enabled).",
+ )
+ download_parser.add_argument(
+ "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens"
+ )
+ download_parser.add_argument(
+ "--quiet",
+ action="store_true",
+ help="If True, progress bars are disabled and only the path to the downloaded files is printed.",
+ )
+ download_parser.add_argument(
+ "--max-workers",
+ type=int,
+ default=8,
+ help="Maximum number of workers to use for downloading files. Default is 8.",
+ )
+ download_parser.set_defaults(func=DownloadCommand)
+
+ def __init__(self, args: Namespace) -> None:
+ self.token = args.token
+ self.repo_id: str = args.repo_id
+ self.filenames: List[str] = args.filenames
+ self.repo_type: str = args.repo_type
+ self.revision: Optional[str] = args.revision
+ self.include: Optional[List[str]] = args.include
+ self.exclude: Optional[List[str]] = args.exclude
+ self.cache_dir: Optional[str] = args.cache_dir
+ self.local_dir: Optional[str] = args.local_dir
+ self.force_download: bool = args.force_download
+ self.resume_download: Optional[bool] = args.resume_download or None
+ self.quiet: bool = args.quiet
+ self.max_workers: int = args.max_workers
+
+ if args.local_dir_use_symlinks is not None:
+ warnings.warn(
+ "Ignoring --local-dir-use-symlinks. Downloading to a local directory does not use symlinks anymore.",
+ FutureWarning,
+ )
+
+ def run(self) -> None:
+ if self.quiet:
+ disable_progress_bars()
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore")
+ print(self._download()) # Print path to downloaded files
+ enable_progress_bars()
+ else:
+ logging.set_verbosity_info()
+ print(self._download()) # Print path to downloaded files
+ logging.set_verbosity_warning()
+
+ def _download(self) -> str:
+ # Warn user if patterns are ignored
+ if len(self.filenames) > 0:
+ if self.include is not None and len(self.include) > 0:
+ warnings.warn("Ignoring `--include` since filenames have been explicitly set.")
+ if self.exclude is not None and len(self.exclude) > 0:
+ warnings.warn("Ignoring `--exclude` since filenames have been explicitly set.")
+
+ # Single file to download: use `hf_hub_download`
+ if len(self.filenames) == 1:
+ return hf_hub_download(
+ repo_id=self.repo_id,
+ repo_type=self.repo_type,
+ revision=self.revision,
+ filename=self.filenames[0],
+ cache_dir=self.cache_dir,
+ resume_download=self.resume_download,
+ force_download=self.force_download,
+ token=self.token,
+ local_dir=self.local_dir,
+ library_name="huggingface-cli",
+ )
+
+ # Otherwise: use `snapshot_download` to ensure all files come from the same revision
+ elif len(self.filenames) == 0:
+ allow_patterns = self.include
+ ignore_patterns = self.exclude
+ else:
+ allow_patterns = self.filenames
+ ignore_patterns = None
+
+ return snapshot_download(
+ repo_id=self.repo_id,
+ repo_type=self.repo_type,
+ revision=self.revision,
+ allow_patterns=allow_patterns,
+ ignore_patterns=ignore_patterns,
+ resume_download=self.resume_download,
+ force_download=self.force_download,
+ cache_dir=self.cache_dir,
+ token=self.token,
+ local_dir=self.local_dir,
+ library_name="huggingface-cli",
+ max_workers=self.max_workers,
+ )
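The dispatch above maps directly onto the two library functions it imports; a short sketch of the equivalent direct calls (the repo and patterns are just examples):

```python
# Sketch: the library calls behind `huggingface-cli download`.
from huggingface_hub import hf_hub_download, snapshot_download

# One explicit filename -> hf_hub_download returns the path to that single file.
path = hf_hub_download(repo_id="gpt2", filename="config.json")
print(path)

# No filenames -> snapshot_download fetches the whole repo (optionally filtered),
# guaranteeing that every file comes from the same revision.
folder = snapshot_download(repo_id="gpt2", allow_patterns=["*.json"])
print(folder)
```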
huggingface_hub/commands/env.py ADDED
@@ -0,0 +1,36 @@
+ # Copyright 2022 The HuggingFace Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains a command to print information about the environment.
+
+ Usage:
+ huggingface-cli env
+ """
+
+ from argparse import _SubParsersAction
+
+ from ..utils import dump_environment_info
+ from . import BaseHuggingfaceCLICommand
+
+
+ class EnvironmentCommand(BaseHuggingfaceCLICommand):
+ def __init__(self, args):
+ self.args = args
+
+ @staticmethod
+ def register_subcommand(parser: _SubParsersAction):
+ env_parser = parser.add_parser("env", help="Print information about the environment.")
+ env_parser.set_defaults(func=EnvironmentCommand)
+
+ def run(self) -> None:
+ dump_environment_info()
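The command is a one-line wrapper, so the same dump can be triggered from Python:

```python
# Sketch: same output as `huggingface-cli env`, called directly.
from huggingface_hub.utils import dump_environment_info

dump_environment_info()  # prints huggingface_hub version, platform, token status, etc.
```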
huggingface_hub/commands/huggingface_cli.py ADDED
@@ -0,0 +1,61 @@
+ # Copyright 2020 The HuggingFace Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from argparse import ArgumentParser
+
+ from huggingface_hub.commands.delete_cache import DeleteCacheCommand
+ from huggingface_hub.commands.download import DownloadCommand
+ from huggingface_hub.commands.env import EnvironmentCommand
+ from huggingface_hub.commands.lfs import LfsCommands
+ from huggingface_hub.commands.repo_files import RepoFilesCommand
+ from huggingface_hub.commands.scan_cache import ScanCacheCommand
+ from huggingface_hub.commands.tag import TagCommands
+ from huggingface_hub.commands.upload import UploadCommand
+ from huggingface_hub.commands.upload_large_folder import UploadLargeFolderCommand
+ from huggingface_hub.commands.user import UserCommands
+ from huggingface_hub.commands.version import VersionCommand
+
+
+ def main():
+ parser = ArgumentParser("huggingface-cli", usage="huggingface-cli <command> [<args>]")
+ commands_parser = parser.add_subparsers(help="huggingface-cli command helpers")
+
+ # Register commands
+ DownloadCommand.register_subcommand(commands_parser)
+ UploadCommand.register_subcommand(commands_parser)
+ RepoFilesCommand.register_subcommand(commands_parser)
+ EnvironmentCommand.register_subcommand(commands_parser)
+ UserCommands.register_subcommand(commands_parser)
+ LfsCommands.register_subcommand(commands_parser)
+ ScanCacheCommand.register_subcommand(commands_parser)
+ DeleteCacheCommand.register_subcommand(commands_parser)
+ TagCommands.register_subcommand(commands_parser)
+ VersionCommand.register_subcommand(commands_parser)
+
+ # Experimental
+ UploadLargeFolderCommand.register_subcommand(commands_parser)
+
+ # Let's go
+ args = parser.parse_args()
+ if not hasattr(args, "func"):
+ parser.print_help()
+ exit(1)
+
+ # Run
+ service = args.func(args)
+ service.run()
+
+
+ if __name__ == "__main__":
+ main()
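The `set_defaults(func=...)` trick is what lets `main()` stay generic: `args.func` is the command class itself, so `args.func(args)` builds the command and `.run()` executes it. The pattern in isolation, with a stand-in command:

```python
# Sketch: the argparse `set_defaults(func=...)` dispatch pattern in isolation.
from argparse import ArgumentParser


class EchoCommand:  # stand-in for a BaseHuggingfaceCLICommand subclass
    def __init__(self, args):
        self.text = args.text

    def run(self):
        print(self.text)


parser = ArgumentParser("demo")
sub = parser.add_subparsers()
echo = sub.add_parser("echo")
echo.add_argument("text")
echo.set_defaults(func=EchoCommand)  # `args.func` becomes the class itself

args = parser.parse_args(["echo", "hi"])
args.func(args).run()  # prints "hi"
```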