Spaces:
Running
Running
Upload 2 files
Browse files- loop_retrieve_cards.py +52 -0
- utils.py +86 -0
loop_retrieve_cards.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import threading
|
2 |
+
import time
|
3 |
+
|
4 |
+
from utils import fetch_models, fetch_datasets, fetch_readme
|
5 |
+
from tqdm import tqdm
|
6 |
+
|
7 |
+
|
8 |
+
# Define global variables
|
9 |
+
# Snapshot published by the background refresh loop in run().
# Both id lists are None until the first refresh pass has completed.
models = datasets = None

# README text keyed by model or dataset id; empty until the first refresh.
readme_dict = dict()
|
12 |
+
|
13 |
+
|
14 |
+
# Provide an API to get models
|
15 |
+
def get_models():
    """
    Return the model id list most recently published by the refresh loop.

    Returns:
        The current value of the module-level ``models`` name, which is
        ``None`` until the first refresh pass has completed.
    """
    return models
|
17 |
+
|
18 |
+
|
19 |
+
# Provide an API to get datasets
|
20 |
+
def get_datasets():
    """
    Return the dataset id list most recently published by the refresh loop.

    Returns:
        The current value of the module-level ``datasets`` name, which is
        ``None`` until the first refresh pass has completed.
    """
    return datasets
|
22 |
+
|
23 |
+
|
24 |
+
# Provide an API to get READMEs
|
25 |
+
def get_readme_dict():
    """
    Return the README dictionary most recently published by the refresh loop.

    Returns:
        The current value of the module-level ``readme_dict`` name: a dict
        mapping model/dataset ids to README text (empty until the first
        refresh pass has completed).
    """
    return readme_dict
|
27 |
+
|
28 |
+
|
29 |
+
# Start a thread to continuously update cards
|
30 |
+
def run():
    """
    Refresh the cached model ids, dataset ids and README texts forever.

    Each pass fetches everything into local variables first and only then
    rebinds the module-level ``models``, ``datasets`` and ``readme_dict``
    names, so a reader never sees a partially filled README dictionary.
    The three names are rebound one after another, not as a single atomic
    step.

    If a pass fails (for example because a fetch helper raises on a network
    error), the failure is logged, the previously published values are kept,
    and the pass is retried after a short pause. Without this, a single
    failed request would end the refresh thread for good.

    This function never returns; it is meant to be used as a thread target.
    """
    global models, datasets, readme_dict

    import logging

    logger = logging.getLogger(__name__)
    retry_delay = 30  # seconds to wait after a failed pass before retrying

    while True:
        try:
            new_models = fetch_models()
            new_datasets = fetch_datasets()

            # Add READMEs
            # NOTE: models and datasets share one dictionary keyed by id, so a
            # dataset whose id equals a model's id overwrites that entry.
            new_readme_dict = {}
            for model in new_models:
                new_readme_dict[model] = fetch_readme(model, "model")

            for dataset in new_datasets:
                new_readme_dict[dataset] = fetch_readme(dataset, "dataset")
        except Exception:
            # Thread-level boundary: keep serving the last good snapshot and
            # try again later instead of letting the thread die.
            logger.exception("Refreshing cards failed; retrying in %s s", retry_delay)
            time.sleep(retry_delay)
            continue

        # Update global variables
        models = new_models
        datasets = new_datasets
        readme_dict = new_readme_dict
|
49 |
+
|
50 |
+
|
51 |
+
# Launch the refresh loop in a background thread as soon as this module is
# loaded. The thread is created with default settings (no daemon flag is set
# here) and run() never returns.
t = threading.Thread(target=run)
t.start()
|
utils.py
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import re
|
3 |
+
|
4 |
+
|
5 |
+
def fetch_models(author: str = "SaProtHub", timeout: float = 30.0) -> list:
    """
    Retrieve models belonging to a specific author

    Args:
        author: Author name
        timeout: Seconds to wait for the server before giving up

    Returns:
        models: List of model IDs (the "id" field of every returned item)

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status
    """

    url = "https://hf-mirror.com/api/models"

    # Let requests build the query string so the author name is URL-encoded,
    # and never wait indefinitely on an unresponsive server.
    response = requests.get(url, params={"author": author}, timeout=timeout)

    # Fail loudly on 4xx/5xx instead of parsing an error payload as a list
    response.raise_for_status()

    return [item["id"] for item in response.json()]
|
22 |
+
|
23 |
+
|
24 |
+
def fetch_datasets(author: str = "SaProtHub", timeout: float = 30.0) -> list:
    """
    Retrieve datasets belonging to a specific author

    Args:
        author: Author name
        timeout: Seconds to wait for the server before giving up

    Returns:
        datasets: List of dataset IDs (the "id" field of every returned item)

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status
    """

    url = "https://hf-mirror.com/api/datasets"

    # Let requests build the query string so the author name is URL-encoded,
    # and never wait indefinitely on an unresponsive server.
    response = requests.get(url, params={"author": author}, timeout=timeout)

    # Fail loudly on 4xx/5xx instead of parsing an error payload as a list
    response.raise_for_status()

    return [item["id"] for item in response.json()]
|
41 |
+
|
42 |
+
|
43 |
+
# A leading YAML front-matter block: an opening "---" line at the very start
# of the text, optional metadata lines, and a closing "---" line.
_FRONT_MATTER_RE = re.compile(
    r"\A---[ \t]*\r?\n(?:.*?\r?\n)?---[ \t]*(?=\r?\n|\Z)",
    re.DOTALL,
)


def _strip_front_matter(text: str) -> str:
    """Return ``text`` without its leading YAML front-matter block, if any."""
    match = _FRONT_MATTER_RE.match(text)
    return text[match.end():] if match else text


def fetch_readme(card_id: str, card_type: str, timeout: float = 30.0) -> str:
    """
    Retrieve the README file of a model or dataset

    Only a front-matter block at the very top of the file is removed. Any
    later "---" in the body (horizontal rules, table separator rows) is left
    untouched, so the full README body is returned.

    Args:
        card_id: Model or dataset ID
        card_type: Type of card, either "model" or "dataset"
        timeout: Seconds to wait for the server before giving up

    Returns:
        readme: README text without the leading metadata block

    Raises:
        requests.RequestException: If the request fails or times out
    """
    if card_type == "model":
        url = f"https://hf-mirror.com/{card_id}/raw/main/README.md"
    else:
        url = f"https://hf-mirror.com/datasets/{card_id}/raw/main/README.md"

    response = requests.get(url, timeout=timeout)

    # NOTE: the HTTP status is deliberately not checked, so a card without a
    # README yields whatever body the server sent rather than an exception.
    return _strip_front_matter(response.text)
|
63 |
+
|
64 |
+
|
65 |
+
def set_text_bg_color(pattern: str, text: str, color: str = "yellow") -> str:
    """
    Set the background color of a pattern in a text

    Every non-empty, case-insensitive match of ``pattern`` is wrapped in a
    ``<span>`` carrying an inline background color. The text is rewritten in
    a single pass, so markup inserted for one match is never searched again
    (a pattern such as "s" cannot corrupt the inserted "<span style=...>").

    Args:
        pattern: Regular expression to highlight; the whole match is
            highlighted even when the pattern contains groups
        text: Text to search
        color: Background color

    Returns:
        text: Text with highlighted pattern

    Raises:
        re.error: If ``pattern`` is not a valid regular expression
    """

    def highlight(match):
        found = match.group(0)
        # Zero-width matches (e.g. from "a*") have nothing to highlight
        if not found:
            return found
        return f'<span style="background-color:{color}">{found}</span>'

    # Find and highlight all matches, ignoring case
    return re.sub(pattern, highlight, text, flags=re.IGNORECASE)
|
86 |
+
|