LTEnjoy committed on
Commit
4d3a37a
1 Parent(s): 8355ae1

Upload 2 files

Browse files
Files changed (2) hide show
  1. loop_retrieve_cards.py +52 -0
  2. utils.py +86 -0
loop_retrieve_cards.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import threading
2
+ import time
3
+
4
+ from utils import fetch_models, fetch_datasets, fetch_readme
5
+ from tqdm import tqdm
6
+
7
+
8
# Define global variables
# Shared snapshot that run() rebinds after every refresh cycle and that the
# get_* functions below hand out. `models` and `datasets` stay None until the
# first refresh has finished; `readme_dict` starts out empty.
models = None
datasets = None
readme_dict = {}
12
+
13
+
14
def get_models():
    """
    Provide an API to get models

    Returns:
        models: The current value of the module-level `models` variable
            (None until run() has completed its first refresh)
    """
    return models
17
+
18
+
19
def get_datasets():
    """
    Provide an API to get datasets

    Returns:
        datasets: The current value of the module-level `datasets` variable
            (None until run() has completed its first refresh)
    """
    return datasets
22
+
23
+
24
def get_readme_dict():
    """
    Provide an API to get READMEs

    Returns:
        readme_dict: The current value of the module-level `readme_dict`
            variable, mapping a model or dataset ID to its README text
            (empty until run() has completed its first refresh)
    """
    return readme_dict
27
+
28
+
29
# Start a thread to continuously update cards
def run(interval: float = 60.0):
    """
    Refresh the cached models, datasets and READMEs in an endless loop

    Each cycle fetches the model and dataset lists, downloads the README of
    every card into a fresh dict, and only then rebinds the module-level
    globals, so readers never see a partially filled README dict. If a cycle
    fails, the previous snapshot is kept and the next cycle tries again.

    Args:
        interval: Seconds to wait between two refresh cycles
    """
    global models, datasets, readme_dict

    import logging

    logger = logging.getLogger(__name__)

    while True:
        try:
            new_models = fetch_models()
            new_datasets = fetch_datasets()

            # Add READMEs
            new_readme_dict = {}
            for model in new_models:
                new_readme_dict[model] = fetch_readme(model, "model")

            for dataset in new_datasets:
                new_readme_dict[dataset] = fetch_readme(dataset, "dataset")

        except Exception:
            # This is the top of the updater thread: an uncaught error here
            # would end the loop and freeze the cache forever, so log it and
            # keep the previous snapshot instead.
            logger.exception("Failed to refresh cards; keeping previous data")

        else:
            # Update global variables
            models = new_models
            datasets = new_datasets
            readme_dict = new_readme_dict

        # Pause between cycles so the remote API is not queried back-to-back
        time.sleep(interval)
49
+
50
+
51
# Launch the updater as soon as this module is imported.
# NOTE(review): the thread is not marked as daemon and run() never returns, so
# it presumably keeps the process alive until it is killed -- confirm intended.
t = threading.Thread(target=run)
t.start()
utils.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import re
3
+
4
+
5
def fetch_models(author: str = "SaProtHub", timeout: float = 30.0) -> list:
    """
    Retrieve models belonging to a specific author

    Args:
        author: Author name
        timeout: Seconds to wait for the server before giving up

    Returns:
        models: List of model IDs (the "id" field of every returned entry)

    Raises:
        requests.HTTPError: If the server answers with an error status
        requests.Timeout: If the server does not answer within `timeout`
    """

    url = "https://hf-mirror.com/api/models"
    # Passing the author via `params` lets requests URL-encode it
    response = requests.get(url, params={"author": author}, timeout=timeout)
    # Fail with a clear HTTP error instead of a confusing one while parsing
    response.raise_for_status()

    return [item["id"] for item in response.json()]
22
+
23
+
24
def fetch_datasets(author: str = "SaProtHub", timeout: float = 30.0) -> list:
    """
    Retrieve datasets belonging to a specific author

    Args:
        author: Author name
        timeout: Seconds to wait for the server before giving up

    Returns:
        datasets: List of dataset IDs (the "id" field of every returned entry)

    Raises:
        requests.HTTPError: If the server answers with an error status
        requests.Timeout: If the server does not answer within `timeout`
    """

    url = "https://hf-mirror.com/api/datasets"
    # Passing the author via `params` lets requests URL-encode it
    response = requests.get(url, params={"author": author}, timeout=timeout)
    # Fail with a clear HTTP error instead of a confusing one while parsing
    response.raise_for_status()

    return [item["id"] for item in response.json()]
41
+
42
+
43
def fetch_readme(card_id: str, card_type: str, timeout: float = 30.0) -> str:
    """
    Retrieve the README file of a model or dataset

    Only a leading YAML front matter block (the metadata enclosed by two
    "---" lines at the very top of the file) is removed. Any later "---" in
    the body, e.g. a markdown table separator or a horizontal rule, is kept.

    Args:
        card_id: Model or dataset ID
        card_type: Type of card, either "model" or "dataset"
            (any value other than "model" is treated as a dataset)
        timeout: Seconds to wait for the server before giving up

    Returns:
        readme: README text without the leading front matter. The response
            body is returned whatever the HTTP status is.

    Raises:
        requests.Timeout: If the server does not answer within `timeout`
    """
    if card_type == "model":
        url = f"https://hf-mirror.com/{card_id}/raw/main/README.md"
    else:
        url = f"https://hf-mirror.com/datasets/{card_id}/raw/main/README.md"

    response = requests.get(url, timeout=timeout)
    readme = response.text

    delimiter = "---"
    if readme.startswith(delimiter):
        # Look for the line that closes the front matter opened on line one
        closing = readme.find("\n" + delimiter, len(delimiter))
        if closing != -1:
            readme = readme[closing + 1 + len(delimiter):]

    return readme
63
+
64
+
65
def set_text_bg_color(pattern: str, text: str, color: str = "yellow") -> str:
    """
    Set the background color of a pattern in a text

    Every case-insensitive match of `pattern` is wrapped in a
    `<span style="background-color:...">` element. The text is rewritten in a
    single pass, so markup inserted for one match is never searched again.

    Args:
        pattern: Pattern to highlight (a regular expression)
        text: Text to search
        color: Background color

    Returns:
        text: Text with highlighted pattern

    Raises:
        re.error: If `pattern` is not a valid regular expression
    """

    def _highlight(match):
        found = match.group(0)
        # An empty match has nothing to highlight; leave the text untouched
        if not found:
            return found
        return f'<span style="background-color:{color}">{found}</span>'

    # Use the whole match (group 0) so patterns with capture groups work too
    return re.sub(pattern, _highlight, text, flags=re.IGNORECASE)
86
+