Spaces:
Running
Running
kovacsvi
committed on
Commit
·
7e0dad9
1
Parent(s):
8869f68
bigquery model list (write)
Browse files- app.py +43 -7
- requirements.txt +9 -1
- utils.py +31 -3
app.py
CHANGED
@@ -15,7 +15,15 @@ from interfaces.emotion9 import demo as e9_demo
|
|
15 |
from interfaces.cap_media import demo as cap_media_demo
|
16 |
from interfaces.cap_media2 import demo as cap_media2_demo
|
17 |
from interfaces.cap_minor_media import demo as cap_minor_media_demo
|
18 |
-
from utils import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
|
21 |
css = """
|
@@ -44,21 +52,49 @@ with gr.Blocks(css=css, theme="origin") as demo:
|
|
44 |
Please feel free to check back for model updates, or reach out to us at that point if you wish to ask about a specific model.
|
45 |
</p>
|
46 |
</div>
|
47 |
-
"""
|
|
|
48 |
|
49 |
gr.TabbedInterface(
|
50 |
-
interface_list=[
|
51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
)
|
53 |
|
54 |
if __name__ == "__main__":
|
55 |
set_torch_threads()
|
56 |
set_hf_cache_dir("/data")
|
57 |
with hf_cleanup():
|
58 |
-
df_h()
|
59 |
scan_cache()
|
60 |
-
download_spacy_models()
|
61 |
-
download_hf_models()
|
|
|
62 |
demo.launch()
|
63 |
|
64 |
# TODO: add all languages & domains
|
|
|
15 |
from interfaces.cap_media import demo as cap_media_demo
|
16 |
from interfaces.cap_media2 import demo as cap_media2_demo
|
17 |
from interfaces.cap_minor_media import demo as cap_minor_media_demo
|
18 |
+
from utils import (
|
19 |
+
download_hf_models,
|
20 |
+
hf_cleanup,
|
21 |
+
df_h,
|
22 |
+
set_hf_cache_dir,
|
23 |
+
scan_cache,
|
24 |
+
set_torch_threads,
|
25 |
+
update_bq_model_table,
|
26 |
+
)
|
27 |
|
28 |
|
29 |
css = """
|
|
|
52 |
Please feel free to check back for model updates, or reach out to us at that point if you wish to ask about a specific model.
|
53 |
</p>
|
54 |
</div>
|
55 |
+
"""
|
56 |
+
)
|
57 |
|
58 |
gr.TabbedInterface(
|
59 |
+
interface_list=[
|
60 |
+
cap_demo,
|
61 |
+
cap_media_demo,
|
62 |
+
cap_media2_demo,
|
63 |
+
cap_minor_demo,
|
64 |
+
cap_minor_media_demo,
|
65 |
+
manifesto_demo,
|
66 |
+
sentiment_demo,
|
67 |
+
emotion_demo,
|
68 |
+
e9_demo,
|
69 |
+
illframes_demo,
|
70 |
+
ner_demo,
|
71 |
+
ontolisst_demo,
|
72 |
+
],
|
73 |
+
tab_names=[
|
74 |
+
"CAP Major (22)",
|
75 |
+
"CAP Major + Media (29)",
|
76 |
+
"CAP Major + Media2 (40)",
|
77 |
+
"CAP Minor (214)",
|
78 |
+
"CAP Minor + Media (231)",
|
79 |
+
"Manifesto",
|
80 |
+
"Sentiment (3)",
|
81 |
+
"Emotions (6)",
|
82 |
+
"Emotions (9)",
|
83 |
+
"ILLFRAMES",
|
84 |
+
"Named Entity Recognition",
|
85 |
+
"ONTOLISST",
|
86 |
+
],
|
87 |
)
|
88 |
|
89 |
if __name__ == "__main__":
|
90 |
set_torch_threads()
|
91 |
set_hf_cache_dir("/data")
|
92 |
with hf_cleanup():
|
93 |
+
df_h() # debug -> check disk space before launching demo - TO-DO: smarter disk space usage
|
94 |
scan_cache()
|
95 |
+
download_spacy_models()
|
96 |
+
download_hf_models() # does this affect the build?
|
97 |
+
update_bq_model_table()
|
98 |
demo.launch()
|
99 |
|
100 |
# TODO: add all languages & domains
|
requirements.txt
CHANGED
@@ -1,10 +1,18 @@
|
|
|
|
1 |
pandas
|
|
|
|
|
2 |
torch==2.2.2
|
3 |
transformers==4.39.1
|
4 |
sentencepiece==0.2.0
|
5 |
accelerate
|
6 |
spacy
|
7 |
huspacy
|
8 |
-
numpy==1.26.4
|
9 |
safetensors
|
10 |
huggingface_hub
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
numpy==1.26.4
|
2 |
pandas
|
3 |
+
|
4 |
+
# models
|
5 |
torch==2.2.2
|
6 |
transformers==4.39.1
|
7 |
sentencepiece==0.2.0
|
8 |
accelerate
|
9 |
spacy
|
10 |
huspacy
|
|
|
11 |
safetensors
|
12 |
huggingface_hub
|
13 |
+
|
14 |
+
# BQ stuff
|
15 |
+
google-cloud-bigquery
|
16 |
+
google-auth
|
17 |
+
pyarrow
|
18 |
+
pandas-gbq
|
utils.py
CHANGED
@@ -5,6 +5,11 @@ import subprocess
|
|
5 |
from contextlib import contextmanager
|
6 |
|
7 |
import torch
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
10 |
|
@@ -18,9 +23,7 @@ from interfaces.illframes import domains as domains_illframes
|
|
18 |
from interfaces.cap import build_huggingface_path as hf_cap_path
|
19 |
from interfaces.cap_minor import build_huggingface_path as hf_cap_minor_path
|
20 |
from interfaces.cap_minor_media import build_huggingface_path as hf_cap_minor_media_path
|
21 |
-
from interfaces.cap_media import
|
22 |
-
build_huggingface_path as hf_cap_media_path,
|
23 |
-
) # why... just follow the name template the next time pls
|
24 |
from interfaces.cap_media2 import build_huggingface_path as hf_cap_media2_path
|
25 |
from interfaces.manifesto import build_huggingface_path as hf_manifesto_path
|
26 |
from interfaces.sentiment import build_huggingface_path as hf_sentiment_path
|
@@ -216,3 +219,28 @@ def is_disk_full(min_free_space_in_GB=10):
|
|
216 |
return False
|
217 |
else:
|
218 |
return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
from contextlib import contextmanager
|
6 |
|
7 |
import torch
|
8 |
+
import pandas as pd
|
9 |
+
|
10 |
+
import json
|
11 |
+
from google.oauth2 import service_account
|
12 |
+
from pandas_gbq import to_gbq
|
13 |
|
14 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
15 |
|
|
|
23 |
from interfaces.cap import build_huggingface_path as hf_cap_path
|
24 |
from interfaces.cap_minor import build_huggingface_path as hf_cap_minor_path
|
25 |
from interfaces.cap_minor_media import build_huggingface_path as hf_cap_minor_media_path
|
26 |
+
from interfaces.cap_media import build_huggingface_path as hf_cap_media_path
|
|
|
|
|
27 |
from interfaces.cap_media2 import build_huggingface_path as hf_cap_media2_path
|
28 |
from interfaces.manifesto import build_huggingface_path as hf_manifesto_path
|
29 |
from interfaces.sentiment import build_huggingface_path as hf_sentiment_path
|
|
|
219 |
return False
|
220 |
else:
|
221 |
return True
|
222 |
+
|
223 |
+
|
224 |
+
def update_bq_model_table():
|
225 |
+
try:
|
226 |
+
# Load BQ credentials from HF secret
|
227 |
+
service_account_info = json.loads(os.environ["GCP_SERVICE_ACCOUNT_JSON"])
|
228 |
+
credentials = service_account.Credentials.from_service_account_info(
|
229 |
+
service_account_info
|
230 |
+
)
|
231 |
+
|
232 |
+
project_id = os.environ.get("BQ_PROJECT_ID", None)
|
233 |
+
dataset_id = os.environ.get("BQ_DATASET_ID", None)
|
234 |
+
table_id = os.environ.get("BQ_TABLE_ID", None) # hf_space_models
|
235 |
+
full_table_id = f"{dataset_id}.{table_id}"
|
236 |
+
|
237 |
+
to_gbq(
|
238 |
+
pd.DataFrame({"model_id": models}),
|
239 |
+
destination_table=full_table_id,
|
240 |
+
project_id=project_id,
|
241 |
+
if_exists="replace",
|
242 |
+
table_schema=table_schema,
|
243 |
+
credentials=credentials,
|
244 |
+
)
|
245 |
+
except Exception as e:
|
246 |
+
print(f"BigQuery model table update failed: {e}")
|