kovacsvi commited on
Commit
7e0dad9
·
1 Parent(s): 8869f68

bigquery model list (write)

Browse files
Files changed (3) hide show
  1. app.py +43 -7
  2. requirements.txt +9 -1
  3. utils.py +31 -3
app.py CHANGED
@@ -15,7 +15,15 @@ from interfaces.emotion9 import demo as e9_demo
15
  from interfaces.cap_media import demo as cap_media_demo
16
  from interfaces.cap_media2 import demo as cap_media2_demo
17
  from interfaces.cap_minor_media import demo as cap_minor_media_demo
18
- from utils import download_hf_models, hf_cleanup, df_h, set_hf_cache_dir, scan_cache, set_torch_threads
 
 
 
 
 
 
 
 
19
 
20
 
21
  css = """
@@ -44,21 +52,49 @@ with gr.Blocks(css=css, theme="origin") as demo:
44
  Please feel free to check back for model updates, or reach out to us at that point if you wish to ask about a specific model.
45
  </p>
46
  </div>
47
- """)
 
48
 
49
  gr.TabbedInterface(
50
- interface_list=[cap_demo, cap_media_demo, cap_media2_demo, cap_minor_demo, cap_minor_media_demo, manifesto_demo, sentiment_demo, emotion_demo, e9_demo,illframes_demo, ner_demo, ontolisst_demo],
51
- tab_names=["CAP Major (22)", "CAP Major + Media (29)", "CAP Major + Media2 (40)", "CAP Minor (214)", "CAP Minor + Media (231)", "Manifesto", "Sentiment (3)", "Emotions (6)","Emotions (9)", "ILLFRAMES", "Named Entity Recognition", "ONTOLISST"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  )
53
 
54
  if __name__ == "__main__":
55
  set_torch_threads()
56
  set_hf_cache_dir("/data")
57
  with hf_cleanup():
58
- df_h() # debug -> check disk space before launching demo - TO-DO: smarter disk space usage
59
  scan_cache()
60
- download_spacy_models()
61
- download_hf_models() # does this affect the build?
 
62
  demo.launch()
63
 
64
  # TODO: add all languages & domains
 
15
  from interfaces.cap_media import demo as cap_media_demo
16
  from interfaces.cap_media2 import demo as cap_media2_demo
17
  from interfaces.cap_minor_media import demo as cap_minor_media_demo
18
+ from utils import (
19
+ download_hf_models,
20
+ hf_cleanup,
21
+ df_h,
22
+ set_hf_cache_dir,
23
+ scan_cache,
24
+ set_torch_threads,
25
+ update_bq_model_table,
26
+ )
27
 
28
 
29
  css = """
 
52
  Please feel free to check back for model updates, or reach out to us at that point if you wish to ask about a specific model.
53
  </p>
54
  </div>
55
+ """
56
+ )
57
 
58
  gr.TabbedInterface(
59
+ interface_list=[
60
+ cap_demo,
61
+ cap_media_demo,
62
+ cap_media2_demo,
63
+ cap_minor_demo,
64
+ cap_minor_media_demo,
65
+ manifesto_demo,
66
+ sentiment_demo,
67
+ emotion_demo,
68
+ e9_demo,
69
+ illframes_demo,
70
+ ner_demo,
71
+ ontolisst_demo,
72
+ ],
73
+ tab_names=[
74
+ "CAP Major (22)",
75
+ "CAP Major + Media (29)",
76
+ "CAP Major + Media2 (40)",
77
+ "CAP Minor (214)",
78
+ "CAP Minor + Media (231)",
79
+ "Manifesto",
80
+ "Sentiment (3)",
81
+ "Emotions (6)",
82
+ "Emotions (9)",
83
+ "ILLFRAMES",
84
+ "Named Entity Recognition",
85
+ "ONTOLISST",
86
+ ],
87
  )
88
 
89
# Script entry point: configure torch/HF caches, then (inside a cleanup
# context) download models and sync the model list to BigQuery before
# launching the Gradio demo.
if __name__ == "__main__":
    set_torch_threads()
    set_hf_cache_dir("/data")  # persistent storage mount on the HF Space
    with hf_cleanup():
        df_h()  # debug -> check disk space before launching demo - TO-DO: smarter disk space usage
        scan_cache()
        # NOTE(review): download_spacy_models is not in the visible
        # `from utils import (...)` list — confirm it is imported elsewhere,
        # otherwise this raises NameError at startup.
        download_spacy_models()
        download_hf_models()  # does this affect the build?
        update_bq_model_table()
    demo.launch()
99
 
100
  # TODO: add all languages & domains
requirements.txt CHANGED
@@ -1,10 +1,18 @@
 
1
  pandas
 
 
2
  torch==2.2.2
3
  transformers==4.39.1
4
  sentencepiece==0.2.0
5
  accelerate
6
  spacy
7
  huspacy
8
- numpy==1.26.4
9
  safetensors
10
  huggingface_hub
 
 
 
 
 
 
 
1
+ numpy==1.26.4
2
  pandas
3
+
4
+ # models
5
  torch==2.2.2
6
  transformers==4.39.1
7
  sentencepiece==0.2.0
8
  accelerate
9
  spacy
10
  huspacy
 
11
  safetensors
12
  huggingface_hub
13
+
14
+ # BQ stuff
15
+ google-cloud-bigquery
16
+ google-auth
17
+ pyarrow
18
+ pandas-gbq
utils.py CHANGED
@@ -5,6 +5,11 @@ import subprocess
5
  from contextlib import contextmanager
6
 
7
  import torch
 
 
 
 
 
8
 
9
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
10
 
@@ -18,9 +23,7 @@ from interfaces.illframes import domains as domains_illframes
18
  from interfaces.cap import build_huggingface_path as hf_cap_path
19
  from interfaces.cap_minor import build_huggingface_path as hf_cap_minor_path
20
  from interfaces.cap_minor_media import build_huggingface_path as hf_cap_minor_media_path
21
- from interfaces.cap_media import (
22
- build_huggingface_path as hf_cap_media_path,
23
- ) # why... just follow the name template the next time pls
24
  from interfaces.cap_media2 import build_huggingface_path as hf_cap_media2_path
25
  from interfaces.manifesto import build_huggingface_path as hf_manifesto_path
26
  from interfaces.sentiment import build_huggingface_path as hf_sentiment_path
@@ -216,3 +219,28 @@ def is_disk_full(min_free_space_in_GB=10):
216
  return False
217
  else:
218
  return True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  from contextlib import contextmanager
6
 
7
  import torch
8
+ import pandas as pd
9
+
10
+ import json
11
+ from google.oauth2 import service_account
12
+ from pandas_gbq import to_gbq
13
 
14
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
15
 
 
23
  from interfaces.cap import build_huggingface_path as hf_cap_path
24
  from interfaces.cap_minor import build_huggingface_path as hf_cap_minor_path
25
  from interfaces.cap_minor_media import build_huggingface_path as hf_cap_minor_media_path
26
+ from interfaces.cap_media import build_huggingface_path as hf_cap_media_path
 
 
27
  from interfaces.cap_media2 import build_huggingface_path as hf_cap_media2_path
28
  from interfaces.manifesto import build_huggingface_path as hf_manifesto_path
29
  from interfaces.sentiment import build_huggingface_path as hf_sentiment_path
 
219
  return False
220
  else:
221
  return True
222
+
223
+
224
def update_bq_model_table():
    """Replace the BigQuery model-list table with the current model ids.

    Best-effort: any failure (missing secrets, bad credentials, network or
    BigQuery errors) is logged and swallowed so the Space can still launch.

    Reads from the environment:
        GCP_SERVICE_ACCOUNT_JSON -- service-account key as a JSON string (HF secret)
        BQ_PROJECT_ID / BQ_DATASET_ID / BQ_TABLE_ID -- target table coordinates

    Assumes module-level ``models`` (iterable of model ids) and
    ``table_schema`` are defined elsewhere in this file — confirm.
    """
    try:
        # Load BQ credentials from HF secret
        service_account_info = json.loads(os.environ["GCP_SERVICE_ACCOUNT_JSON"])
        credentials = service_account.Credentials.from_service_account_info(
            service_account_info
        )

        project_id = os.environ.get("BQ_PROJECT_ID")
        dataset_id = os.environ.get("BQ_DATASET_ID")
        table_id = os.environ.get("BQ_TABLE_ID")  # hf_space_models

        # Fail fast with a clear message instead of silently writing to the
        # literal table id "None.None" when configuration is missing.
        missing = [
            name
            for name, value in (
                ("BQ_PROJECT_ID", project_id),
                ("BQ_DATASET_ID", dataset_id),
                ("BQ_TABLE_ID", table_id),
            )
            if not value
        ]
        if missing:
            raise KeyError(f"missing environment variable(s): {', '.join(missing)}")

        full_table_id = f"{dataset_id}.{table_id}"

        to_gbq(
            pd.DataFrame({"model_id": models}),
            destination_table=full_table_id,
            project_id=project_id,
            if_exists="replace",
            table_schema=table_schema,
            credentials=credentials,
        )
    except Exception as e:
        # Non-fatal by design: the demo should still launch if the BQ sync fails.
        print(f"BigQuery model table update failed: {e}")