Spaces:
Running
Running
kapllan
committed on
Commit
•
1cdf555
1
Parent(s):
8a95fb3
First commit for migrating the swiss topic modelling space.
Browse files
- README.md +3 -3
- app.py +100 -0
- id2label.json +227 -0
- install_packages.py +57 -0
- requirements.txt +21 -0
README.md
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
---
|
2 |
title: SwissParlTopicModelling
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
colorTo: red
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 4.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
|
|
1 |
---
|
2 |
title: SwissParlTopicModelling
|
3 |
+
emoji: 📉
|
4 |
+
colorFrom: indigo
|
5 |
colorTo: red
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 4.32.2
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
app.py
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json as js
|
2 |
+
import os
|
3 |
+
import re
|
4 |
+
from typing import List
|
5 |
+
|
6 |
+
import fasttext
|
7 |
+
import gradio as gr
|
8 |
+
import joblib
|
9 |
+
import omikuji
|
10 |
+
from huggingface_hub import snapshot_download
|
11 |
+
from install_packages import download_model
|
12 |
+
|
13 |
+
# Fetch the fastText language-identification model that is loaded below.
download_model('https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin', 'lid.176.bin')

# Download the model files from Hugging Face. Each repository is mirrored
# into a local directory named after its repo id.
for repo_id in ['kapllan/omikuji-bonsai-parliament-de-spacy', 'kapllan/omikuji-bonsai-parliament-fr-spacy',
                'kapllan/omikuji-bonsai-parliament-it-spacy']:
    # exist_ok makes the call idempotent across restarts.
    os.makedirs(repo_id, exist_ok=True)
    model_dir = snapshot_download(repo_id=repo_id, local_dir=repo_id)

lang_model = fasttext.load_model('lid.176.bin')

# The label names contain non-ASCII characters, so decode the file as UTF-8
# explicitly instead of relying on the platform's default encoding.
with open('./id2label.json', 'r', encoding='utf-8') as f:
    id2label = js.load(f)
|
26 |
+
|
27 |
+
|
28 |
+
def map_language(language: str) -> str:
    """Return the English display name for a supported language code.

    Args:
        language: A language code such as ``'de'``, ``'it'`` or ``'fr'``.

    Returns:
        ``'German'``, ``'Italian'`` or ``'French'`` for the three known
        codes; any other value is returned unchanged.
    """
    language_mapping = {'de': 'German',
                        'it': 'Italian',
                        'fr': 'French'}
    # A single lookup with the input itself as the fallback.
    return language_mapping.get(language, language)
|
36 |
+
|
37 |
+
|
38 |
+
# Loaded (vectorizer, model) pairs keyed by language code, so the files are
# deserialized from disk once per language instead of once per request.
_MODEL_CACHE = {}


def find_model(language: str):
    """Return the vectorizer and omikuji model for a language code.

    Args:
        language: Language code; only ``'de'``, ``'fr'`` and ``'it'`` have
            models.

    Returns:
        A ``(vectorizer, model)`` pair, or ``(None, None)`` when there is no
        model for ``language``.
    """
    if language not in ('de', 'fr', 'it'):
        return None, None
    if language not in _MODEL_CACHE:
        model_dir = f'./kapllan/omikuji-bonsai-parliament-{language}-spacy'
        vectorizer = joblib.load(f'{model_dir}/vectorizer')
        model = omikuji.Model.load(f'{model_dir}/omikuji-model')
        _MODEL_CACHE[language] = (vectorizer, model)
    return _MODEL_CACHE[language]
|
46 |
+
|
47 |
+
|
48 |
+
def predict_lang(text: str) -> str:
    """Detect the language of ``text`` with the fastText language model.

    Args:
        text: The document to classify; it may span several lines.

    Returns:
        The best-matching label with the ``__label__`` prefix removed, or an
        empty string when the model yields no label.
    """
    # fastText cannot process text that contains line breaks. Replace them
    # with a space rather than deleting them, so that the last word of one
    # line is not fused with the first word of the next.
    text = re.sub(r'[\r\n]+', ' ', text)
    labels, _scores = lang_model.predict(text, k=1)  # k=1: best match only
    if not labels:
        # Guard for the case where no label comes back (e.g. empty input).
        return ''
    return labels[0].replace('__label__', '')
|
54 |
+
|
55 |
+
|
56 |
+
def predict_topic(text: str) -> [List[str], str]:
    """Score every topic label for ``text`` in its detected language.

    Args:
        text: The document to classify.

    Returns:
        A pair ``(results, language)``. ``results`` is a list of
        ``(label, score)`` tuples and is empty when no model exists for the
        detected language or the document has no non-zero features.
        ``language`` is the display name from ``map_language``.
    """
    lang_code = predict_lang(text)
    vectorizer, model = find_model(lang_code)
    language = map_language(lang_code)

    scored_labels = []
    if vectorizer is None:
        return scored_labels, language

    # presumably a scipy sparse matrix with one row per input document
    for row in vectorizer.transform([text]):
        if not row.nnz:  # All zero vector, empty result
            continue
        features = [(col, row[0, col]) for col in row.nonzero()[1]]
        scored_labels.extend(
            (id2label[str(label_id)], score)
            for label_id, score in model.predict(features, top_k=1000)
        )
    return scored_labels, language
|
71 |
+
|
72 |
+
|
73 |
+
def topic_modeling(text: str, threshold: float) -> [List[str], str]:
    """Return the topics of ``text`` whose score reaches ``threshold``.

    Args:
        text: The document to classify.
        threshold: Minimum score a topic needs in order to be kept.

    Returns:
        A pair ``(topics, language)``. ``topics`` holds the ``(label, score)``
        tuples with ``score >= threshold``; it is empty when nothing was
        predicted or the language is not German, French or Italian.
    """
    topics, language = predict_topic(text)
    if not topics or language not in ('German', 'French', 'Italian'):
        return [], language
    kept = [topic for topic in topics if topic[1] >= threshold]
    return kept, language
|
81 |
+
|
82 |
+
|
83 |
+
# Two-column layout: the document, threshold and detected language on the
# left, the resulting (label, score) table on the right.
with gr.Blocks() as iface:
    gr.Markdown("# Topic Modeling")
    gr.Markdown("Enter a document and get each topic along with its score.")

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(lines=10, placeholder="Enter a document")
            submit_button = gr.Button("Submit")
            threshold_slider = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Score Threshold", value=0.0)
            language_text = gr.Textbox(lines=1, placeholder="Detected language will be shown here...",
                                       interactive=False, label="Detected Language")
        with gr.Column():
            output_data = gr.Dataframe(headers=["Label", "Score"])

    # The order of the outputs matches the (topics, language) pair that
    # topic_modeling returns.
    submit_button.click(topic_modeling, inputs=[input_text, threshold_slider], outputs=[output_data, language_text])

# Launch the app. Share links are not supported on Hugging Face Spaces (Gradio
# only warns and ignores the flag there), so request one only when running
# elsewhere. SPACE_ID is set by the Spaces runtime.
iface.launch(share=os.environ.get("SPACE_ID") is None)
|
id2label.json
ADDED
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"0": "AHV",
|
3 |
+
"1": "Abfall",
|
4 |
+
"2": "Abgabe",
|
5 |
+
"3": "Agrarpolitik",
|
6 |
+
"4": "Agrarproduktion",
|
7 |
+
"5": "Altersfragen",
|
8 |
+
"6": "Arbeit",
|
9 |
+
"7": "Arbeitslosenversicherung",
|
10 |
+
"8": "Arbeitslosigkeit",
|
11 |
+
"9": "Arbeitsmarkt",
|
12 |
+
"10": "Arbeitsrecht",
|
13 |
+
"11": "Armee",
|
14 |
+
"12": "Armut/Ungleichheit",
|
15 |
+
"13": "Artenvielfalt",
|
16 |
+
"14": "Asylpolitik",
|
17 |
+
"15": "Auslandschweizer",
|
18 |
+
"16": "Ausländerpolitik",
|
19 |
+
"17": "Aussenpolitik : Ausland",
|
20 |
+
"18": "Aussenpolitik : Schweiz",
|
21 |
+
"19": "Aussenwirtschaftspolitik",
|
22 |
+
"20": "Bankenkrise",
|
23 |
+
"21": "Bauwesen/Immobilien",
|
24 |
+
"22": "Behinderung",
|
25 |
+
"23": "Berg",
|
26 |
+
"24": "Berufliche Vorsorge",
|
27 |
+
"25": "Berufsbildung",
|
28 |
+
"26": "Beschwerderecht",
|
29 |
+
"27": "Beschäftigung und Arbeit",
|
30 |
+
"28": "Bewaffnung",
|
31 |
+
"29": "Beziehung Schweiz - EU",
|
32 |
+
"30": "Bildung",
|
33 |
+
"31": "Boden",
|
34 |
+
"32": "Bürgerrecht",
|
35 |
+
"33": "Datenschutz",
|
36 |
+
"34": "Demokratie",
|
37 |
+
"35": "Digitalisierung",
|
38 |
+
"36": "Diplomatie",
|
39 |
+
"37": "Diskriminierung",
|
40 |
+
"38": "Elektrizität",
|
41 |
+
"39": "Energie",
|
42 |
+
"40": "Energiepolitik",
|
43 |
+
"41": "Erberecht",
|
44 |
+
"42": "Ernährung",
|
45 |
+
"43": "Erwerbsersatzordnung",
|
46 |
+
"44": "Europapolitik",
|
47 |
+
"45": "Europarat",
|
48 |
+
"46": "Europarecht",
|
49 |
+
"47": "Europäische Union",
|
50 |
+
"48": "Europäisches Parlament",
|
51 |
+
"49": "Exekutive",
|
52 |
+
"50": "Familienfragen",
|
53 |
+
"51": "Familienrecht",
|
54 |
+
"52": "Familienzulage",
|
55 |
+
"53": "Finanzausgleich",
|
56 |
+
"54": "Finanzmarkt",
|
57 |
+
"55": "Finanzplatz",
|
58 |
+
"56": "Finanzrecht",
|
59 |
+
"57": "Finanzwesen",
|
60 |
+
"58": "Flüchtling",
|
61 |
+
"59": "Forschung",
|
62 |
+
"60": "Fortpflanzung",
|
63 |
+
"61": "Fossile Energie",
|
64 |
+
"62": "Föderalismus",
|
65 |
+
"63": "Geld- und Währungspolitik",
|
66 |
+
"64": "Geldwäscherei",
|
67 |
+
"65": "Gentechnologie",
|
68 |
+
"66": "Gerichtswesen",
|
69 |
+
"67": "Geschichte Ausland",
|
70 |
+
"68": "Geschichte Schweiz",
|
71 |
+
"69": "Geschlechterfragen",
|
72 |
+
"70": "Gesellschaftsfragen",
|
73 |
+
"71": "Gesundheit",
|
74 |
+
"72": "Gesundheitspolitik",
|
75 |
+
"73": "Gewalt",
|
76 |
+
"74": "Gewerkschaft",
|
77 |
+
"75": "Globalisierung",
|
78 |
+
"76": "Grenze",
|
79 |
+
"77": "Grundrechte",
|
80 |
+
"78": "Güterverkehr",
|
81 |
+
"79": "Handel",
|
82 |
+
"80": "Heil- und Hilfsmittel",
|
83 |
+
"81": "Informatik",
|
84 |
+
"82": "Information",
|
85 |
+
"83": "Informationswissenschaft",
|
86 |
+
"84": "Internationale Politik",
|
87 |
+
"85": "Internationales Recht",
|
88 |
+
"86": "Internet und soziale Medien",
|
89 |
+
"87": "Interventionspolitik",
|
90 |
+
"88": "Invalidenversicherung",
|
91 |
+
"89": "Jagd und Fischerei",
|
92 |
+
"90": "Kapital",
|
93 |
+
"91": "Katastrophe",
|
94 |
+
"92": "Kernenergie",
|
95 |
+
"93": "Kinder- und Jugendfragen",
|
96 |
+
"94": "Kinderrechte",
|
97 |
+
"95": "Kindes- und Erwachsenenschutzrecht",
|
98 |
+
"96": "Klimafragen",
|
99 |
+
"97": "Konkursrecht",
|
100 |
+
"98": "Konsum",
|
101 |
+
"99": "Korruption",
|
102 |
+
"100": "Krankenversicherung",
|
103 |
+
"101": "Krieg",
|
104 |
+
"102": "Krise",
|
105 |
+
"103": "Kultur",
|
106 |
+
"104": "Landwirtschaft",
|
107 |
+
"105": "Luft",
|
108 |
+
"106": "Luftfahrt",
|
109 |
+
"107": "Lärm",
|
110 |
+
"108": "Medien",
|
111 |
+
"109": "Medien / Kommunikation",
|
112 |
+
"110": "Medienrecht",
|
113 |
+
"111": "Medizinalberuf",
|
114 |
+
"112": "Menschenrechte",
|
115 |
+
"113": "Miet- und Wohnungswesen",
|
116 |
+
"114": "Migration",
|
117 |
+
"115": "Migrationsbewegung",
|
118 |
+
"116": "Mutterschaftsversicherung",
|
119 |
+
"117": "Nationalbank",
|
120 |
+
"118": "Obligationenrecht",
|
121 |
+
"119": "Parlament",
|
122 |
+
"120": "Parlament Ausland",
|
123 |
+
"121": "Parlament Schweiz",
|
124 |
+
"122": "Patient",
|
125 |
+
"123": "Personenrecht",
|
126 |
+
"124": "Pflege",
|
127 |
+
"125": "Post",
|
128 |
+
"126": "Presse",
|
129 |
+
"127": "Privatversicherung",
|
130 |
+
"128": "Produktion",
|
131 |
+
"129": "Radio und Fernsehen",
|
132 |
+
"130": "Rassismus",
|
133 |
+
"131": "Ratsmitglied",
|
134 |
+
"132": "Raumplanung",
|
135 |
+
"133": "Raumplanung und Wohnungswesen",
|
136 |
+
"134": "Recht Allgemein",
|
137 |
+
"135": "Rechte und Freiheiten",
|
138 |
+
"136": "Rechtswissenschaft",
|
139 |
+
"137": "Religionsfragen",
|
140 |
+
"138": "Sachenrecht",
|
141 |
+
"139": "Sans-Papiers",
|
142 |
+
"140": "Schiedsgerichtsbarkeit",
|
143 |
+
"141": "Schienenverkehr",
|
144 |
+
"142": "Schifffahrt",
|
145 |
+
"143": "Schule",
|
146 |
+
"144": "Service public",
|
147 |
+
"145": "Sicherheitspolitik",
|
148 |
+
"146": "Sicherheitspolitik/Friedenspolitik",
|
149 |
+
"147": "Soziale Fragen",
|
150 |
+
"148": "Sozialer Schutz",
|
151 |
+
"149": "Sozialhilfe",
|
152 |
+
"150": "Sozialpolitik",
|
153 |
+
"151": "Sozialversicherung",
|
154 |
+
"152": "Spiel",
|
155 |
+
"153": "Spital",
|
156 |
+
"154": "Sport",
|
157 |
+
"155": "Sprache",
|
158 |
+
"156": "Staat",
|
159 |
+
"157": "Staatspolitik",
|
160 |
+
"158": "Staatssouveränität",
|
161 |
+
"159": "Sterben und Tod",
|
162 |
+
"160": "Steuer",
|
163 |
+
"161": "Steuerhinterziehung",
|
164 |
+
"162": "Steuerrecht",
|
165 |
+
"163": "Steuerwettbewerb",
|
166 |
+
"164": "Stiftung",
|
167 |
+
"165": "Strafprozessordnung",
|
168 |
+
"166": "Strafrecht",
|
169 |
+
"167": "Straftat",
|
170 |
+
"168": "Strassenverkehr",
|
171 |
+
"169": "Sucht",
|
172 |
+
"170": "Telefonie",
|
173 |
+
"171": "Terrorismus",
|
174 |
+
"172": "Tierschutz",
|
175 |
+
"173": "Tierversuch",
|
176 |
+
"174": "Tourismus",
|
177 |
+
"175": "Umwelt",
|
178 |
+
"176": "Umweltpolitik",
|
179 |
+
"177": "Umweltschutz",
|
180 |
+
"178": "Unfallversicherung",
|
181 |
+
"179": "Universität/Hochschule/Fachhochschule",
|
182 |
+
"180": "Unternehmen",
|
183 |
+
"181": "Urheberrecht",
|
184 |
+
"182": "Verfahrensrecht",
|
185 |
+
"183": "Verfassung",
|
186 |
+
"184": "Vergaberecht",
|
187 |
+
"185": "Verkehr",
|
188 |
+
"186": "Verkehrspolitik",
|
189 |
+
"187": "Vertrag",
|
190 |
+
"188": "Verwaltungsrecht",
|
191 |
+
"189": "Volksabstimmung",
|
192 |
+
"190": "Vorrechte und Immunität",
|
193 |
+
"191": "Wahlen",
|
194 |
+
"192": "Wald",
|
195 |
+
"193": "Wasser",
|
196 |
+
"194": "Weiterbildung",
|
197 |
+
"195": "Wettbewerb",
|
198 |
+
"196": "Wirtschaft",
|
199 |
+
"197": "Wirtschaftsleben",
|
200 |
+
"198": "Wirtschaftspolitik",
|
201 |
+
"199": "Wissenschaft / Forschung",
|
202 |
+
"200": "Zivilprozessordnung",
|
203 |
+
"201": "Zivilrecht",
|
204 |
+
"202": "Zivilschutz und Bevölkerungsschutz/Zivildienst",
|
205 |
+
"203": "Zoll",
|
206 |
+
"204": "erneuerbare Energie",
|
207 |
+
"205": "innere Sicherheit",
|
208 |
+
"206": "internationale Beziehungen",
|
209 |
+
"207": "internationale Organisation",
|
210 |
+
"208": "internationale Politik",
|
211 |
+
"209": "internationale Rechtshilfe",
|
212 |
+
"210": "internationale Strafjustiz",
|
213 |
+
"211": "internationale Zusammenarbeit",
|
214 |
+
"212": "internationaler Konflikt",
|
215 |
+
"213": "internationales Abkommen",
|
216 |
+
"214": "internationales Privatrecht",
|
217 |
+
"215": "internationales Recht",
|
218 |
+
"216": "internationales humanitäres Recht",
|
219 |
+
"217": "kantonales Parlament",
|
220 |
+
"218": "politische Partei",
|
221 |
+
"219": "politische Rechte",
|
222 |
+
"220": "politisches Leben",
|
223 |
+
"221": "politisches System",
|
224 |
+
"222": "öffentliche Finanzen",
|
225 |
+
"223": "öffentliche Verwaltung",
|
226 |
+
"224": "öffentlicher Verkehr"
|
227 |
+
}
|
install_packages.py
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import subprocess
|
3 |
+
import sys
|
4 |
+
|
5 |
+
import requests
|
6 |
+
|
7 |
+
|
8 |
+
def download_model(url, save_path):
    """Download ``url`` to ``save_path``, streaming the body in chunks.

    On a non-200 response an error message is printed and nothing is written.
    The body is first written to ``<save_path>.part`` and moved into place
    only once complete, so an interrupted download never leaves a truncated
    file at ``save_path``.

    Args:
        url: Address of the file to download.
        save_path: Destination path of the downloaded file.
    """
    # The context manager releases the connection on every exit path. The
    # timeout bounds the connect phase and each wait for data (not the total
    # transfer time), so a stalled server cannot hang the caller forever.
    with requests.get(url, stream=True, timeout=60) as response:
        # Check if the request was successful (status code 200)
        if response.status_code != 200:
            print(f"Failed to download model. Status code: {response.status_code}")
            return

        partial_path = f"{save_path}.part"
        with open(partial_path, 'wb') as f:
            # 1 MiB chunks keep the number of Python-level writes low.
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)

    os.replace(partial_path, save_path)
    print("Model downloaded successfully!")
|
23 |
+
|
24 |
+
|
25 |
+
def set_tokenizers_parallelism(value):
    """Set TOKENIZERS_PARALLELISM to 'true' or 'false' and report the result.

    Args:
        value: Any value; its truthiness selects 'true' or 'false'.
    """
    if value:
        flag = 'true'
    else:
        flag = 'false'
    os.environ['TOKENIZERS_PARALLELISM'] = flag
    print(f"TOKENIZERS_PARALLELISM set to {os.environ['TOKENIZERS_PARALLELISM']}")
|
29 |
+
|
30 |
+
|
31 |
+
def install_requirements():
    """Install the packages listed in requirements.txt with pip.

    pip is run through the current interpreter. If it exits with a non-zero
    status, the failure is printed and the process exits with status 1.
    """
    pip_command = [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"]
    try:
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError as e:
        print(f"Failed to install packages from requirements.txt: {e}")
        sys.exit(1)
    else:
        print("All packages from requirements.txt installed successfully.")
|
39 |
+
|
40 |
+
|
41 |
+
def install_spacy_model(model_name):
    """Download one spaCy model by running ``python -m spacy download``.

    If the command exits with a non-zero status, the failure is printed and
    the process exits with status 1.

    Args:
        model_name: Name of the spaCy model to install.
    """
    spacy_command = [sys.executable, "-m", "spacy", "download", model_name]
    try:
        subprocess.check_call(spacy_command)
    except subprocess.CalledProcessError as e:
        print(f"Failed to install spaCy model '{model_name}': {e}")
        sys.exit(1)
    else:
        print(f"spaCy model '{model_name}' installed successfully.")
|
49 |
+
|
50 |
+
|
51 |
+
# Script entry point: install the Python requirements, the three spaCy
# models, set the tokenizer flag and fetch the fastText model, in that order.
if __name__ == "__main__":
    install_requirements()
    for spacy_model in ("de_core_news_lg", "fr_core_news_lg", "it_core_news_lg"):
        install_spacy_model(spacy_model)
    set_tokenizers_parallelism(True)
    download_model('https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin', 'lid.176.bin')
|
requirements.txt
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Requests==2.32.2
|
2 |
+
annif
|
3 |
+
beautifulsoup4==4.12.3
|
4 |
+
datasets==2.14.5
|
5 |
+
fasttext==0.9.2
|
6 |
+
gradio
|
7 |
+
iterative_stratification==0.1.7
|
8 |
+
nltk==3.8.1
|
9 |
+
numpy==1.24.4
|
10 |
+
omikuji==0.5.1
|
11 |
+
openpyxl
|
12 |
+
pandas==2.2.2
|
13 |
+
pytz==2023.3.post1
|
14 |
+
scikit_learn==1.3.2
|
15 |
+
sentence_transformers==2.2.2
|
16 |
+
swissparlpy==0.3.0
|
17 |
+
tqdm==4.66.1
|
18 |
+
transformers==4.39.3
|
19 |
+
spacy==3.7.4
|
20 |
+
huggingface_hub
|
21 |
+
requests
|