Spaces:
Sleeping
Sleeping
Upload navigium.py
Browse files- navigium.py +28 -18
navigium.py
CHANGED
@@ -61,7 +61,6 @@ def request2navigium(words=["currere"]):
|
|
61 |
continue
|
62 |
output.append(formated_answer)
|
63 |
dice("Erfolgreich: Alle Informationen konnten erfolgreich extrahiert werden", word, "good")
|
64 |
-
time.sleep(0.1)
|
65 |
return output
|
66 |
|
67 |
def threaded_function(words, function=request2navigium, anzahl_threads=20, wordtype=None):
|
@@ -115,26 +114,23 @@ def sort_by_wordtype(input):
|
|
115 |
"Subjunktionen": [],
|
116 |
"Unbekannt": []
|
117 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
|
119 |
for word in input:
|
120 |
-
if word["wortart"].lower() == "SUBST".lower():
|
121 |
-
sorted["Nomen"].append(word)
|
122 |
-
elif word["wortart"].lower() == "VERB".lower():
|
123 |
-
sorted["Verben"].append(word)
|
124 |
-
elif word["wortart"].lower() == "ADJ".lower():
|
125 |
-
sorted["Adjektive"].append(word)
|
126 |
-
elif word["wortart"].lower() == "ADV".lower():
|
127 |
-
sorted["Adverbien"].append(word)
|
128 |
-
elif word["wortart"].lower() == "PRON".lower():
|
129 |
-
sorted["Pronomen"].append(word)
|
130 |
-
elif word["wortart"].lower() == "KONJ".lower():
|
131 |
-
sorted["Konjunktionen"].append(word)
|
132 |
-
elif word["wortart"].lower() == "PRAEP".lower():
|
133 |
-
sorted["Präpositionen"].append(word)
|
134 |
-
elif word["wortart"].lower() == "SUBJ".lower():
|
135 |
-
sorted["Subjunktionen"].append(word)
|
136 |
else:
|
137 |
sorted["Unbekannt"].append(word)
|
|
|
138 |
return sorted
|
139 |
|
140 |
def split_into_words(input, delete_special_characters=True):
|
@@ -206,6 +202,12 @@ def identify_adjectives(word, NomGen, arg, failures, plural):
|
|
206 |
failures -= 1
|
207 |
return NomGen, failures, plural
|
208 |
|
|
|
|
|
|
|
|
|
|
|
|
|
209 |
def advanced_formating(input):
|
210 |
vocabulary = {
|
211 |
"Nomen": [],#
|
@@ -267,6 +269,8 @@ def advanced_formating(input):
|
|
267 |
word_properties["Dekl.-Kl."] = "unbekannt"
|
268 |
word_properties["Bedeutung"] = word["bedeutungen"]
|
269 |
|
|
|
|
|
270 |
vocabulary["Nomen"].append(word_properties)
|
271 |
|
272 |
for word in input["Verben"]:
|
@@ -315,6 +319,8 @@ def advanced_formating(input):
|
|
315 |
|
316 |
word_properties["Bedeutung"] = word["bedeutungen"]
|
317 |
|
|
|
|
|
318 |
vocabulary["Verben"].append(word_properties)
|
319 |
|
320 |
for word in input["Adjektive"]:
|
@@ -375,6 +381,8 @@ def advanced_formating(input):
|
|
375 |
|
376 |
vocabulary["Adjektive"].append(word_properties)
|
377 |
|
|
|
|
|
378 |
return vocabulary
|
379 |
|
380 |
def save2json(file, data):
|
@@ -501,7 +509,9 @@ def save2by2(excel_file, by2_filepath):
|
|
501 |
result_list = []
|
502 |
for index, row in df.iterrows():
|
503 |
# Greife auf die zweite und dritte Spalte der Zeile zu, unabhängig von den Namen
|
504 |
-
if sheet == "Nomen":
|
|
|
|
|
505 |
combined_str = f"{row.iloc[0]}, {row.iloc[1]}; {row.iloc[2]}; {row.iloc[3]}:\n{row.iloc[-1]}"
|
506 |
elif sheet == "Verben":
|
507 |
combined_str = f"{row.iloc[0]}, {row.iloc[1]}, {row.iloc[2]}, {row.iloc[3]}; {row.iloc[4]}:\n{row.iloc[-1]}"
|
|
|
61 |
continue
|
62 |
output.append(formated_answer)
|
63 |
dice("Erfolgreich: Alle Informationen konnten erfolgreich extrahiert werden", word, "good")
|
|
|
64 |
return output
|
65 |
|
66 |
def threaded_function(words, function=request2navigium, anzahl_threads=20, wordtype=None):
|
|
|
114 |
"Subjunktionen": [],
|
115 |
"Unbekannt": []
|
116 |
}
|
117 |
+
wortarten_kürzel = {
|
118 |
+
"SUBST": "Nomen",
|
119 |
+
"VERB": "Verben",
|
120 |
+
"ADJ": "Adjektive",
|
121 |
+
"ADV": "Adverbien",
|
122 |
+
"PRON": "Pronomen",
|
123 |
+
"KONJ": "Konjunktionen",
|
124 |
+
"PREP": "Präpositionen",
|
125 |
+
"SUBJ": "Subjunktionen"
|
126 |
+
}
|
127 |
|
128 |
for word in input:
|
129 |
+
if word["wortart"] in wortarten_kürzel.keys():
|
130 |
+
sorted[wortarten_kürzel[word["wortart"]]].append(word)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
else:
|
132 |
sorted["Unbekannt"].append(word)
|
133 |
+
|
134 |
return sorted
|
135 |
|
136 |
def split_into_words(input, delete_special_characters=True):
|
|
|
202 |
failures -= 1
|
203 |
return NomGen, failures, plural
|
204 |
|
205 |
+
def cleanup_properties(properties):
    """Replace blank string entries of ``properties`` with ``"-"``.

    A value counts as blank when it is a string that is empty or consists
    only of space characters. The dictionary is modified in place and the
    same object is returned, so callers may use either the return value or
    their original reference.

    Args:
        properties: Dictionary mapping property names to values.

    Returns:
        The same ``properties`` object, with blank strings replaced by "-".
    """
    for key, value in properties.items():
        # Non-string values (lists, numbers, None) have no ``replace``
        # method; leave them untouched instead of raising AttributeError.
        if isinstance(value, str) and value.replace(" ", "") == "":
            # Overwriting an existing key does not resize the dict, so this
            # is safe while iterating over ``items()``.
            properties[key] = "-"
    return properties
|
210 |
+
|
211 |
def advanced_formating(input):
|
212 |
vocabulary = {
|
213 |
"Nomen": [],#
|
|
|
269 |
word_properties["Dekl.-Kl."] = "unbekannt"
|
270 |
word_properties["Bedeutung"] = word["bedeutungen"]
|
271 |
|
272 |
+
word_properties = cleanup_properties(word_properties)
|
273 |
+
|
274 |
vocabulary["Nomen"].append(word_properties)
|
275 |
|
276 |
for word in input["Verben"]:
|
|
|
319 |
|
320 |
word_properties["Bedeutung"] = word["bedeutungen"]
|
321 |
|
322 |
+
word_properties = cleanup_properties(word_properties)
|
323 |
+
|
324 |
vocabulary["Verben"].append(word_properties)
|
325 |
|
326 |
for word in input["Adjektive"]:
|
|
|
381 |
|
382 |
vocabulary["Adjektive"].append(word_properties)
|
383 |
|
384 |
+
word_properties = cleanup_properties(word_properties)
|
385 |
+
|
386 |
return vocabulary
|
387 |
|
388 |
def save2json(file, data):
|
|
|
509 |
result_list = []
|
510 |
for index, row in df.iterrows():
|
511 |
# Greife auf die zweite und dritte Spalte der Zeile zu, unabhängig von den Namen
|
512 |
+
if row.iloc[0] == "Diese Liste basiert auf Daten von \"https://www.navigium.de/suchfunktion/_search?q={}\". © Rechteinhaber: Navigium.de":
|
513 |
+
continue
|
514 |
+
elif sheet == "Nomen":
|
515 |
combined_str = f"{row.iloc[0]}, {row.iloc[1]}; {row.iloc[2]}; {row.iloc[3]}:\n{row.iloc[-1]}"
|
516 |
elif sheet == "Verben":
|
517 |
combined_str = f"{row.iloc[0]}, {row.iloc[1]}, {row.iloc[2]}, {row.iloc[3]}; {row.iloc[4]}:\n{row.iloc[-1]}"
|