Rostbraten committed on
Commit
3d454c5
·
verified ·
1 Parent(s): 8445950

Upload navigium.py

Browse files
Files changed (1) hide show
  1. navigium.py +28 -18
navigium.py CHANGED
@@ -61,7 +61,6 @@ def request2navigium(words=["currere"]):
61
  continue
62
  output.append(formated_answer)
63
  dice("Erfolgreich: Alle Informationen konnten erfolgreich extrahiert werden", word, "good")
64
- time.sleep(0.1)
65
  return output
66
 
67
  def threaded_function(words, function=request2navigium, anzahl_threads=20, wordtype=None):
@@ -115,26 +114,23 @@ def sort_by_wordtype(input):
115
  "Subjunktionen": [],
116
  "Unbekannt": []
117
  }
 
 
 
 
 
 
 
 
 
 
118
 
119
  for word in input:
120
- if word["wortart"].lower() == "SUBST".lower():
121
- sorted["Nomen"].append(word)
122
- elif word["wortart"].lower() == "VERB".lower():
123
- sorted["Verben"].append(word)
124
- elif word["wortart"].lower() == "ADJ".lower():
125
- sorted["Adjektive"].append(word)
126
- elif word["wortart"].lower() == "ADV".lower():
127
- sorted["Adverbien"].append(word)
128
- elif word["wortart"].lower() == "PRON".lower():
129
- sorted["Pronomen"].append(word)
130
- elif word["wortart"].lower() == "KONJ".lower():
131
- sorted["Konjunktionen"].append(word)
132
- elif word["wortart"].lower() == "PRAEP".lower():
133
- sorted["Präpositionen"].append(word)
134
- elif word["wortart"].lower() == "SUBJ".lower():
135
- sorted["Subjunktionen"].append(word)
136
  else:
137
  sorted["Unbekannt"].append(word)
 
138
  return sorted
139
 
140
  def split_into_words(input, delete_special_characters=True):
@@ -206,6 +202,12 @@ def identify_adjectives(word, NomGen, arg, failures, plural):
206
  failures -= 1
207
  return NomGen, failures, plural
208
 
 
 
 
 
 
 
209
  def advanced_formating(input):
210
  vocabulary = {
211
  "Nomen": [],#
@@ -267,6 +269,8 @@ def advanced_formating(input):
267
  word_properties["Dekl.-Kl."] = "unbekannt"
268
  word_properties["Bedeutung"] = word["bedeutungen"]
269
 
 
 
270
  vocabulary["Nomen"].append(word_properties)
271
 
272
  for word in input["Verben"]:
@@ -315,6 +319,8 @@ def advanced_formating(input):
315
 
316
  word_properties["Bedeutung"] = word["bedeutungen"]
317
 
 
 
318
  vocabulary["Verben"].append(word_properties)
319
 
320
  for word in input["Adjektive"]:
@@ -375,6 +381,8 @@ def advanced_formating(input):
375
 
376
  vocabulary["Adjektive"].append(word_properties)
377
 
 
 
378
  return vocabulary
379
 
380
  def save2json(file, data):
@@ -501,7 +509,9 @@ def save2by2(excel_file, by2_filepath):
501
  result_list = []
502
  for index, row in df.iterrows():
503
  # Greife auf die zweite und dritte Spalte der Zeile zu, unabhängig von den Namen
504
- if sheet == "Nomen":
 
 
505
  combined_str = f"{row.iloc[0]}, {row.iloc[1]}; {row.iloc[2]}; {row.iloc[3]}:\n{row.iloc[-1]}"
506
  elif sheet == "Verben":
507
  combined_str = f"{row.iloc[0]}, {row.iloc[1]}, {row.iloc[2]}, {row.iloc[3]}; {row.iloc[4]}:\n{row.iloc[-1]}"
 
61
  continue
62
  output.append(formated_answer)
63
  dice("Erfolgreich: Alle Informationen konnten erfolgreich extrahiert werden", word, "good")
 
64
  return output
65
 
66
  def threaded_function(words, function=request2navigium, anzahl_threads=20, wordtype=None):
 
114
  "Subjunktionen": [],
115
  "Unbekannt": []
116
  }
117
+ wortarten_kürzel = {
118
+ "SUBST": "Nomen",
119
+ "VERB": "Verben",
120
+ "ADJ": "Adjektive",
121
+ "ADV": "Adverbien",
122
+ "PRON": "Pronomen",
123
+ "KONJ": "Konjunktionen",
124
+ "PREP": "Präpositionen",
125
+ "SUBJ": "Subjunktionen"
126
+ }
127
 
128
  for word in input:
129
+ if word["wortart"] in wortarten_kürzel.keys():
130
+ sorted[wortarten_kürzel[word["wortart"]]].append(word)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  else:
132
  sorted["Unbekannt"].append(word)
133
+
134
  return sorted
135
 
136
  def split_into_words(input, delete_special_characters=True):
 
202
  failures -= 1
203
  return NomGen, failures, plural
204
 
205
+ def cleanup_properties(properties):
206
+ for key, property in properties.items():
207
+ if property.replace(" ", "") == "":
208
+ properties[key] = "-"
209
+ return properties
210
+
211
  def advanced_formating(input):
212
  vocabulary = {
213
  "Nomen": [],#
 
269
  word_properties["Dekl.-Kl."] = "unbekannt"
270
  word_properties["Bedeutung"] = word["bedeutungen"]
271
 
272
+ word_properties = cleanup_properties(word_properties)
273
+
274
  vocabulary["Nomen"].append(word_properties)
275
 
276
  for word in input["Verben"]:
 
319
 
320
  word_properties["Bedeutung"] = word["bedeutungen"]
321
 
322
+ word_properties = cleanup_properties(word_properties)
323
+
324
  vocabulary["Verben"].append(word_properties)
325
 
326
  for word in input["Adjektive"]:
 
381
 
382
  vocabulary["Adjektive"].append(word_properties)
383
 
384
+ word_properties = cleanup_properties(word_properties)
385
+
386
  return vocabulary
387
 
388
  def save2json(file, data):
 
509
  result_list = []
510
  for index, row in df.iterrows():
511
  # Greife auf die zweite und dritte Spalte der Zeile zu, unabhängig von den Namen
512
+ if row.iloc[0] == "Diese Liste basiert auf Daten von \"https://www.navigium.de/suchfunktion/_search?q={}\". © Rechteinhaber: Navigium.de":
513
+ continue
514
+ elif sheet == "Nomen":
515
  combined_str = f"{row.iloc[0]}, {row.iloc[1]}; {row.iloc[2]}; {row.iloc[3]}:\n{row.iloc[-1]}"
516
  elif sheet == "Verben":
517
  combined_str = f"{row.iloc[0]}, {row.iloc[1]}, {row.iloc[2]}, {row.iloc[3]}; {row.iloc[4]}:\n{row.iloc[-1]}"