Spaces:

unijoh
/

ord

Sleeping

unijoh committed on Jun 19, 2024

Commit

34202a3

verified ·

1 Parent(s): b8a1a0d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,32 +3,31 @@ import pandas as pd
 # Load and parse the CSV file from Hugging Face
 def load_data():
-    url = "https://huggingface.co/datasets/unijoh/RAVNlex/blob/main/RAVNlex_small.csv"
     lemmas = {}
     current_lemma = None
-    with open(url, 'r', encoding='iso-8859-10') as file:
-        reader = pd.read_csv(file, delimiter='\t', encoding='iso-8859-10', dtype=str, quoting=csv.QUOTE_NONE)
-        for row in reader.itertuples(index=False, name=None):
-            if len(row) < 5:
-                print(f"Skipping problematic line {reader.line_num}: {row}")
-                continue
-            orto, ppos, phon1, phon2, comm, *pronunciations = row
-            if orto == '---':
-                current_lemma = None
-            elif current_lemma is None:
-                current_lemma = orto.replace("ORTO:", "")
-                lemmas[current_lemma] = []
-            else:
-                lemma_data = {
-                    'word': orto.replace("ORTO:", "") if orto else "",
-                    'PPOS': ppos.replace("PPOS:", "") if ppos else "",
-                    'PHON1': phon1.replace("PHON:", "") if phon1 else "",
-                    'PHON2': phon2.replace("PHON:", "") if phon2 else "",
-                    'COMM': comm if comm else "",
-                    'pronunciations': pronunciations
-                }
-                lemmas[current_lemma].append(lemma_data)
     print("Loaded lemmas:", lemmas)  # Debugging output
     return lemmas

 # Load and parse the CSV file from Hugging Face
 def load_data(source=None):
     """Load the RAVNlex lexicon and group its rows by lemma.

     The file is tab-separated. Each data row is unpacked as
     ``orto, ppos, phon1, phon2, comm, *pronunciations``. A row whose
     first cell is ``---`` closes the current group; the first row after
     it (or at the start of the data) names the lemma, and every
     following row up to the next ``---`` is stored as a form of it.

     Args:
         source: Anything ``pandas.read_csv`` accepts (URL, path or
             file-like object). Defaults to the RAVNlex_small.csv file
             on the Hugging Face Hub.

     Returns:
         dict mapping each lemma (``ORTO:`` marker removed) to a list of
         dicts with the keys ``word``, ``PPOS``, ``PHON1``, ``PHON2``,
         ``COMM`` and ``pronunciations`` (list of any remaining cells).
     """
     # Local import: only this function needs it, and the module's
     # top-level imports are not guaranteed to include csv.
     import csv

     if source is None:
         source = "https://huggingface.co/datasets/unijoh/RAVNlex/resolve/main/RAVNlex_small.csv"

     # NOTE(review): the first line is still consumed as a header row
     # (pandas default) - confirm the file really starts with one.
     df = pd.read_csv(
         source,
         delimiter='\t',
         encoding='iso-8859-10',
         dtype=str,
         # Quote characters are data in this lexicon, not field quoting;
         # without this a cell starting with '"' swallows later tabs/lines.
         quoting=csv.QUOTE_NONE,
         # Keep every cell a string: otherwise "NA", "null", "nan" and
         # empty cells become float NaN and break the .replace() calls.
         na_filter=False,
     ).fillna("")  # defensive: guarantees no NaN reaches the loop

     lemmas = {}
     current_lemma = None
     for row in df.itertuples(index=False, name=None):
         # Every row has one value per column, so this only triggers
         # when the whole file has fewer than five columns.
         if len(row) < 5:
             print(f"Skipping problematic line: {row}")
             continue
         orto, ppos, phon1, phon2, comm, *pronunciations = row
         if orto == '---':
             current_lemma = None
         elif current_lemma is None:
             current_lemma = orto.replace("ORTO:", "")
             # setdefault keeps forms already collected if the same
             # lemma head shows up again in a later group.
             lemmas.setdefault(current_lemma, [])
         else:
             lemma_data = {
                 'word': orto.replace("ORTO:", "") if orto else "",
                 'PPOS': ppos.replace("PPOS:", "") if ppos else "",
                 'PHON1': phon1.replace("PHON:", "") if phon1 else "",
                 'PHON2': phon2.replace("PHON:", "") if phon2 else "",
                 'COMM': comm if comm else "",
                 'pronunciations': pronunciations
             }
             lemmas[current_lemma].append(lemma_data)
     print("Loaded lemmas:", lemmas)  # Debugging output
     return lemmas