Spaces:

unijoh
/

ord

Sleeping

unijoh committed on Jun 19, 2024

Commit

b8a1a0d

verified ·

1 Parent(s): 18a5f3f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import gradio as gr
 import pandas as pd
-import csv
 # Load and parse the CSV file from Hugging Face
 def load_data():
@@ -9,12 +8,12 @@ def load_data():
     current_lemma = None
     with open(url, 'r', encoding='iso-8859-10') as file:
-        reader = csv.reader(file, delimiter='\t')
-        for row in reader:
-            if len(row) != 5:
                 print(f"Skipping problematic line {reader.line_num}: {row}")
                 continue
-            orto, ppos, phon1, phon2, comm = row
             if orto == '---':
                 current_lemma = None
             elif current_lemma is None:
@@ -26,7 +25,8 @@ def load_data():
                     'PPOS': ppos.replace("PPOS:", "") if ppos else "",
                     'PHON1': phon1.replace("PHON:", "") if phon1 else "",
                     'PHON2': phon2.replace("PHON:", "") if phon2 else "",
-                    'COMM': comm if comm else ""
                 }
                 lemmas[current_lemma].append(lemma_data)

 import gradio as gr
 import pandas as pd
 # Load and parse the CSV file from Hugging Face
 def load_data():
     current_lemma = None
     with open(url, 'r', encoding='iso-8859-10') as file:
+        reader = pd.read_csv(file, delimiter='\t', encoding='iso-8859-10', dtype=str, quoting=csv.QUOTE_NONE)
+        for row in reader.itertuples(index=False, name=None):
+            if len(row) < 5:
                 print(f"Skipping problematic line {reader.line_num}: {row}")
                 continue
+            orto, ppos, phon1, phon2, comm, *pronunciations = row
             if orto == '---':
                 current_lemma = None
             elif current_lemma is None:
                     'PPOS': ppos.replace("PPOS:", "") if ppos else "",
                     'PHON1': phon1.replace("PHON:", "") if phon1 else "",
                     'PHON2': phon2.replace("PHON:", "") if phon2 else "",
+                    'COMM': comm if comm else "",
+                    'pronunciations': pronunciations
                 }
                 lemmas[current_lemma].append(lemma_data)