Spaces:

unijoh
/

ord

Sleeping

App Files Community

unijoh committed on Jun 19, 2024

Commit

18a5f3f

verified ·

1 Parent(s): 343fbe1

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -24

app.py CHANGED Viewed

@@ -1,28 +1,34 @@
 import gradio as gr
 import pandas as pd
 # Load and parse the CSV file from Hugging Face
 def load_data():
-    url = "https://huggingface.co/datasets/unijoh/RAVNlex/blob/main/RAVNlex.csv"
-    df = pd.read_csv(url, delimiter='\t', encoding='iso-8859-10', names=["#ORTO", "#PPOS", "#PHON1", "#PHON2", "#COMM"], dtype=str)
     lemmas = {}
     current_lemma = None
-    for _, row in df.iterrows():
-        if pd.isna(row['#ORTO']) or row['#ORTO'] == '---':
-            current_lemma = None
-        elif current_lemma is None:
-            current_lemma = row['#ORTO'].replace("ORTO:", "")
-            lemmas[current_lemma] = []
-        else:
-            lemma_data = {
-                'word': row['#ORTO'].replace("ORTO:", "") if pd.notna(row['#ORTO']) else "",
-                'PPOS': row['#PPOS'].replace("PPOS:", "") if pd.notna(row['#PPOS']) else "",
-                'PHON1': row['#PHON1'].replace("PHON:", "") if pd.notna(row['#PHON1']) else "",
-                'PHON2': row['#PHON2'].replace("PHON:", "") if pd.notna(row['#PHON2']) else "",
-                'COMM': row['#COMM'] if pd.notna(row['#COMM']) else ""
-            }
-            lemmas[current_lemma].append(lemma_data)
     print("Loaded lemmas:", lemmas)  # Debugging output
     return lemmas
@@ -31,10 +37,11 @@ lemmas = load_data()
 def create_noun_table(lemma, forms):
     table_data = {
-        'ncmns': '', 'ncmsn==duu': '', 'ncmsa': '', 'ncmsa==duu': '',
-        'ncmsd': '', 'ncmsd==duu': '', 'ncmsg': '', 'ncmsg==dou': '',
-        'ncmpn': '', 'ncmpn==duu': '', 'ncmpa': '', 'ncmpa==duu': '',
-        'ncmpd': '', 'ncmpd==duu': '', 'ncmpg': '', 'ncmpg==dou': ''
     }
     for form in forms:
@@ -63,9 +70,9 @@ def create_noun_table(lemma, forms):
         </thead>
         <tbody>
             <tr>
-                <td>{table_data['ncmns']}</td>
-                <td>{table_data['ncmsn==duu']}</td>
-                <td>{table_data['ncmpn']}</td>
                 <td>{table_data['ncmpn==duu']}</td>
             </tr>
             <tr>

 import gradio as gr
 import pandas as pd
+import csv
 # Load and parse the CSV file from Hugging Face
 def load_data():
+    url = "https://huggingface.co/datasets/unijoh/RAVNlex/blob/main/RAVNlex_small.csv"
     lemmas = {}
     current_lemma = None
+    with open(url, 'r', encoding='iso-8859-10') as file:
+        reader = csv.reader(file, delimiter='\t')
+        for row in reader:
+            if len(row) != 5:
+                print(f"Skipping problematic line {reader.line_num}: {row}")
+                continue
+            orto, ppos, phon1, phon2, comm = row
+            if orto == '---':
+                current_lemma = None
+            elif current_lemma is None:
+                current_lemma = orto.replace("ORTO:", "")
+                lemmas[current_lemma] = []
+            else:
+                lemma_data = {
+                    'word': orto.replace("ORTO:", "") if orto else "",
+                    'PPOS': ppos.replace("PPOS:", "") if ppos else "",
+                    'PHON1': phon1.replace("PHON:", "") if phon1 else "",
+                    'PHON2': phon2.replace("PHON:", "") if phon2 else "",
+                    'COMM': comm if comm else ""
+                }
+                lemmas[current_lemma].append(lemma_data)
     print("Loaded lemmas:", lemmas)  # Debugging output
     return lemmas
 def create_noun_table(lemma, forms):
     table_data = {
+        'nsmns': '', 'nsns': '', 'nsmsn': '', 'nsas': '', 'nsds': '', 'nsgs': '',
+        'npns': '', 'npas': '', 'npds': '', 'npgs': '',
+        'nsmsn==duu': '', 'ncmsa': '', 'ncmsd': '', 'ncmsg': '',
+        'ncmsd==duu': '', 'ncmsg==dou': '', 'ncmpn': '', 'ncmpa': '', 'ncmpd': '', 'ncmpg': '',
+        'ncmpn==duu': '', 'ncmpa==duu': '', 'ncmpd==duu': '', 'ncmpg==dou': ''
     }
     for form in forms:
         </thead>
         <tbody>
             <tr>
+                <td>{table_data['nsns']}</td>
+                <td>{table_data['nsmsn==duu']}</td>
+                <td>{table_data['npns']}</td>
                 <td>{table_data['ncmpn==duu']}</td>
             </tr>
             <tr>