Spaces:

unijoh
/

ord

Sleeping

App Files Files Community

unijoh committed on Jun 19, 2024

Commit

d81a6b9

verified ·

1 Parent(s): 29f0493

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -43

app.py CHANGED Viewed

@@ -26,6 +26,19 @@ def load_data():
         return [f"{item}=={rest}" for item in expanded]
     for row in df.itertuples(index=False, name=None):
         if len(row) < 5:
             print(f"Skipping problematic line: {row}")
@@ -38,27 +51,31 @@ def load_data():
             lemmas[current_lemma] = []
             expanded_ppos = expand_ppos(ppos.replace("PPOS:", "") if ppos else "")
             for ep in expanded_ppos:
-                lemma_data = {
-                    'word': current_lemma,
-                    'PPOS': ep,
-                    'PHON1': phon1.replace("PHON:", "") if phon1 else "",
-                    'PHON2': phon2.replace("PHON:", "") if phon2 else "",
-                    'COMM': comm if comm else "",
-                    'pronunciations': pronunciations
-                }
-                lemmas[current_lemma].append(lemma_data)
         else:
             expanded_ppos = expand_ppos(ppos.replace("PPOS:", "") if ppos else "")
             for ep in expanded_ppos:
-                lemma_data = {
-                    'word': orto.replace("ORTO:", "") if orto else "",
-                    'PPOS': ep,
-                    'PHON1': phon1.replace("PHON:", "") if phon1 else "",
-                    'PHON2': phon2.replace("PHON:", "") if phon2 else "",
-                    'COMM': comm if comm else "",
-                    'pronunciations': pronunciations
-                }
-                lemmas[current_lemma].append(lemma_data)
     print("Loaded lemmas:", lemmas)  # Debugging output
     return lemmas
@@ -67,19 +84,16 @@ lemmas = load_data()
 def create_noun_table(lemma, forms):
     table_data = {
-        'ncmsn==iuu': '', 'ncmsn==duu': '', 'ncfsn==iuu': '', 'ncfsn==duu': '', 'ncnsn==iuu': '', 'ncnsn==duu': '',
-        'ncmsa==iuu': '', 'ncmsa==duu': '', 'ncfsa==iuu': '', 'ncfsa==duu': '', 'ncnsa==iuu': '', 'ncnsa==duu': '',
-        'ncmsd==iuu': '', 'ncmsd==duu': '', 'ncfsd==iuu': '', 'ncfsd==duu': '', 'ncnsd==iuu': '', 'ncnsd==duu': '',
-        'ncmsg==iou': '', 'ncmsg==dou': '', 'ncfsg==iou': '', 'ncfsg==dou': '', 'ncnsg==iou': '', 'ncnsg==dou': '',
-        'ncmpn==iuu': '', 'ncmpn==duu': '', 'ncfnn==iuu': '', 'ncfnn==duu': '', 'ncnnn==iuu': '', 'ncnnn==duu': '',
-        'ncmpa==iuu': '', 'ncmpa==duu': '', 'ncfna==iuu': '', 'ncfna==duu': '', 'ncnna==iuu': '', 'ncnna==duu': '',
-        'ncmpd==iuu': '', 'ncmpd==duu': '', 'ncmpg==iou': '', 'ncmpg==dou': '', 'ncfnn==iou': '', 'ncfnn==dou': '', 'ncnnn==iou': '', 'ncnnn==dou': ''
     }
     for form in forms:
-        ppos = form['PPOS'].lower()  # Normalize to lowercase
         word = form['word']
-        print(f"Processing: word={word}, ppos={ppos}, key={ppos}")
         if ppos in table_data:
             table_data[ppos] = word
         else:
@@ -103,28 +117,28 @@ def create_noun_table(lemma, forms):
         </thead>
         <tbody>
             <tr>
-                <td>{table_data['ncmsn==iuu'] or table_data['ncfsn==iuu'] or table_data['ncnsn==iuu']}</td>
-                <td>{table_data['ncmsn==duu'] or table_data['ncfsn==duu'] or table_data['ncnsn==duu']}</td>
-                <td>{table_data['ncmpn==iuu'] or table_data['ncfnn==iuu'] or table_data['ncnnn==iuu']}</td>
-                <td>{table_data['ncmpn==duu'] or table_data['ncfnn==duu'] or table_data['ncnnn==duu']}</td>
             </tr>
             <tr>
-                <td>{table_data['ncmsa==iuu'] or table_data['ncfsa==iuu'] or table_data['ncnsa==iuu']}</td>
-                <td>{table_data['ncmsa==duu'] or table_data['ncfsa==duu'] or table_data['ncnsa==duu']}</td>
-                <td>{table_data['ncmpa==iuu'] or table_data['ncfna==iuu'] or table_data['ncnna==iuu']}</td>
-                <td>{table_data['ncmpa==duu'] or table_data['ncfna==duu'] or table_data['ncnna==duu']}</td>
             </tr>
             <tr>
-                <td>{table_data['ncmsd==iuu'] or table_data['ncfsd==iuu'] or table_data['ncnsd==iuu']}</td>
-                <td>{table_data['ncmsd==duu'] or table_data['ncfsd==duu'] or table_data['ncnsd==duu']}</td>
-                <td>{table_data['ncmpd==iuu'] or table_data['ncfsd==iuu'] or table_data['ncnnn==iuu']}</td>
-                <td>{table_data['ncmpd==duu'] or table_data['ncfsd==duu'] or table_data['ncnnn==duu']}</td>
             </tr>
             <tr>
-                <td>{table_data['ncmsg==iou'] or table_data['ncfsg==iou'] or table_data['ncnsg==iou']}</td>
-                <td>{table_data['ncmsg==dou'] or table_data['ncfsg==dou'] or table_data['ncnsg==dou']}</td>
-                <td>{table_data['ncmpg==iou'] or table_data['ncfnn==iou'] or table_data['ncnnn==iou']}</td>
-                <td>{table_data['ncmpg==dou'] or table_data['ncfnn==dou'] or table_data['ncnnn==dou']}</td>
             </tr>
         </tbody>
     </table>

         return [f"{item}=={rest}" for item in expanded]
+    def simplify_ppos(ppos):
+        """Reduce an expanded noun tag to the key format used by the noun table.
+
+        Takes a tag such as ``"ncmsn==iuu"`` (load_data passes it already
+        lowercased) and returns ``"<number><case>==<definiteness>"``, for
+        example ``"sn==i"``. Returns ``None`` when the input is not of the
+        form ``"<tag>==<suffix>"`` or the tag has fewer than five characters.
+        """
+        ppos_parts = ppos.split('==')
+        if len(ppos_parts) != 2:
+            return None
+        tag, suffix = ppos_parts
+        if len(tag) < 5:
+            return None
+        # Index 3 of the tag is the number slot; anything other than 's' is
+        # treated as plural.
+        number = 's' if tag[3] == 's' else 'p'
+        case = tag[4]
+        # Definiteness comes from the suffix after '==' ('iuu'/'iou' vs
+        # 'duu'/'dou'), not from the one-letter case. Testing only the first
+        # letter keeps genitive 'iou' forms indefinite.
+        definiteness = 'i' if suffix.startswith('i') else 'd'
+        # The '==' separator is required so the result matches the keys that
+        # create_noun_table looks up (e.g. 'sn==i').
+        return f"{number}{case}=={definiteness}"
     for row in df.itertuples(index=False, name=None):
         if len(row) < 5:
             print(f"Skipping problematic line: {row}")
             lemmas[current_lemma] = []
             expanded_ppos = expand_ppos(ppos.replace("PPOS:", "") if ppos else "")
             for ep in expanded_ppos:
+                simple_ppos = simplify_ppos(ep.lower())
+                if simple_ppos:
+                    lemma_data = {
+                        'word': current_lemma,
+                        'PPOS': simple_ppos,
+                        'PHON1': phon1.replace("PHON:", "") if phon1 else "",
+                        'PHON2': phon2.replace("PHON:", "") if phon2 else "",
+                        'COMM': comm if comm else "",
+                        'pronunciations': pronunciations
+                    }
+                    lemmas[current_lemma].append(lemma_data)
         else:
             expanded_ppos = expand_ppos(ppos.replace("PPOS:", "") if ppos else "")
             for ep in expanded_ppos:
+                simple_ppos = simplify_ppos(ep.lower())
+                if simple_ppos:
+                    lemma_data = {
+                        'word': orto.replace("ORTO:", "") if orto else "",
+                        'PPOS': simple_ppos,
+                        'PHON1': phon1.replace("PHON:", "") if phon1 else "",
+                        'PHON2': phon2.replace("PHON:", "") if phon2 else "",
+                        'COMM': comm if comm else "",
+                        'pronunciations': pronunciations
+                    }
+                    lemmas[current_lemma].append(lemma_data)
     print("Loaded lemmas:", lemmas)  # Debugging output
     return lemmas
 def create_noun_table(lemma, forms):
     table_data = {
+        'sn==i': '', 'sn==d': '', 'sa==i': '', 'sa==d': '',
+        'sd==i': '', 'sd==d': '', 'sg==i': '', 'sg==d': '',
+        'pn==i': '', 'pn==d': '', 'pa==i': '', 'pa==d': '',
+        'pd==i': '', 'pd==d': '', 'pg==i': '', 'pg==d': ''
     }
     for form in forms:
+        ppos = form['PPOS']
         word = form['word']
+        print(f"Processing: word={word}, ppos={ppos}")
         if ppos in table_data:
             table_data[ppos] = word
         else:
         </thead>
         <tbody>
             <tr>
+                <td>{table_data['sn==i']}</td>
+                <td>{table_data['sn==d']}</td>
+                <td>{table_data['pn==i']}</td>
+                <td>{table_data['pn==d']}</td>
             </tr>
             <tr>
+                <td>{table_data['sa==i']}</td>
+                <td>{table_data['sa==d']}</td>
+                <td>{table_data['pa==i']}</td>
+                <td>{table_data['pa==d']}</td>
             </tr>
             <tr>
+                <td>{table_data['sd==i']}</td>
+                <td>{table_data['sd==d']}</td>
+                <td>{table_data['pd==i']}</td>
+                <td>{table_data['pd==d']}</td>
             </tr>
             <tr>
+                <td>{table_data['sg==i']}</td>
+                <td>{table_data['sg==d']}</td>
+                <td>{table_data['pg==i']}</td>
+                <td>{table_data['pg==d']}</td>
             </tr>
         </tbody>
     </table>