Spaces:

unijoh
/

ord

Sleeping

App Files Files Community

unijoh committed on Jun 19, 2024

Commit

b605bba

verified ·

1 Parent(s): d81a6b9

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -84

app.py CHANGED Viewed

@@ -8,37 +8,7 @@ def load_data():
     df = pd.read_csv(url, delimiter='\t', encoding='iso-8859-10', dtype=str)
     lemmas = {}
     current_lemma = None
-    def expand_ppos(ppos):
-        parts = ppos.split('==')
-        tag = parts[0]
-        rest = parts[1] if len(parts) > 1 else ''
-        expanded = [tag]
-        bracket_matches = re.findall(r'\[([^\]]+)\]', tag)
-        if bracket_matches:
-            for match in bracket_matches:
-                new_expanded = []
-                for char in match:
-                    for item in expanded:
-                        new_expanded.append(item.replace(f'[{match}]', char))
-                expanded = new_expanded
-        return [f"{item}=={rest}" for item in expanded]
-    def simplify_ppos(ppos):
-        ppos_parts = ppos.split('==')
-        if len(ppos_parts) != 2:
-            return None
-        tag, case = ppos_parts
-        if len(tag) < 5:
-            return None
-        number = 's' if tag[3] == 's' else 'p'
-        gender = tag[2]
-        case = tag[4]
-        definiteness = 'i' if case == 'iuu' else 'd'
-        return f"{number}{case}{definiteness}"
     for row in df.itertuples(index=False, name=None):
         if len(row) < 5:
             print(f"Skipping problematic line: {row}")
@@ -49,55 +19,63 @@ def load_data():
         elif current_lemma is None:
             current_lemma = orto.replace("ORTO:", "")
             lemmas[current_lemma] = []
-            expanded_ppos = expand_ppos(ppos.replace("PPOS:", "") if ppos else "")
-            for ep in expanded_ppos:
-                simple_ppos = simplify_ppos(ep.lower())
-                if simple_ppos:
-                    lemma_data = {
-                        'word': current_lemma,
-                        'PPOS': simple_ppos,
-                        'PHON1': phon1.replace("PHON:", "") if phon1 else "",
-                        'PHON2': phon2.replace("PHON:", "") if phon2 else "",
-                        'COMM': comm if comm else "",
-                        'pronunciations': pronunciations
-                    }
-                    lemmas[current_lemma].append(lemma_data)
         else:
-            expanded_ppos = expand_ppos(ppos.replace("PPOS:", "") if ppos else "")
-            for ep in expanded_ppos:
-                simple_ppos = simplify_ppos(ep.lower())
-                if simple_ppos:
-                    lemma_data = {
-                        'word': orto.replace("ORTO:", "") if orto else "",
-                        'PPOS': simple_ppos,
-                        'PHON1': phon1.replace("PHON:", "") if phon1 else "",
-                        'PHON2': phon2.replace("PHON:", "") if phon2 else "",
-                        'COMM': comm if comm else "",
-                        'pronunciations': pronunciations
-                    }
-                    lemmas[current_lemma].append(lemma_data)
     print("Loaded lemmas:", lemmas)  # Debugging output
     return lemmas
 lemmas = load_data()
 def create_noun_table(lemma, forms):
     table_data = {
-        'sn==i': '', 'sn==d': '', 'sa==i': '', 'sa==d': '',
-        'sd==i': '', 'sd==d': '', 'sg==i': '', 'sg==d': '',
-        'pn==i': '', 'pn==d': '', 'pa==i': '', 'pa==d': '',
-        'pd==i': '', 'pd==d': '', 'pg==i': '', 'pg==d': ''
     }
     for form in forms:
-        ppos = form['PPOS']
         word = form['word']
-        print(f"Processing: word={word}, ppos={ppos}")
-        if ppos in table_data:
-            table_data[ppos] = word
-        else:
-            print(f"Unmatched key: {ppos} for word: {word} with PPOS: {ppos}")
     print(f"Final table data for {lemma}: {table_data}")  # Debugging output
@@ -117,28 +95,28 @@ def create_noun_table(lemma, forms):
         </thead>
         <tbody>
             <tr>
-                <td>{table_data['sn==i']}</td>
-                <td>{table_data['sn==d']}</td>
-                <td>{table_data['pn==i']}</td>
-                <td>{table_data['pn==d']}</td>
             </tr>
             <tr>
-                <td>{table_data['sa==i']}</td>
-                <td>{table_data['sa==d']}</td>
-                <td>{table_data['pa==i']}</td>
-                <td>{table_data['pa==d']}</td>
             </tr>
             <tr>
-                <td>{table_data['sd==i']}</td>
-                <td>{table_data['sd==d']}</td>
-                <td>{table_data['pd==i']}</td>
-                <td>{table_data['pd==d']}</td>
             </tr>
             <tr>
-                <td>{table_data['sg==i']}</td>
-                <td>{table_data['sg==d']}</td>
-                <td>{table_data['pg==i']}</td>
-                <td>{table_data['pg==d']}</td>
             </tr>
         </tbody>
     </table>

     df = pd.read_csv(url, delimiter='\t', encoding='iso-8859-10', dtype=str)
     lemmas = {}
     current_lemma = None
     for row in df.itertuples(index=False, name=None):
         if len(row) < 5:
             print(f"Skipping problematic line: {row}")
         elif current_lemma is None:
             current_lemma = orto.replace("ORTO:", "")
             lemmas[current_lemma] = []
+            lemma_data = {
+                'word': current_lemma,
+                'PPOS': ppos.replace("PPOS:", "") if ppos else "",
+                'PHON1': phon1.replace("PHON:", "") if phon1 else "",
+                'PHON2': phon2.replace("PHON:", "") if phon2 else "",
+                'COMM': comm if comm else "",
+                'pronunciations': pronunciations
+            }
+            lemmas[current_lemma].append(lemma_data)
         else:
+            lemma_data = {
+                'word': orto.replace("ORTO:", "") if orto else "",
+                'PPOS': ppos.replace("PPOS:", "") if ppos else "",
+                'PHON1': phon1.replace("PHON:", "") if phon1 else "",
+                'PHON2': phon2.replace("PHON:", "") if phon2 else "",
+                'COMM': comm if comm else "",
+                'pronunciations': pronunciations
+            }
+            lemmas[current_lemma].append(lemma_data)
     print("Loaded lemmas:", lemmas)  # Debugging output
     return lemmas
 lemmas = load_data()
def expand_ppos(ppos):
    """Expand bracketed alternatives in a PPOS tag into concrete tags.

    A bracket group such as ``[mf]`` stands for "any one of these
    characters".  Every group in the tag is expanded, so a tag with
    several groups yields the cartesian product of its alternatives:

        >>> expand_ppos('nc[mf]s[na]==iuu')
        ['ncmsn==iuu', 'ncmsa==iuu', 'ncfsn==iuu', 'ncfsa==iuu']

    Args:
        ppos: The tag string, possibly containing ``[...]`` groups.

    Returns:
        A list of tags with no remaining bracket groups.  A tag without
        any group is returned unchanged as a single-element list.
    """
    found = re.search(r'\[([^\]]+)\]', ppos)
    if found is None:
        return [ppos]

    group_text = found.group(0)
    expanded = []
    for char in found.group(1):
        # Replace every occurrence of this exact group (same as before),
        # then recurse so that any *other* groups are expanded as well
        # instead of being left in the result with their brackets intact.
        expanded.extend(expand_ppos(ppos.replace(group_text, char)))
    return expanded
 def create_noun_table(lemma, forms):
     table_data = {
+        'ncmsn==iuu': '', 'ncmsn==duu': '', 'ncfsn==iuu': '', 'ncfsn==duu': '', 'ncnsn==iuu': '', 'ncnsn==duu': '',
+        'ncmsa==iuu': '', 'ncmsa==duu': '', 'ncfsa==iuu': '', 'ncfsa==duu': '', 'ncnsa==iuu': '', 'ncnsa==duu': '',
+        'ncmsd==iuu': '', 'ncmsd==duu': '', 'ncfsd==iuu': '', 'ncfsd==duu': '', 'ncnsd==iuu': '', 'ncnsd==duu': '',
+        'ncmsg==iou': '', 'ncmsg==dou': '', 'ncfsg==iou': '', 'ncfsg==dou': '', 'ncnsg==iou': '', 'ncnsg==dou': '',
+        'ncmpn==iuu': '', 'ncmpn==duu': '', 'ncfnn==iuu': '', 'ncfnn==duu': '', 'ncnnn==iuu': '', 'ncnnn==duu': '',
+        'ncmpa==iuu': '', 'ncmpa==duu': '', 'ncfna==iuu': '', 'ncfna==duu': '', 'ncnna==iuu': '', 'ncnna==duu': '',
+        'ncmpd==iuu': '', 'ncmpd==duu': '', 'ncmpg==iou': '', 'ncmpg==dou': '', 'ncfnn==iou': '', 'ncfnn==dou': '', 'ncnnn==iou': '', 'ncnnn==dou': ''
     }
     for form in forms:
+        ppos = form['PPOS'].lower()  # Normalize to lowercase
         word = form['word']
+        print(f"Processing: word={word}, ppos={ppos}, key={ppos}")
+        expanded_ppos_list = expand_ppos(ppos)
+        for expanded_ppos in expanded_ppos_list:
+            key = expanded_ppos
+            if key in table_data:
+                table_data[key] = word
+            else:
+                print(f"Unmatched key: {key} for word: {word} with PPOS: {ppos}")
     print(f"Final table data for {lemma}: {table_data}")  # Debugging output
         </thead>
         <tbody>
             <tr>
+                <td>{table_data['ncmsn==iuu'] or table_data['ncfsn==iuu'] or table_data['ncnsn==iuu']}</td>
+                <td>{table_data['ncmsn==duu'] or table_data['ncfsn==duu'] or table_data['ncnsn==duu']}</td>
+                <td>{table_data['ncmpn==iuu'] or table_data['ncfnn==iuu'] or table_data['ncnnn==iuu']}</td>
+                <td>{table_data['ncmpn==duu'] or table_data['ncfnn==duu'] or table_data['ncnnn==duu']}</td>
             </tr>
             <tr>
+                <td>{table_data['ncmsa==iuu'] or table_data['ncfsa==iuu'] or table_data['ncnsa==iuu']}</td>
+                <td>{table_data['ncmsa==duu'] or table_data['ncfsa==duu'] or table_data['ncnsa==duu']}</td>
+                <td>{table_data['ncmpa==iuu'] or table_data['ncfna==iuu'] or table_data['ncnna==iuu']}</td>
+                <td>{table_data['ncmpa==duu'] or table_data['ncfna==duu'] or table_data['ncnna==duu']}</td>
             </tr>
             <tr>
+                <td>{table_data['ncmsd==iuu'] or table_data['ncfsd==iuu'] or table_data['ncnsd==iuu']}</td>
+                <td>{table_data['ncmsd==duu'] or table_data['ncfsd==duu'] or table_data['ncnsd==duu']}</td>
+                <td>{table_data['ncmpd==iuu'] or table_data['ncfnn==iuu'] or table_data['ncnnn==iuu']}</td>
+                <td>{table_data['ncmpd==duu'] or table_data['ncfnn==duu'] or table_data['ncnnn==duu']}</td>
             </tr>
             <tr>
+                <td>{table_data['ncmsg==iou'] or table_data['ncfsg==iou'] or table_data['ncnsg==iou']}</td>
+                <td>{table_data['ncmsg==dou'] or table_data['ncfsg==dou'] or table_data['ncnsg==dou']}</td>
+                <td>{table_data['ncmpg==iou'] or table_data['ncfnn==iou'] or table_data['ncnnn==iou']}</td>
+                <td>{table_data['ncmpg==dou'] or table_data['ncfnn==dou'] or table_data['ncnnn==dou']}</td>
             </tr>
         </tbody>
     </table>