unijoh committed on
Commit
37c5513
·
verified ·
1 Parent(s): eb06599

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -37
app.py CHANGED
@@ -8,7 +8,22 @@ def load_data():
8
  df = pd.read_csv(url, delimiter='\t', encoding='iso-8859-10', dtype=str)
9
  lemmas = {}
10
  current_lemma = None
11
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  for row in df.itertuples(index=False, name=None):
13
  if len(row) < 5:
14
  print(f"Skipping problematic line: {row}")
@@ -19,41 +34,35 @@ def load_data():
19
  elif current_lemma is None:
20
  current_lemma = orto.replace("ORTO:", "")
21
  lemmas[current_lemma] = []
22
- lemma_data = {
23
- 'word': current_lemma,
24
- 'PPOS': ppos.replace("PPOS:", "") if ppos else "",
25
- 'PHON1': phon1.replace("PHON:", "") if phon1 else "",
26
- 'PHON2': phon2.replace("PHON:", "") if phon2 else "",
27
- 'COMM': comm if comm else "",
28
- 'pronunciations': pronunciations
29
- }
30
- lemmas[current_lemma].append(lemma_data)
 
 
31
  else:
32
- lemma_data = {
33
- 'word': orto.replace("ORTO:", "") if orto else "",
34
- 'PPOS': ppos.replace("PPOS:", "") if ppos else "",
35
- 'PHON1': phon1.replace("PHON:", "") if phon1 else "",
36
- 'PHON2': phon2.replace("PHON:", "") if phon2 else "",
37
- 'COMM': comm if comm else "",
38
- 'pronunciations': pronunciations
39
- }
40
- lemmas[current_lemma].append(lemma_data)
41
-
 
 
42
  print("Loaded lemmas:", lemmas) # Debugging output
43
  return lemmas
44
 
45
  lemmas = load_data()
46
 
47
- def expand_ppos(ppos):
48
- matches = re.findall(r'\[([^\]]+)\]', ppos)
49
- if matches:
50
- expanded = []
51
- for match in matches[0]:
52
- expanded.append(ppos.replace(f'[{matches[0]}]', match))
53
- return expanded
54
- else:
55
- return [ppos]
56
-
57
  def create_noun_table(lemma, forms):
58
  table_data = {
59
  'ncmsn==iuu': '', 'ncmsn==duu': '', 'ncfsn==iuu': '', 'ncfsn==duu': '', 'ncnsn==iuu': '', 'ncnsn==duu': '',
@@ -69,13 +78,10 @@ def create_noun_table(lemma, forms):
69
  ppos = form['PPOS'].lower() # Normalize to lowercase
70
  word = form['word']
71
  print(f"Processing: word={word}, ppos={ppos}, key={ppos}")
72
- expanded_ppos_list = expand_ppos(ppos)
73
- for expanded_ppos in expanded_ppos_list:
74
- key = expanded_ppos
75
- if key in table_data:
76
- table_data[key] = word
77
- else:
78
- print(f"Unmatched key: {key} for word: {word} with PPOS: {ppos}")
79
 
80
  print(f"Final table data for {lemma}: {table_data}") # Debugging output
81
 
 
8
  df = pd.read_csv(url, delimiter='\t', encoding='iso-8859-10', dtype=str)
9
  lemmas = {}
10
  current_lemma = None
11
+
12
def expand_ppos(ppos):
    """Expand bracketed character alternatives in a PPOS tag.

    A tag such as ``"nc[mfn]sn==iuu"`` denotes one concrete form per
    character in each ``[...]`` group; this returns the list of concrete
    tags, e.g. ``['ncmsn==iuu', 'ncfsn==iuu', 'ncnsn==iuu']``.  A tag
    without brackets is returned unchanged as a one-element list.
    """
    from itertools import product  # local import: top-of-file import block not visible here

    # Split only on the FIRST '==' so anything after a second '==' stays
    # in `rest` (the previous split()/parts[1] silently dropped it).
    tag, _sep, rest = ppos.partition('==')
    groups = re.findall(r'\[([^\]]+)\]', tag)
    if not groups:
        # No alternatives to expand; preserve the input verbatim.
        return [ppos]

    expanded = []
    # Cross-product over ALL bracket groups, so 'a[xy]b[zw]' yields four
    # fully-expanded tags instead of leaving one group unexpanded in each
    # result (the previous per-group loop produced residual brackets).
    for combo in product(*groups):
        new_tag = tag
        for group, char in zip(groups, combo):
            # count=1 so two identical groups are substituted independently.
            new_tag = new_tag.replace(f'[{group}]', char, 1)
        # Trailing '==' when `rest` is empty matches the original format.
        expanded.append(f"{new_tag}=={rest}")
    return expanded
26
+
27
  for row in df.itertuples(index=False, name=None):
28
  if len(row) < 5:
29
  print(f"Skipping problematic line: {row}")
 
34
  elif current_lemma is None:
35
  current_lemma = orto.replace("ORTO:", "")
36
  lemmas[current_lemma] = []
37
+ expanded_ppos = expand_ppos(ppos.replace("PPOS:", "") if ppos else "")
38
+ for ep in expanded_ppos:
39
+ lemma_data = {
40
+ 'word': current_lemma,
41
+ 'PPOS': ep,
42
+ 'PHON1': phon1.replace("PHON:", "") if phon1 else "",
43
+ 'PHON2': phon2.replace("PHON:", "") if phon2 else "",
44
+ 'COMM': comm if comm else "",
45
+ 'pronunciations': pronunciations
46
+ }
47
+ lemmas[current_lemma].append(lemma_data)
48
  else:
49
+ expanded_ppos = expand_ppos(ppos.replace("PPOS:", "") if ppos else "")
50
+ for ep in expanded_ppos:
51
+ lemma_data = {
52
+ 'word': orto.replace("ORTO:", "") if orto else "",
53
+ 'PPOS': ep,
54
+ 'PHON1': phon1.replace("PHON:", "") if phon1 else "",
55
+ 'PHON2': phon2.replace("PHON:", "") if phon2 else "",
56
+ 'COMM': comm if comm else "",
57
+ 'pronunciations': pronunciations
58
+ }
59
+ lemmas[current_lemma].append(lemma_data)
60
+
61
  print("Loaded lemmas:", lemmas) # Debugging output
62
  return lemmas
63
 
64
  lemmas = load_data()
65
 
 
 
 
 
 
 
 
 
 
 
66
  def create_noun_table(lemma, forms):
67
  table_data = {
68
  'ncmsn==iuu': '', 'ncmsn==duu': '', 'ncfsn==iuu': '', 'ncfsn==duu': '', 'ncnsn==iuu': '', 'ncnsn==duu': '',
 
78
  ppos = form['PPOS'].lower() # Normalize to lowercase
79
  word = form['word']
80
  print(f"Processing: word={word}, ppos={ppos}, key={ppos}")
81
+ if ppos in table_data:
82
+ table_data[ppos] = word
83
+ else:
84
+ print(f"Unmatched key: {ppos} for word: {word} with PPOS: {ppos}")
 
 
 
85
 
86
  print(f"Final table data for {lemma}: {table_data}") # Debugging output
87