unijoh committed on
Commit
d81a6b9
·
verified ·
1 Parent(s): 29f0493

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -43
app.py CHANGED
@@ -26,6 +26,19 @@ def load_data():
26
 
27
  return [f"{item}=={rest}" for item in expanded]
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  for row in df.itertuples(index=False, name=None):
30
  if len(row) < 5:
31
  print(f"Skipping problematic line: {row}")
@@ -38,27 +51,31 @@ def load_data():
38
  lemmas[current_lemma] = []
39
  expanded_ppos = expand_ppos(ppos.replace("PPOS:", "") if ppos else "")
40
  for ep in expanded_ppos:
41
- lemma_data = {
42
- 'word': current_lemma,
43
- 'PPOS': ep,
44
- 'PHON1': phon1.replace("PHON:", "") if phon1 else "",
45
- 'PHON2': phon2.replace("PHON:", "") if phon2 else "",
46
- 'COMM': comm if comm else "",
47
- 'pronunciations': pronunciations
48
- }
49
- lemmas[current_lemma].append(lemma_data)
 
 
50
  else:
51
  expanded_ppos = expand_ppos(ppos.replace("PPOS:", "") if ppos else "")
52
  for ep in expanded_ppos:
53
- lemma_data = {
54
- 'word': orto.replace("ORTO:", "") if orto else "",
55
- 'PPOS': ep,
56
- 'PHON1': phon1.replace("PHON:", "") if phon1 else "",
57
- 'PHON2': phon2.replace("PHON:", "") if phon2 else "",
58
- 'COMM': comm if comm else "",
59
- 'pronunciations': pronunciations
60
- }
61
- lemmas[current_lemma].append(lemma_data)
 
 
62
 
63
  print("Loaded lemmas:", lemmas) # Debugging output
64
  return lemmas
@@ -67,19 +84,16 @@ lemmas = load_data()
67
 
68
  def create_noun_table(lemma, forms):
69
  table_data = {
70
- 'ncmsn==iuu': '', 'ncmsn==duu': '', 'ncfsn==iuu': '', 'ncfsn==duu': '', 'ncnsn==iuu': '', 'ncnsn==duu': '',
71
- 'ncmsa==iuu': '', 'ncmsa==duu': '', 'ncfsa==iuu': '', 'ncfsa==duu': '', 'ncnsa==iuu': '', 'ncnsa==duu': '',
72
- 'ncmsd==iuu': '', 'ncmsd==duu': '', 'ncfsd==iuu': '', 'ncfsd==duu': '', 'ncnsd==iuu': '', 'ncnsd==duu': '',
73
- 'ncmsg==iou': '', 'ncmsg==dou': '', 'ncfsg==iou': '', 'ncfsg==dou': '', 'ncnsg==iou': '', 'ncnsg==dou': '',
74
- 'ncmpn==iuu': '', 'ncmpn==duu': '', 'ncfnn==iuu': '', 'ncfnn==duu': '', 'ncnnn==iuu': '', 'ncnnn==duu': '',
75
- 'ncmpa==iuu': '', 'ncmpa==duu': '', 'ncfna==iuu': '', 'ncfna==duu': '', 'ncnna==iuu': '', 'ncnna==duu': '',
76
- 'ncmpd==iuu': '', 'ncmpd==duu': '', 'ncmpg==iou': '', 'ncmpg==dou': '', 'ncfnn==iou': '', 'ncfnn==dou': '', 'ncnnn==iou': '', 'ncnnn==dou': ''
77
  }
78
 
79
  for form in forms:
80
- ppos = form['PPOS'].lower() # Normalize to lowercase
81
  word = form['word']
82
- print(f"Processing: word={word}, ppos={ppos}, key={ppos}")
83
  if ppos in table_data:
84
  table_data[ppos] = word
85
  else:
@@ -103,28 +117,28 @@ def create_noun_table(lemma, forms):
103
  </thead>
104
  <tbody>
105
  <tr>
106
- <td>{table_data['ncmsn==iuu'] or table_data['ncfsn==iuu'] or table_data['ncnsn==iuu']}</td>
107
- <td>{table_data['ncmsn==duu'] or table_data['ncfsn==duu'] or table_data['ncnsn==duu']}</td>
108
- <td>{table_data['ncmpn==iuu'] or table_data['ncfnn==iuu'] or table_data['ncnnn==iuu']}</td>
109
- <td>{table_data['ncmpn==duu'] or table_data['ncfnn==duu'] or table_data['ncnnn==duu']}</td>
110
  </tr>
111
  <tr>
112
- <td>{table_data['ncmsa==iuu'] or table_data['ncfsa==iuu'] or table_data['ncnsa==iuu']}</td>
113
- <td>{table_data['ncmsa==duu'] or table_data['ncfsa==duu'] or table_data['ncnsa==duu']}</td>
114
- <td>{table_data['ncmpa==iuu'] or table_data['ncfna==iuu'] or table_data['ncnna==iuu']}</td>
115
- <td>{table_data['ncmpa==duu'] or table_data['ncfna==duu'] or table_data['ncnna==duu']}</td>
116
  </tr>
117
  <tr>
118
- <td>{table_data['ncmsd==iuu'] or table_data['ncfsd==iuu'] or table_data['ncnsd==iuu']}</td>
119
- <td>{table_data['ncmsd==duu'] or table_data['ncfsd==duu'] or table_data['ncnsd==duu']}</td>
120
- <td>{table_data['ncmpd==iuu'] or table_data['ncfsd==iuu'] or table_data['ncnnn==iuu']}</td>
121
- <td>{table_data['ncmpd==duu'] or table_data['ncfsd==duu'] or table_data['ncnnn==duu']}</td>
122
  </tr>
123
  <tr>
124
- <td>{table_data['ncmsg==iou'] or table_data['ncfsg==iou'] or table_data['ncnsg==iou']}</td>
125
- <td>{table_data['ncmsg==dou'] or table_data['ncfsg==dou'] or table_data['ncnsg==dou']}</td>
126
- <td>{table_data['ncmpg==iou'] or table_data['ncfnn==iou'] or table_data['ncnnn==iou']}</td>
127
- <td>{table_data['ncmpg==dou'] or table_data['ncfnn==dou'] or table_data['ncnnn==dou']}</td>
128
  </tr>
129
  </tbody>
130
  </table>
 
26
 
27
  return [f"{item}=={rest}" for item in expanded]
28
 
29
def simplify_ppos(ppos):
    """Reduce an expanded PPOS string to the key used by the noun table.

    The input is expected to look like ``"<tag>==<suffix>"`` (for example
    ``"ncmsn==iuu"``). The character at index 3 of the tag selects the
    number (``'s'`` stays singular, anything else is treated as plural),
    the character at index 4 is taken as the case letter, and the suffix
    selects definiteness: a suffix starting with ``'i'`` (``"iuu"``,
    ``"iou"``) is indefinite, everything else is definite.

    Args:
        ppos: Expanded PPOS string; callers pass it already lower-cased.

    Returns:
        A key of the form ``"<number><case>==<definiteness>"`` such as
        ``"sn==i"``, matching the keys of ``table_data`` in
        ``create_noun_table``; ``None`` if the input does not contain
        exactly one ``"=="`` separator or the tag is shorter than five
        characters.
    """
    parts = ppos.split('==')
    if len(parts) != 2:
        return None
    tag, suffix = parts
    if len(tag) < 5:
        return None
    number = 's' if tag[3] == 's' else 'p'
    # Keep the case letter in its own variable: the original code rebound
    # the unpacked suffix here, so the definiteness test below could never
    # see the suffix and always produced 'd'.
    case_letter = tag[4]
    # Genitive forms carry "iou"/"dou" rather than "iuu"/"duu", so decide
    # on the leading letter instead of comparing against "iuu" only.
    definiteness = 'i' if suffix.startswith('i') else 'd'
    # The "==" separator is required for the key to match table_data.
    return f"{number}{case_letter}=={definiteness}"
41
+
42
  for row in df.itertuples(index=False, name=None):
43
  if len(row) < 5:
44
  print(f"Skipping problematic line: {row}")
 
51
  lemmas[current_lemma] = []
52
  expanded_ppos = expand_ppos(ppos.replace("PPOS:", "") if ppos else "")
53
  for ep in expanded_ppos:
54
+ simple_ppos = simplify_ppos(ep.lower())
55
+ if simple_ppos:
56
+ lemma_data = {
57
+ 'word': current_lemma,
58
+ 'PPOS': simple_ppos,
59
+ 'PHON1': phon1.replace("PHON:", "") if phon1 else "",
60
+ 'PHON2': phon2.replace("PHON:", "") if phon2 else "",
61
+ 'COMM': comm if comm else "",
62
+ 'pronunciations': pronunciations
63
+ }
64
+ lemmas[current_lemma].append(lemma_data)
65
  else:
66
  expanded_ppos = expand_ppos(ppos.replace("PPOS:", "") if ppos else "")
67
  for ep in expanded_ppos:
68
+ simple_ppos = simplify_ppos(ep.lower())
69
+ if simple_ppos:
70
+ lemma_data = {
71
+ 'word': orto.replace("ORTO:", "") if orto else "",
72
+ 'PPOS': simple_ppos,
73
+ 'PHON1': phon1.replace("PHON:", "") if phon1 else "",
74
+ 'PHON2': phon2.replace("PHON:", "") if phon2 else "",
75
+ 'COMM': comm if comm else "",
76
+ 'pronunciations': pronunciations
77
+ }
78
+ lemmas[current_lemma].append(lemma_data)
79
 
80
  print("Loaded lemmas:", lemmas) # Debugging output
81
  return lemmas
 
84
 
85
  def create_noun_table(lemma, forms):
86
  table_data = {
87
+ 'sn==i': '', 'sn==d': '', 'sa==i': '', 'sa==d': '',
88
+ 'sd==i': '', 'sd==d': '', 'sg==i': '', 'sg==d': '',
89
+ 'pn==i': '', 'pn==d': '', 'pa==i': '', 'pa==d': '',
90
+ 'pd==i': '', 'pd==d': '', 'pg==i': '', 'pg==d': ''
 
 
 
91
  }
92
 
93
  for form in forms:
94
+ ppos = form['PPOS']
95
  word = form['word']
96
+ print(f"Processing: word={word}, ppos={ppos}")
97
  if ppos in table_data:
98
  table_data[ppos] = word
99
  else:
 
117
  </thead>
118
  <tbody>
119
  <tr>
120
+ <td>{table_data['sn==i']}</td>
121
+ <td>{table_data['sn==d']}</td>
122
+ <td>{table_data['pn==i']}</td>
123
+ <td>{table_data['pn==d']}</td>
124
  </tr>
125
  <tr>
126
+ <td>{table_data['sa==i']}</td>
127
+ <td>{table_data['sa==d']}</td>
128
+ <td>{table_data['pa==i']}</td>
129
+ <td>{table_data['pa==d']}</td>
130
  </tr>
131
  <tr>
132
+ <td>{table_data['sd==i']}</td>
133
+ <td>{table_data['sd==d']}</td>
134
+ <td>{table_data['pd==i']}</td>
135
+ <td>{table_data['pd==d']}</td>
136
  </tr>
137
  <tr>
138
+ <td>{table_data['sg==i']}</td>
139
+ <td>{table_data['sg==d']}</td>
140
+ <td>{table_data['pg==i']}</td>
141
+ <td>{table_data['pg==d']}</td>
142
  </tr>
143
  </tbody>
144
  </table>