unijoh committed on
Commit
b605bba
·
verified ·
1 Parent(s): d81a6b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -84
app.py CHANGED
@@ -8,37 +8,7 @@ def load_data():
8
  df = pd.read_csv(url, delimiter='\t', encoding='iso-8859-10', dtype=str)
9
  lemmas = {}
10
  current_lemma = None
11
-
12
- def expand_ppos(ppos):
13
- parts = ppos.split('==')
14
- tag = parts[0]
15
- rest = parts[1] if len(parts) > 1 else ''
16
- expanded = [tag]
17
-
18
- bracket_matches = re.findall(r'\[([^\]]+)\]', tag)
19
- if bracket_matches:
20
- for match in bracket_matches:
21
- new_expanded = []
22
- for char in match:
23
- for item in expanded:
24
- new_expanded.append(item.replace(f'[{match}]', char))
25
- expanded = new_expanded
26
-
27
- return [f"{item}=={rest}" for item in expanded]
28
-
29
- def simplify_ppos(ppos):
30
- ppos_parts = ppos.split('==')
31
- if len(ppos_parts) != 2:
32
- return None
33
- tag, case = ppos_parts
34
- if len(tag) < 5:
35
- return None
36
- number = 's' if tag[3] == 's' else 'p'
37
- gender = tag[2]
38
- case = tag[4]
39
- definiteness = 'i' if case == 'iuu' else 'd'
40
- return f"{number}{case}{definiteness}"
41
-
42
  for row in df.itertuples(index=False, name=None):
43
  if len(row) < 5:
44
  print(f"Skipping problematic line: {row}")
@@ -49,55 +19,63 @@ def load_data():
49
  elif current_lemma is None:
50
  current_lemma = orto.replace("ORTO:", "")
51
  lemmas[current_lemma] = []
52
- expanded_ppos = expand_ppos(ppos.replace("PPOS:", "") if ppos else "")
53
- for ep in expanded_ppos:
54
- simple_ppos = simplify_ppos(ep.lower())
55
- if simple_ppos:
56
- lemma_data = {
57
- 'word': current_lemma,
58
- 'PPOS': simple_ppos,
59
- 'PHON1': phon1.replace("PHON:", "") if phon1 else "",
60
- 'PHON2': phon2.replace("PHON:", "") if phon2 else "",
61
- 'COMM': comm if comm else "",
62
- 'pronunciations': pronunciations
63
- }
64
- lemmas[current_lemma].append(lemma_data)
65
  else:
66
- expanded_ppos = expand_ppos(ppos.replace("PPOS:", "") if ppos else "")
67
- for ep in expanded_ppos:
68
- simple_ppos = simplify_ppos(ep.lower())
69
- if simple_ppos:
70
- lemma_data = {
71
- 'word': orto.replace("ORTO:", "") if orto else "",
72
- 'PPOS': simple_ppos,
73
- 'PHON1': phon1.replace("PHON:", "") if phon1 else "",
74
- 'PHON2': phon2.replace("PHON:", "") if phon2 else "",
75
- 'COMM': comm if comm else "",
76
- 'pronunciations': pronunciations
77
- }
78
- lemmas[current_lemma].append(lemma_data)
79
-
80
  print("Loaded lemmas:", lemmas) # Debugging output
81
  return lemmas
82
 
83
  lemmas = load_data()
84
 
 
 
 
 
 
 
 
 
 
 
85
  def create_noun_table(lemma, forms):
86
  table_data = {
87
- 'sn==i': '', 'sn==d': '', 'sa==i': '', 'sa==d': '',
88
- 'sd==i': '', 'sd==d': '', 'sg==i': '', 'sg==d': '',
89
- 'pn==i': '', 'pn==d': '', 'pa==i': '', 'pa==d': '',
90
- 'pd==i': '', 'pd==d': '', 'pg==i': '', 'pg==d': ''
 
 
 
91
  }
92
 
93
  for form in forms:
94
- ppos = form['PPOS']
95
  word = form['word']
96
- print(f"Processing: word={word}, ppos={ppos}")
97
- if ppos in table_data:
98
- table_data[ppos] = word
99
- else:
100
- print(f"Unmatched key: {ppos} for word: {word} with PPOS: {ppos}")
 
 
 
101
 
102
  print(f"Final table data for {lemma}: {table_data}") # Debugging output
103
 
@@ -117,28 +95,28 @@ def create_noun_table(lemma, forms):
117
  </thead>
118
  <tbody>
119
  <tr>
120
- <td>{table_data['sn==i']}</td>
121
- <td>{table_data['sn==d']}</td>
122
- <td>{table_data['pn==i']}</td>
123
- <td>{table_data['pn==d']}</td>
124
  </tr>
125
  <tr>
126
- <td>{table_data['sa==i']}</td>
127
- <td>{table_data['sa==d']}</td>
128
- <td>{table_data['pa==i']}</td>
129
- <td>{table_data['pa==d']}</td>
130
  </tr>
131
  <tr>
132
- <td>{table_data['sd==i']}</td>
133
- <td>{table_data['sd==d']}</td>
134
- <td>{table_data['pd==i']}</td>
135
- <td>{table_data['pd==d']}</td>
136
  </tr>
137
  <tr>
138
- <td>{table_data['sg==i']}</td>
139
- <td>{table_data['sg==d']}</td>
140
- <td>{table_data['pg==i']}</td>
141
- <td>{table_data['pg==d']}</td>
142
  </tr>
143
  </tbody>
144
  </table>
 
8
  df = pd.read_csv(url, delimiter='\t', encoding='iso-8859-10', dtype=str)
9
  lemmas = {}
10
  current_lemma = None
11
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  for row in df.itertuples(index=False, name=None):
13
  if len(row) < 5:
14
  print(f"Skipping problematic line: {row}")
 
19
  elif current_lemma is None:
20
  current_lemma = orto.replace("ORTO:", "")
21
  lemmas[current_lemma] = []
22
+ lemma_data = {
23
+ 'word': current_lemma,
24
+ 'PPOS': ppos.replace("PPOS:", "") if ppos else "",
25
+ 'PHON1': phon1.replace("PHON:", "") if phon1 else "",
26
+ 'PHON2': phon2.replace("PHON:", "") if phon2 else "",
27
+ 'COMM': comm if comm else "",
28
+ 'pronunciations': pronunciations
29
+ }
30
+ lemmas[current_lemma].append(lemma_data)
 
 
 
 
31
  else:
32
+ lemma_data = {
33
+ 'word': orto.replace("ORTO:", "") if orto else "",
34
+ 'PPOS': ppos.replace("PPOS:", "") if ppos else "",
35
+ 'PHON1': phon1.replace("PHON:", "") if phon1 else "",
36
+ 'PHON2': phon2.replace("PHON:", "") if phon2 else "",
37
+ 'COMM': comm if comm else "",
38
+ 'pronunciations': pronunciations
39
+ }
40
+ lemmas[current_lemma].append(lemma_data)
41
+
 
 
 
 
42
  print("Loaded lemmas:", lemmas) # Debugging output
43
  return lemmas
44
 
45
  lemmas = load_data()
46
 
47
def expand_ppos(ppos):
    """Expand bracketed alternatives in a PPOS tag into concrete tags.

    A bracket group such as ``[mf]`` stands for "one of these characters".
    Every distinct group found in *ppos* is expanded, so a tag with several
    groups yields the full set of combinations instead of leaving later
    groups as literal ``[...]`` text.

    Args:
        ppos: The PPOS tag string, possibly containing ``[...]`` groups.

    Returns:
        A list of tag strings with all bracket groups substituted. If
        *ppos* contains no bracket group, the list holds *ppos* unchanged.
    """
    expanded = [ppos]
    # dict.fromkeys de-duplicates while keeping first-seen order: a group
    # that occurs twice is substituted in one pass (str.replace touches
    # every occurrence), so it must not be processed a second time.
    groups = dict.fromkeys(re.findall(r'\[([^\]]+)\]', ppos))
    for group in groups:
        placeholder = f'[{group}]'
        expanded = [
            item.replace(placeholder, char)
            for item in expanded
            for char in group
        ]
    return expanded
56
+
57
  def create_noun_table(lemma, forms):
58
  table_data = {
59
+ 'ncmsn==iuu': '', 'ncmsn==duu': '', 'ncfsn==iuu': '', 'ncfsn==duu': '', 'ncnsn==iuu': '', 'ncnsn==duu': '',
60
+ 'ncmsa==iuu': '', 'ncmsa==duu': '', 'ncfsa==iuu': '', 'ncfsa==duu': '', 'ncnsa==iuu': '', 'ncnsa==duu': '',
61
+ 'ncmsd==iuu': '', 'ncmsd==duu': '', 'ncfsd==iuu': '', 'ncfsd==duu': '', 'ncnsd==iuu': '', 'ncnsd==duu': '',
62
+ 'ncmsg==iou': '', 'ncmsg==dou': '', 'ncfsg==iou': '', 'ncfsg==dou': '', 'ncnsg==iou': '', 'ncnsg==dou': '',
63
+ 'ncmpn==iuu': '', 'ncmpn==duu': '', 'ncfnn==iuu': '', 'ncfnn==duu': '', 'ncnnn==iuu': '', 'ncnnn==duu': '',
64
+ 'ncmpa==iuu': '', 'ncmpa==duu': '', 'ncfna==iuu': '', 'ncfna==duu': '', 'ncnna==iuu': '', 'ncnna==duu': '',
65
+ 'ncmpd==iuu': '', 'ncmpd==duu': '', 'ncmpg==iou': '', 'ncmpg==dou': '', 'ncfnn==iou': '', 'ncfnn==dou': '', 'ncnnn==iou': '', 'ncnnn==dou': ''
66
  }
67
 
68
  for form in forms:
69
+ ppos = form['PPOS'].lower() # Normalize to lowercase
70
  word = form['word']
71
+ print(f"Processing: word={word}, ppos={ppos}, key={ppos}")
72
+ expanded_ppos_list = expand_ppos(ppos)
73
+ for expanded_ppos in expanded_ppos_list:
74
+ key = expanded_ppos
75
+ if key in table_data:
76
+ table_data[key] = word
77
+ else:
78
+ print(f"Unmatched key: {key} for word: {word} with PPOS: {ppos}")
79
 
80
  print(f"Final table data for {lemma}: {table_data}") # Debugging output
81
 
 
95
  </thead>
96
  <tbody>
97
  <tr>
98
+ <td>{table_data['ncmsn==iuu'] or table_data['ncfsn==iuu'] or table_data['ncnsn==iuu']}</td>
99
+ <td>{table_data['ncmsn==duu'] or table_data['ncfsn==duu'] or table_data['ncnsn==duu']}</td>
100
+ <td>{table_data['ncmpn==iuu'] or table_data['ncfnn==iuu'] or table_data['ncnnn==iuu']}</td>
101
+ <td>{table_data['ncmpn==duu'] or table_data['ncfnn==duu'] or table_data['ncnnn==duu']}</td>
102
  </tr>
103
  <tr>
104
+ <td>{table_data['ncmsa==iuu'] or table_data['ncfsa==iuu'] or table_data['ncnsa==iuu']}</td>
105
+ <td>{table_data['ncmsa==duu'] or table_data['ncfsa==duu'] or table_data['ncnsa==duu']}</td>
106
+ <td>{table_data['ncmpa==iuu'] or table_data['ncfna==iuu'] or table_data['ncnna==iuu']}</td>
107
+ <td>{table_data['ncmpa==duu'] or table_data['ncfna==duu'] or table_data['ncnna==duu']}</td>
108
  </tr>
109
  <tr>
110
+ <td>{table_data['ncmsd==iuu'] or table_data['ncfsd==iuu'] or table_data['ncnsd==iuu']}</td>
111
+ <td>{table_data['ncmsd==duu'] or table_data['ncfsd==duu'] or table_data['ncnsd==duu']}</td>
112
+ <td>{table_data['ncmpd==iuu'] or table_data['ncfnn==iuu'] or table_data['ncnnn==iuu']}</td>
113
+ <td>{table_data['ncmpd==duu'] or table_data['ncfnn==duu'] or table_data['ncnnn==duu']}</td>
114
  </tr>
115
  <tr>
116
+ <td>{table_data['ncmsg==iou'] or table_data['ncfsg==iou'] or table_data['ncnsg==iou']}</td>
117
+ <td>{table_data['ncmsg==dou'] or table_data['ncfsg==dou'] or table_data['ncnsg==dou']}</td>
118
+ <td>{table_data['ncmpg==iou'] or table_data['ncfnn==iou'] or table_data['ncnnn==iou']}</td>
119
+ <td>{table_data['ncmpg==dou'] or table_data['ncfnn==dou'] or table_data['ncnnn==dou']}</td>
120
  </tr>
121
  </tbody>
122
  </table>