unijoh committed on
Commit
37c5513
·
verified ·
1 Parent(s): eb06599

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -37
app.py CHANGED
@@ -8,7 +8,22 @@ def load_data():
8
  df = pd.read_csv(url, delimiter='\t', encoding='iso-8859-10', dtype=str)
9
  lemmas = {}
10
  current_lemma = None
11
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  for row in df.itertuples(index=False, name=None):
13
  if len(row) < 5:
14
  print(f"Skipping problematic line: {row}")
@@ -19,41 +34,35 @@ def load_data():
19
  elif current_lemma is None:
20
  current_lemma = orto.replace("ORTO:", "")
21
  lemmas[current_lemma] = []
22
- lemma_data = {
23
- 'word': current_lemma,
24
- 'PPOS': ppos.replace("PPOS:", "") if ppos else "",
25
- 'PHON1': phon1.replace("PHON:", "") if phon1 else "",
26
- 'PHON2': phon2.replace("PHON:", "") if phon2 else "",
27
- 'COMM': comm if comm else "",
28
- 'pronunciations': pronunciations
29
- }
30
- lemmas[current_lemma].append(lemma_data)
 
 
31
  else:
32
- lemma_data = {
33
- 'word': orto.replace("ORTO:", "") if orto else "",
34
- 'PPOS': ppos.replace("PPOS:", "") if ppos else "",
35
- 'PHON1': phon1.replace("PHON:", "") if phon1 else "",
36
- 'PHON2': phon2.replace("PHON:", "") if phon2 else "",
37
- 'COMM': comm if comm else "",
38
- 'pronunciations': pronunciations
39
- }
40
- lemmas[current_lemma].append(lemma_data)
41
-
 
 
42
  print("Loaded lemmas:", lemmas) # Debugging output
43
  return lemmas
44
 
45
  lemmas = load_data()
46
 
47
- def expand_ppos(ppos):
48
- matches = re.findall(r'\[([^\]]+)\]', ppos)
49
- if matches:
50
- expanded = []
51
- for match in matches[0]:
52
- expanded.append(ppos.replace(f'[{matches[0]}]', match))
53
- return expanded
54
- else:
55
- return [ppos]
56
-
57
  def create_noun_table(lemma, forms):
58
  table_data = {
59
  'ncmsn==iuu': '', 'ncmsn==duu': '', 'ncfsn==iuu': '', 'ncfsn==duu': '', 'ncnsn==iuu': '', 'ncnsn==duu': '',
@@ -69,13 +78,10 @@ def create_noun_table(lemma, forms):
69
  ppos = form['PPOS'].lower() # Normalize to lowercase
70
  word = form['word']
71
  print(f"Processing: word={word}, ppos={ppos}, key={ppos}")
72
- expanded_ppos_list = expand_ppos(ppos)
73
- for expanded_ppos in expanded_ppos_list:
74
- key = expanded_ppos
75
- if key in table_data:
76
- table_data[key] = word
77
- else:
78
- print(f"Unmatched key: {key} for word: {word} with PPOS: {ppos}")
79
 
80
  print(f"Final table data for {lemma}: {table_data}") # Debugging output
81
 
 
8
  df = pd.read_csv(url, delimiter='\t', encoding='iso-8859-10', dtype=str)
9
  lemmas = {}
10
  current_lemma = None
11
+
12
def expand_ppos(ppos):
    """Expand bracketed character alternatives in a PPOS tag.

    A tag such as ``"nc[mfn]sn==iuu"`` denotes one concrete form per
    character in each ``[...]`` group; this returns the list of concrete
    tags, e.g. ``['ncmsn==iuu', 'ncfsn==iuu', 'ncnsn==iuu']``.  A tag
    without brackets is returned unchanged as a one-element list.
    """
    from itertools import product  # local import: top-of-file import block not visible here

    # Split only on the FIRST '==' so anything after a second '==' stays
    # in `rest` (the previous split()/parts[1] silently dropped it).
    tag, _sep, rest = ppos.partition('==')
    groups = re.findall(r'\[([^\]]+)\]', tag)
    if not groups:
        # No alternatives to expand; preserve the input verbatim.
        return [ppos]

    expanded = []
    # Cross-product over ALL bracket groups, so 'a[xy]b[zw]' yields four
    # fully-expanded tags instead of leaving one group unexpanded in each
    # result (the previous per-group loop produced residual brackets).
    for combo in product(*groups):
        new_tag = tag
        for group, char in zip(groups, combo):
            # count=1 so two identical groups are substituted independently.
            new_tag = new_tag.replace(f'[{group}]', char, 1)
        # Trailing '==' when `rest` is empty matches the original format.
        expanded.append(f"{new_tag}=={rest}")
    return expanded
26
+
27
  for row in df.itertuples(index=False, name=None):
28
  if len(row) < 5:
29
  print(f"Skipping problematic line: {row}")
 
34
  elif current_lemma is None:
35
  current_lemma = orto.replace("ORTO:", "")
36
  lemmas[current_lemma] = []
37
+ expanded_ppos = expand_ppos(ppos.replace("PPOS:", "") if ppos else "")
38
+ for ep in expanded_ppos:
39
+ lemma_data = {
40
+ 'word': current_lemma,
41
+ 'PPOS': ep,
42
+ 'PHON1': phon1.replace("PHON:", "") if phon1 else "",
43
+ 'PHON2': phon2.replace("PHON:", "") if phon2 else "",
44
+ 'COMM': comm if comm else "",
45
+ 'pronunciations': pronunciations
46
+ }
47
+ lemmas[current_lemma].append(lemma_data)
48
  else:
49
+ expanded_ppos = expand_ppos(ppos.replace("PPOS:", "") if ppos else "")
50
+ for ep in expanded_ppos:
51
+ lemma_data = {
52
+ 'word': orto.replace("ORTO:", "") if orto else "",
53
+ 'PPOS': ep,
54
+ 'PHON1': phon1.replace("PHON:", "") if phon1 else "",
55
+ 'PHON2': phon2.replace("PHON:", "") if phon2 else "",
56
+ 'COMM': comm if comm else "",
57
+ 'pronunciations': pronunciations
58
+ }
59
+ lemmas[current_lemma].append(lemma_data)
60
+
61
  print("Loaded lemmas:", lemmas) # Debugging output
62
  return lemmas
63
 
64
  lemmas = load_data()
65
 
 
 
 
 
 
 
 
 
 
 
66
  def create_noun_table(lemma, forms):
67
  table_data = {
68
  'ncmsn==iuu': '', 'ncmsn==duu': '', 'ncfsn==iuu': '', 'ncfsn==duu': '', 'ncnsn==iuu': '', 'ncnsn==duu': '',
 
78
  ppos = form['PPOS'].lower() # Normalize to lowercase
79
  word = form['word']
80
  print(f"Processing: word={word}, ppos={ppos}, key={ppos}")
81
+ if ppos in table_data:
82
+ table_data[ppos] = word
83
+ else:
84
+ print(f"Unmatched key: {ppos} for word: {word} with PPOS: {ppos}")
 
 
 
85
 
86
  print(f"Final table data for {lemma}: {table_data}") # Debugging output
87