unijoh committed on
Commit
18a5f3f
·
verified ·
1 Parent(s): 343fbe1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -24
app.py CHANGED
@@ -1,28 +1,34 @@
1
  import gradio as gr
2
  import pandas as pd
 
3
 
4
  # Load and parse the CSV file from Hugging Face
5
  def load_data():
6
- url = "https://huggingface.co/datasets/unijoh/RAVNlex/blob/main/RAVNlex.csv"
7
- df = pd.read_csv(url, delimiter='\t', encoding='iso-8859-10', names=["#ORTO", "#PPOS", "#PHON1", "#PHON2", "#COMM"], dtype=str)
8
  lemmas = {}
9
  current_lemma = None
10
 
11
- for _, row in df.iterrows():
12
- if pd.isna(row['#ORTO']) or row['#ORTO'] == '---':
13
- current_lemma = None
14
- elif current_lemma is None:
15
- current_lemma = row['#ORTO'].replace("ORTO:", "")
16
- lemmas[current_lemma] = []
17
- else:
18
- lemma_data = {
19
- 'word': row['#ORTO'].replace("ORTO:", "") if pd.notna(row['#ORTO']) else "",
20
- 'PPOS': row['#PPOS'].replace("PPOS:", "") if pd.notna(row['#PPOS']) else "",
21
- 'PHON1': row['#PHON1'].replace("PHON:", "") if pd.notna(row['#PHON1']) else "",
22
- 'PHON2': row['#PHON2'].replace("PHON:", "") if pd.notna(row['#PHON2']) else "",
23
- 'COMM': row['#COMM'] if pd.notna(row['#COMM']) else ""
24
- }
25
- lemmas[current_lemma].append(lemma_data)
 
 
 
 
 
 
26
 
27
  print("Loaded lemmas:", lemmas) # Debugging output
28
  return lemmas
@@ -31,10 +37,11 @@ lemmas = load_data()
31
 
32
  def create_noun_table(lemma, forms):
33
  table_data = {
34
- 'ncmns': '', 'ncmsn==duu': '', 'ncmsa': '', 'ncmsa==duu': '',
35
- 'ncmsd': '', 'ncmsd==duu': '', 'ncmsg': '', 'ncmsg==dou': '',
36
- 'ncmpn': '', 'ncmpn==duu': '', 'ncmpa': '', 'ncmpa==duu': '',
37
- 'ncmpd': '', 'ncmpd==duu': '', 'ncmpg': '', 'ncmpg==dou': ''
 
38
  }
39
 
40
  for form in forms:
@@ -63,9 +70,9 @@ def create_noun_table(lemma, forms):
63
  </thead>
64
  <tbody>
65
  <tr>
66
- <td>{table_data['ncmns']}</td>
67
- <td>{table_data['ncmsn==duu']}</td>
68
- <td>{table_data['ncmpn']}</td>
69
  <td>{table_data['ncmpn==duu']}</td>
70
  </tr>
71
  <tr>
 
1
  import gradio as gr
2
  import pandas as pd
3
+ import csv
4
 
5
  # Load and parse the CSV file from Hugging Face
6
  def load_data():
7
+ url = "https://huggingface.co/datasets/unijoh/RAVNlex/blob/main/RAVNlex_small.csv"
 
8
  lemmas = {}
9
  current_lemma = None
10
 
11
+ with open(url, 'r', encoding='iso-8859-10') as file:
12
+ reader = csv.reader(file, delimiter='\t')
13
+ for row in reader:
14
+ if len(row) != 5:
15
+ print(f"Skipping problematic line {reader.line_num}: {row}")
16
+ continue
17
+ orto, ppos, phon1, phon2, comm = row
18
+ if orto == '---':
19
+ current_lemma = None
20
+ elif current_lemma is None:
21
+ current_lemma = orto.replace("ORTO:", "")
22
+ lemmas[current_lemma] = []
23
+ else:
24
+ lemma_data = {
25
+ 'word': orto.replace("ORTO:", "") if orto else "",
26
+ 'PPOS': ppos.replace("PPOS:", "") if ppos else "",
27
+ 'PHON1': phon1.replace("PHON:", "") if phon1 else "",
28
+ 'PHON2': phon2.replace("PHON:", "") if phon2 else "",
29
+ 'COMM': comm if comm else ""
30
+ }
31
+ lemmas[current_lemma].append(lemma_data)
32
 
33
  print("Loaded lemmas:", lemmas) # Debugging output
34
  return lemmas
 
37
 
38
  def create_noun_table(lemma, forms):
39
  table_data = {
40
+ 'nsmns': '', 'nsns': '', 'nsmsn': '', 'nsas': '', 'nsds': '', 'nsgs': '',
41
+ 'npns': '', 'npas': '', 'npds': '', 'npgs': '',
42
+ 'nsmsn==duu': '', 'ncmsa': '', 'ncmsd': '', 'ncmsg': '',
43
+ 'ncmsd==duu': '', 'ncmsg==dou': '', 'ncmpn': '', 'ncmpa': '', 'ncmpd': '', 'ncmpg': '',
44
+ 'ncmpn==duu': '', 'ncmpa==duu': '', 'ncmpd==duu': '', 'ncmpg==dou': ''
45
  }
46
 
47
  for form in forms:
 
70
  </thead>
71
  <tbody>
72
  <tr>
73
+ <td>{table_data['nsns']}</td>
74
+ <td>{table_data['nsmsn==duu']}</td>
75
+ <td>{table_data['npns']}</td>
76
  <td>{table_data['ncmpn==duu']}</td>
77
  </tr>
78
  <tr>