File size: 5,606 Bytes
38116a9 34202a3 38116a9 34202a3 8f7bb50 34202a3 38116a9 05dab99 38116a9 9fcb0ad 4095e7a fdcf728 4095e7a 4f5f3f9 4095e7a 971f291 4095e7a 4f5f3f9 645b14b 56e2fd6 645b14b c78e3ff 9fcb0ad fdcf728 56e2fd6 fdcf728 56e2fd6 fdcf728 56e2fd6 fdcf728 56e2fd6 9fcb0ad 38116a9 9fcb0ad 971f291 4095e7a 9fcb0ad 38116a9 9fcb0ad 38116a9 56e2fd6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
import html

import gradio as gr
import pandas as pd
# Load and parse the CSV file from Hugging Face
def _row_to_form(orto, ppos, phon1, phon2, comm, pronunciations):
    """Build one form record from the already-cleaned cells of a CSV row."""
    return {
        'word': orto.replace("ORTO:", ""),
        'PPOS': ppos.replace("PPOS:", ""),
        'PHON1': phon1.replace("PHON:", ""),
        'PHON2': phon2.replace("PHON:", ""),
        'COMM': comm,
        'pronunciations': pronunciations,
    }


def load_data():
    """Download the RAVNlex CSV and group its rows into a lemma dictionary.

    The file is read as tab-separated, ISO-8859-10 encoded text with every
    column kept as a string.  Rows are consumed in order:

    * a row whose first column is ``---`` closes the current entry;
    * the first row after that names the entry (its first column with the
      ``ORTO:`` marker removed) and, when it carries a PPOS tag of its own,
      is also stored as the entry's first form;
    * every following row is appended to the entry as a form record.

    Returns:
        dict: maps each headword to a list of form records.  A record has
        the keys ``word``, ``PPOS``, ``PHON1``, ``PHON2``, ``COMM`` and
        ``pronunciations`` (a list holding any columns after the fifth).
        If a headword opens more than one entry, the later entry replaces
        the earlier one.
    """
    url = "https://huggingface.co/datasets/unijoh/RAVNlex/resolve/main/RAVNlex_small.csv"
    # NOTE(review): read_csv treats the first line of the file as a header, so
    # that line never reaches the loop below -- confirm the file really starts
    # with a header row (otherwise pass header=None).
    df = pd.read_csv(url, delimiter='\t', encoding='iso-8859-10', dtype=str)

    lemmas = {}
    current_lemma = None
    for row in df.itertuples(index=False, name=None):
        if len(row) < 5:
            print(f"Skipping problematic line: {row}")
            continue

        # Missing cells are not strings (pandas yields NaN); normalise them to "".
        cells = [cell if isinstance(cell, str) else "" for cell in row]
        orto, ppos, phon1, phon2, comm, *pronunciations = cells

        if orto == '---':
            current_lemma = None
            continue

        if current_lemma is None:
            current_lemma = orto.replace("ORTO:", "")
            # NOTE(review): a repeated headword overwrites the earlier entry.
            lemmas[current_lemma] = []
            if not ppos:
                # A bare headword line describes no inflected form.
                continue
            # Otherwise the headword row is itself a tagged form; keep it so it
            # is not missing from the entry (and so single-row entries are not
            # left with an empty, "not found" list).

        lemmas[current_lemma].append(
            _row_to_form(orto, ppos, phon1, phon2, comm, pronunciations)
        )

    # Report only the size: dumping the whole lexicon floods the log on import.
    print(f"Loaded {len(lemmas)} lemmas")
    return lemmas
# Build the lexicon once, at import time; search_lemma reads this module-level dict.
lemmas = load_data()
# Letters used to build noun tags of the shape "nc<gender><number><case>==<def><tail>".
_NOUN_GENDERS = "mfn"
# (case letter, tag tail): tags for case "g" end in "ou", all others in "uu".
_NOUN_CASE_ROWS = (("n", "uu"), ("a", "uu"), ("d", "uu"), ("g", "ou"))
# (number letter, definiteness letter), in table-column order:
# Eintal/Óbundið, Eintal/Bundið, Fleirtal/Óbundið, Fleirtal/Bundið.
_NOUN_COLUMNS = (("s", "i"), ("s", "d"), ("p", "i"), ("p", "d"))


def _noun_tag(gender, number, case, definiteness, tail):
    """Assemble the lowercase PPOS tag for one gender/number/case/definiteness."""
    return f"nc{gender}{number}{case}=={definiteness}{tail}"


def create_noun_table(lemma, forms):
    """Render the forms of a noun as a 4x4 HTML table.

    Each form's ``PPOS`` value is lowercased and matched against the known
    noun tags (``nc`` + gender ``m``/``f``/``n`` + number ``s``/``p`` + case
    ``n``/``a``/``d``/``g`` + ``==`` + definiteness ``i``/``d`` + tail).  The
    table has one row per case letter and four columns: ``s``/``i``,
    ``s``/``d``, ``p``/``i``, ``p``/``d``.  Within a cell the first non-empty
    word among the masculine, feminine and neuter tags is shown.

    The plural keys are generated for all three genders; the previous
    hand-written table misspelled the feminine and neuter plural tags, so
    those forms were never displayed.

    Args:
        lemma: headword the forms belong to.  Kept for interface
            compatibility; it does not appear in the markup.
        forms: iterable of dicts with at least the keys ``'PPOS'`` and
            ``'word'``.  Forms whose tag is not a known noun tag are
            reported on stdout and skipped.

    Returns:
        str: the HTML ``<table>`` markup.
    """
    table_data = {}
    for case, tail in _NOUN_CASE_ROWS:
        for number, definiteness in _NOUN_COLUMNS:
            for gender in _NOUN_GENDERS:
                table_data[_noun_tag(gender, number, case, definiteness, tail)] = ''

    for form in forms:
        ppos = form['PPOS'].lower()  # tags are matched case-insensitively
        word = form['word']
        if ppos in table_data:
            table_data[ppos] = word
        else:
            print(f"Unmatched key: {ppos} for word: {word} with PPOS: {ppos}")

    lines = [
        '',
        '<table border="1">',
        '<thead>',
        '<tr>',
        '<th colspan="2">Eintal</th>',
        '<th colspan="2">Fleirtal</th>',
        '</tr>',
        '<tr>',
        '<th>Óbundið</th>',
        '<th>Bundið</th>',
        '<th>Óbundið</th>',
        '<th>Bundið</th>',
        '</tr>',
        '</thead>',
        '<tbody>',
    ]
    for case, tail in _NOUN_CASE_ROWS:
        lines.append('<tr>')
        for number, definiteness in _NOUN_COLUMNS:
            candidates = (
                table_data[_noun_tag(gender, number, case, definiteness, tail)]
                for gender in _NOUN_GENDERS
            )
            word = next((w for w in candidates if w), '')
            # Words come from the downloaded dataset; escape before embedding.
            lines.append(f"<td>{html.escape(word)}</td>")
        lines.append('</tr>')
    lines.extend(['</tbody>', '</table>', ''])
    return "\n".join(lines)
def search_lemma(lemma):
    """Look up a headword in the loaded lexicon and return HTML for display.

    Args:
        lemma: text typed by the user.  Surrounding whitespace is ignored
            and ``None`` is treated as an empty query.

    Returns:
        str: the noun table built by ``create_noun_table`` when the entry's
        first form has a PPOS tag starting with ``n``; otherwise a short
        message saying that nothing was found or that only noun tables are
        supported.
    """
    query = (lemma or "").strip()
    results = lemmas.get(query)
    if not results:
        # The return value is rendered as HTML, so never echo raw user text.
        return f"No results found for {html.escape(query)}"

    # Noun tags begin with "n"; testing for an "n" anywhere in the tag would
    # also accept other tags that merely contain that letter.
    if results[0]['PPOS'].lower().startswith('n'):
        return create_noun_table(query, results)
    return "Only noun tables are currently supported."
# Gradio front end: one text box in, the HTML returned by search_lemma out.
iface = gr.Interface(
    title="Lemma Search",
    description="Enter a lemma to search for its declensions and pronunciations.",
    fn=search_lemma,
    inputs="text",
    outputs="html",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|