|
import html

import gradio as gr
import pandas as pd
|
|
|
|
|
RAVNLEX_URL = "https://huggingface.co/datasets/unijoh/RAVNlex/resolve/main/RAVNlex.csv"
_COLUMNS = ["#ORTO", "#PPOS", "#PHON1", "#PHON2", "#COMM"]


def _strip_tag(value, tag):
    """Return *value* with every occurrence of *tag* removed, or "" when it is missing."""
    return "" if pd.isna(value) else value.replace(tag, "")


def load_data(source=RAVNLEX_URL):
    """Read the tab-separated lexicon and group its rows by lemma.

    The input is parsed as ISO-8859-10 text with five string columns
    (``#ORTO``, ``#PPOS``, ``#PHON1``, ``#PHON2``, ``#COMM``).  Rows are
    consumed in order:

    * a row whose first column is missing or exactly ``---`` closes the
      current group;
    * the first row after that opens a new group keyed by its first column
      (with ``ORTO:`` removed) and contributes no form entry itself;
    * every following row is appended to the open group as one form dict.

    A lemma that opens a group more than once starts again from an empty
    list, so only its last group is kept.

    Args:
        source: Anything ``pandas.read_csv`` accepts (URL, path or file-like
            object).  Defaults to the published RAVNlex CSV.

    Returns:
        A dict mapping each lemma to a list of dicts with the keys ``word``,
        ``PPOS``, ``PHON1``, ``PHON2`` and ``COMM``; missing cells become "".
    """
    df = pd.read_csv(
        source,
        delimiter="\t",
        encoding="iso-8859-10",
        names=_COLUMNS,
        dtype=str,
    )

    lemmas = {}
    current_lemma = None

    # Walk the columns in lockstep rather than DataFrame.iterrows(): the
    # values are the same, but no Series object is built for every row.
    for orto, ppos, phon1, phon2, comm in zip(*(df[name] for name in _COLUMNS)):
        if pd.isna(orto) or orto == "---":
            # Separator row: the next data row starts a new lemma.
            current_lemma = None
        elif current_lemma is None:
            current_lemma = orto.replace("ORTO:", "")
            lemmas[current_lemma] = []
        else:
            lemmas[current_lemma].append(
                {
                    "word": _strip_tag(orto, "ORTO:"),
                    "PPOS": _strip_tag(ppos, "PPOS:"),
                    "PHON1": _strip_tag(phon1, "PHON:"),
                    "PHON2": _strip_tag(phon2, "PHON:"),
                    # The comment column carries no tag prefix to remove.
                    "COMM": "" if pd.isna(comm) else comm,
                }
            )

    return lemmas
|
|
|
# Load the lexicon once at import time; search_lemma() looks lemmas up in
# this module-level mapping.
lemmas = load_data()
|
|
|
def create_noun_table(lemma, forms):
    """Render noun forms as an HTML table with four rows and four columns.

    Each form's tag is reduced to ``PPOS[1:5]`` and, when that matches one of
    the table's cell keys, the form's word is placed in that cell (a later
    form with the same key replaces an earlier one).  Cells without a
    matching form stay empty.  Words are HTML-escaped before insertion so
    that characters such as ``<`` or ``&`` in the data cannot alter the
    markup.

    Args:
        lemma: The lemma being displayed.  Not used by the rendering; kept
            for interface compatibility.
        forms: Iterable of dicts providing at least ``'PPOS'`` and ``'word'``.

    Returns:
        The table markup as a string.
    """
    table_data = {
        'Nsns': '', 'Nsnst': '', 'Nsas': '', 'Nsast': '',
        'Nsds': '', 'Nsdst': '', 'Nsgs': '', 'Nsgst': '',
        'Npns': '', 'Npnst': '', 'Npas': '', 'Npast': '',
        'Npds': '', 'Npdt': '', 'Npgs': '', 'Npgst': ''
    }

    for form in forms:
        # NOTE(review): this slice yields at most four characters, so the
        # five-character keys above can never be filled, and 'Npdt' does not
        # follow the pattern of the other keys.  Left unchanged because the
        # tag format is not visible here - confirm against the tagset.
        key = form['PPOS'][1:5]
        if key in table_data:
            table_data[key] = html.escape(str(form['word']))

    table = f"""
    <table border="1">
        <thead>
            <tr>
                <th colspan="2">Eintal</th>
                <th colspan="2">Fleirtal</th>
            </tr>
            <tr>
                <th>Óbundið</th>
                <th>Bundið</th>
                <th>Óbundið</th>
                <th>Bundið</th>
            </tr>
        </thead>
        <tbody>
            <tr>
                <td>{table_data['Nsns']}</td>
                <td>{table_data['Nsnst']}</td>
                <td>{table_data['Npns']}</td>
                <td>{table_data['Npnst']}</td>
            </tr>
            <tr>
                <td>{table_data['Nsas']}</td>
                <td>{table_data['Nsast']}</td>
                <td>{table_data['Npas']}</td>
                <td>{table_data['Npast']}</td>
            </tr>
            <tr>
                <td>{table_data['Nsds']}</td>
                <td>{table_data['Nsdst']}</td>
                <td>{table_data['Npds']}</td>
                <td>{table_data['Npdt']}</td>
            </tr>
            <tr>
                <td>{table_data['Nsgs']}</td>
                <td>{table_data['Nsgst']}</td>
                <td>{table_data['Npgs']}</td>
                <td>{table_data['Npgst']}</td>
            </tr>
        </tbody>
    </table>
    """
    return table
|
|
|
def search_lemma(lemma):
    """Look up *lemma* in the loaded lexicon and return markup for display.

    Args:
        lemma: The text entered by the user; matched exactly against the
            keys of the module-level ``lemmas`` mapping.

    Returns:
        The noun table from ``create_noun_table`` when the first stored
        form's ``PPOS`` contains ``'N'``; a fixed notice when it does not;
        or a "No results found" message when the lemma is unknown or has no
        stored forms.
    """
    results = lemmas.get(lemma, None)
    if not results:
        # The interface renders this return value as HTML, so the user's
        # text is escaped to keep typed markup from being interpreted.
        return f"No results found for {html.escape(str(lemma))}"

    if 'N' in results[0]['PPOS']:
        table = create_noun_table(lemma, results)
    else:
        table = "Only noun tables are currently supported."

    return table
|
|
|
# Gradio front end: one text box whose value is passed to search_lemma, with
# the returned string displayed through the "html" output component.
iface = gr.Interface(
    title="Lemma Search",
    description="Enter a lemma to search for its declensions and pronunciations.",
    fn=search_lemma,
    inputs="text",
    outputs="html",
)


# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()
|
|