import gradio as gr
import pandas as pd
# Load and parse the CSV file from Hugging Face
def load_data():
    url = "https://huggingface.co/datasets/unijoh/RAVNlex/resolve/main/RAVNlex.csv"
    df = pd.read_csv(url, delimiter='\t', encoding='iso-8859-10',
                     names=["#ORTO", "#PPOS", "#PHON1", "#PHON2", "#COMM"], dtype=str)
    lemmas = {}
    current_lemma = None
    for _, row in df.iterrows():
        # An empty or '---' row marks the end of the current lemma block
        if pd.isna(row['#ORTO']) or row['#ORTO'] == '---':
            current_lemma = None
        # The first row after a separator names the lemma itself
        elif current_lemma is None:
            current_lemma = row['#ORTO'].replace("ORTO:", "")
            lemmas[current_lemma] = []
        # Remaining rows are inflected forms belonging to the current lemma
        else:
            lemma_data = {
                'word': row['#ORTO'].replace("ORTO:", "") if pd.notna(row['#ORTO']) else "",
                'PPOS': row['#PPOS'].replace("PPOS:", "") if pd.notna(row['#PPOS']) else "",
                'PHON1': row['#PHON1'].replace("PHON:", "") if pd.notna(row['#PHON1']) else "",
                'PHON2': row['#PHON2'].replace("PHON:", "") if pd.notna(row['#PHON2']) else "",
                'COMM': row['#COMM'] if pd.notna(row['#COMM']) else ""
            }
            lemmas[current_lemma].append(lemma_data)
    return lemmas
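# Load the lexicon once at startup so every search reuses the parsed data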
lemmas = load_data()
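# Build an HTML declension table for a noun lemma.
# The column headers are Faroese: Eintal = singular, Fleirtal = plural,
# Óbundið = indefinite, Bundið = definite; the four rows hold the
# nominative, accusative, dative and genitive forms.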
def create_noun_table(lemma, forms):
    # Initialize the table structure
    table_data = {
        'Nsns': '', 'Nsnst': '', 'Nsas': '', 'Nsast': '',
        'Nsds': '', 'Nsdst': '', 'Nsgs': '', 'Nsgst': '',
        'Npns': '', 'Npnst': '', 'Npas': '', 'Npast': '',
        'Npds': '', 'Npdt': '', 'Npgs': '', 'Npgst': ''
    }
    # Fill the table data based on PPOS
    for form in forms:
        ppos = form['PPOS']
        word = form['word']
        key = ppos[1:5]  # Extract the relevant part of the PPOS tag
        if key in table_data:
            table_data[key] = word
    # Create the HTML table
    table = f"""
    <table border="1">
        <thead>
            <tr>
                <th colspan="2">Eintal</th>
                <th colspan="2">Fleirtal</th>
            </tr>
            <tr>
                <th>Óbundið</th>
                <th>Bundið</th>
                <th>Óbundið</th>
                <th>Bundið</th>
            </tr>
        </thead>
        <tbody>
            <tr>
                <td>{table_data['Nsns']}</td>
                <td>{table_data['Nsnst']}</td>
                <td>{table_data['Npns']}</td>
                <td>{table_data['Npnst']}</td>
            </tr>
            <tr>
                <td>{table_data['Nsas']}</td>
                <td>{table_data['Nsast']}</td>
                <td>{table_data['Npas']}</td>
                <td>{table_data['Npast']}</td>
            </tr>
            <tr>
                <td>{table_data['Nsds']}</td>
                <td>{table_data['Nsdst']}</td>
                <td>{table_data['Npds']}</td>
                <td>{table_data['Npdt']}</td>
            </tr>
            <tr>
                <td>{table_data['Nsgs']}</td>
                <td>{table_data['Nsgst']}</td>
                <td>{table_data['Npgs']}</td>
                <td>{table_data['Npgst']}</td>
            </tr>
        </tbody>
    </table>
    """
    return table
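# Look up a lemma and render its forms; only nouns get a declension table so far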
def search_lemma(lemma):
    results = lemmas.get(lemma, None)
    if not results:
        return f"No results found for {lemma}"
    if 'N' in results[0]['PPOS']:
        table = create_noun_table(lemma, results)
    else:
        table = "Only noun tables are currently supported."
    return table
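# Gradio UI: a single text box for the lemma, HTML output for the rendered table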
iface = gr.Interface(
    fn=search_lemma,
    inputs="text",
    outputs="html",
    title="Lemma Search",
    description="Enter a lemma to search for its declensions and pronunciations."
)
if __name__ == "__main__":
    iface.launch()