File size: 3,711 Bytes
38116a9 1e1d6b5 413b7fa 38116a9 413b7fa 38116a9 4095e7a 38116a9 9fcb0ad 4095e7a 645b14b 4095e7a 9fcb0ad 4095e7a 9fcb0ad 4095e7a 9fcb0ad 4095e7a 9fcb0ad 4095e7a 9fcb0ad 38116a9 9fcb0ad 4095e7a 9fcb0ad 38116a9 9fcb0ad 38116a9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
import html

import gradio as gr
import pandas as pd
# Load and parse the CSV file from Hugging Face
def load_data():
    """Fetch the RAVNlex lexicon and group its rows by lemma.

    The file is read from the Hugging Face URL below as tab-separated text
    in ISO-8859-10, with every cell kept as a string. Rows are consumed in
    order:

    * a row whose ``#ORTO`` cell is missing or equals ``---`` closes the
      current entry;
    * the first row after that supplies the lemma (its ``#ORTO`` cell with
      ``ORTO:`` removed) and opens a fresh, empty list of forms -- the row
      itself is not stored as a form;
    * every following row is appended to that list as a form dict.

    Returns:
        dict mapping lemma -> list of dicts with the keys ``word``,
        ``PPOS``, ``PHON1``, ``PHON2`` and ``COMM``; missing cells become
        ``""``. If a lemma occurs more than once, the later entry replaces
        the earlier one.
    """
    source = "https://huggingface.co/datasets/unijoh/RAVNlex/resolve/main/RAVNlex.csv"
    columns = ["#ORTO", "#PPOS", "#PHON1", "#PHON2", "#COMM"]
    frame = pd.read_csv(source, delimiter='\t', encoding='iso-8859-10', names=columns, dtype=str)

    def cleaned(cell, tag):
        # Missing cells become "", otherwise the field tag is removed.
        if pd.isna(cell):
            return ""
        return cell.replace(tag, "")

    by_lemma = {}
    active = None
    for _, record in frame.iterrows():
        orto = record['#ORTO']

        # Separator (or blank) row: the current entry is finished.
        if pd.isna(orto) or orto == '---':
            active = None
            continue

        # First row of a new entry names the lemma.
        if active is None:
            active = orto.replace("ORTO:", "")
            by_lemma[active] = []
            continue

        comment = record['#COMM']
        by_lemma[active].append({
            'word': cleaned(orto, "ORTO:"),
            'PPOS': cleaned(record['#PPOS'], "PPOS:"),
            'PHON1': cleaned(record['#PHON1'], "PHON:"),
            'PHON2': cleaned(record['#PHON2'], "PHON:"),
            'COMM': "" if pd.isna(comment) else comment,
        })
    return by_lemma
# Module-level lookup table (lemma -> list of form dicts), built once when
# the module is imported; load_data() reads the CSV from its URL at that point.
lemmas = load_data()
def create_noun_table(lemma, forms):
    """Render a noun's inflected forms as an HTML declension table.

    Each form is placed by a code taken from its ``PPOS`` string starting at
    index 1 (assumes the noun code begins at that position -- TODO confirm
    against the lexicon's tag format). Codes are ``N`` + number (``s``/``p``)
    + case (``n``/``a``/``d``/``g``) + ``s``, with a trailing ``t`` for the
    definite form, e.g. ``Nsns`` and ``Nsnst``.

    The five-character (definite) code is tried before the four-character
    one; otherwise a definite tag would match its four-character prefix and
    land in the indefinite cell. When several forms map to the same cell the
    last one wins.

    Args:
        lemma: The lemma that was looked up. Not used in the output; kept so
            the call signature stays unchanged.
        forms: Iterable of dicts providing at least ``'PPOS'`` and ``'word'``.

    Returns:
        An HTML ``<table>`` string with one row per case (n, a, d, g) and the
        columns singular indefinite, singular definite, plural indefinite,
        plural definite. Cells without a matching form are empty. Words are
        HTML-escaped.
    """
    import html  # local import keeps the block self-contained

    table_data = {
        'Nsns': '', 'Nsnst': '', 'Nsas': '', 'Nsast': '',
        'Nsds': '', 'Nsdst': '', 'Nsgs': '', 'Nsgst': '',
        'Npns': '', 'Npnst': '', 'Npas': '', 'Npast': '',
        'Npds': '', 'Npdst': '', 'Npgs': '', 'Npgst': '',
    }
    # The plural dative definite cell used to be keyed 'Npdt'; keep accepting
    # that spelling so any tag that matched before still fills the cell.
    legacy_keys = {'Npdt': 'Npdst'}

    for form in forms:
        ppos = form['PPOS']
        # Longest code first: definite (5 chars), then indefinite (4 chars).
        for candidate in (ppos[1:6], ppos[1:5]):
            key = legacy_keys.get(candidate, candidate)
            if key in table_data:
                table_data[key] = html.escape(form['word'])
                break

    body_rows = []
    for case in 'nadg':
        slots = (f'Ns{case}s', f'Ns{case}st', f'Np{case}s', f'Np{case}st')
        cells = '\n'.join(f'<td>{table_data[slot]}</td>' for slot in slots)
        body_rows.append(f'<tr>\n{cells}\n</tr>')

    return (
        '\n<table border="1">\n'
        '<thead>\n'
        '<tr>\n'
        '<th colspan="2">Eintal</th>\n'
        '<th colspan="2">Fleirtal</th>\n'
        '</tr>\n'
        '<tr>\n'
        '<th>Óbundið</th>\n'
        '<th>Bundið</th>\n'
        '<th>Óbundið</th>\n'
        '<th>Bundið</th>\n'
        '</tr>\n'
        '</thead>\n'
        '<tbody>\n'
        + '\n'.join(body_rows) +
        '\n</tbody>\n'
        '</table>\n'
    )
def search_lemma(lemma):
    """Look up a lemma and return HTML describing its forms.

    The lemma is looked up in the module-level ``lemmas`` dict, first exactly
    as given and then with surrounding whitespace stripped, so a stray space
    in the input box does not cause a miss.

    Args:
        lemma: The text entered by the user.

    Returns:
        An HTML string: the noun declension table when the first stored form
        has an ``N`` in its PPOS tag, a notice that only nouns are supported
        otherwise, or a "no results" message when the lemma is unknown or has
        no stored forms.
    """
    import html  # local import keeps the block self-contained

    results = lemmas.get(lemma)
    if not results and isinstance(lemma, str):
        results = lemmas.get(lemma.strip())
    if not results:
        # The return value is rendered as HTML, so the echoed user input
        # must be escaped to avoid injecting markup into the page.
        return f"No results found for {html.escape(str(lemma))}"

    if 'N' in results[0]['PPOS']:
        return create_noun_table(lemma, results)
    return "Only noun tables are currently supported."
# Gradio front end: a single text input passed to search_lemma, whose return
# value is displayed as HTML.
iface = gr.Interface(
    fn=search_lemma,
    inputs="text",
    outputs="html",
    title="Lemma Search",
    description="Enter a lemma to search for its declensions and pronunciations."
)

# Launch the interface only when this file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()
|