File size: 3,711 Bytes
38116a9
 
 
 
 
1e1d6b5
413b7fa
38116a9
 
 
 
413b7fa
38116a9
 
 
 
 
 
4095e7a
38116a9
 
 
 
 
 
 
 
 
 
 
9fcb0ad
4095e7a
 
 
 
 
 
 
 
 
 
 
 
645b14b
 
 
 
4095e7a
9fcb0ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4095e7a
 
 
 
9fcb0ad
 
4095e7a
 
 
 
9fcb0ad
 
4095e7a
 
 
 
9fcb0ad
 
4095e7a
 
 
 
9fcb0ad
 
 
 
 
 
38116a9
 
 
 
9fcb0ad
 
4095e7a
9fcb0ad
 
 
 
38116a9
 
 
 
9fcb0ad
38116a9
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import html

import gradio as gr
import pandas as pd

# Load and parse the CSV file from Hugging Face
def load_data():
    """Download the RAVNlex table and group its rows into per-lemma lists.

    Rows are consumed in file order. A row whose ``#ORTO`` cell is missing
    or equal to ``'---'`` closes the current group. The first row after that
    opens a new group keyed by its ``#ORTO`` value (with ``"ORTO:"``
    removed); every following row is appended to that group as one form.

    Returns:
        A dict mapping each lemma to a list of dicts with the keys
        ``'word'``, ``'PPOS'``, ``'PHON1'``, ``'PHON2'`` and ``'COMM'``.
        Missing cells are stored as empty strings.
    """
    column_names = ["#ORTO", "#PPOS", "#PHON1", "#PHON2", "#COMM"]
    source = "https://huggingface.co/datasets/unijoh/RAVNlex/resolve/main/RAVNlex.csv"
    frame = pd.read_csv(
        source,
        delimiter='\t',
        encoding='iso-8859-10',
        names=column_names,
        dtype=str,
    )

    def cell(record, column, prefix):
        # Empty string for a missing cell, otherwise the text minus its prefix.
        value = record[column]
        if pd.isna(value):
            return ""
        return value.replace(prefix, "")

    grouped = {}
    active = None

    for _, record in frame.iterrows():
        orto = record['#ORTO']

        # Separator (or empty) row: the next data row starts a new lemma.
        if pd.isna(orto) or orto == '---':
            active = None
            continue

        # First row of a group names the lemma and (re)initialises its list.
        if active is None:
            active = orto.replace("ORTO:", "")
            grouped[active] = []
            continue

        comment = record['#COMM']
        grouped[active].append({
            'word': cell(record, '#ORTO', "ORTO:"),
            'PPOS': cell(record, '#PPOS', "PPOS:"),
            'PHON1': cell(record, '#PHON1', "PHON:"),
            'PHON2': cell(record, '#PHON2', "PHON:"),
            # The comment column is kept verbatim (no prefix is removed).
            'COMM': "" if pd.isna(comment) else comment,
        })

    return grouped

# Build the lemma lookup once at import time; search_lemma() reads this
# module-level dict on every query. Note that this triggers the download
# performed by load_data() as soon as the module is imported.
lemmas = load_data()

def create_noun_table(lemma, forms):
    """Render the forms of a noun as a 4x4 HTML declension table.

    Each form is placed in a cell chosen from its ``'PPOS'`` tag: the first
    character of the tag is skipped and the following characters are matched
    against the cell keys (``'Nsns'``, ``'Nsnst'``, ...). Keys with ``s`` or
    ``p`` in second position go under the "Eintal" (singular) and
    "Fleirtal" (plural) headings; keys ending in ``t`` go under "Bundið"
    (definite), the others under "Óbundið" (indefinite).
    NOTE(review): the third key character presumably encodes the case
    (n/a/d/g), one table row each -- confirm against the tagset.

    Args:
        lemma: The lemma the forms belong to. Currently unused; kept so the
            signature stays stable for callers.
        forms: Iterable of dicts providing at least ``'PPOS'`` and ``'word'``.

    Returns:
        An HTML ``<table>`` string. Cells without a matching form are empty;
        if several forms map to the same cell, the last one wins.
    """
    table_data = {
        'Nsns': '', 'Nsnst': '', 'Nsas': '', 'Nsast': '',
        'Nsds': '', 'Nsdst': '', 'Nsgs': '', 'Nsgst': '',
        'Npns': '', 'Npnst': '', 'Npas': '', 'Npast': '',
        'Npds': '', 'Npdst': '', 'Npgs': '', 'Npgst': ''
    }
    # The table used to spell this one key 'Npdt', unlike every other
    # '...st' key; keep accepting that spelling so such tags still resolve.
    aliases = {'Npdt': 'Npdst'}

    for form in forms:
        ppos = form['PPOS']
        # Try the five-character key first: a four-character slice can never
        # equal a '...st' key, and would drop a form tagged with a trailing
        # 't' into the neighbouring four-character cell instead.
        for candidate in (ppos[1:6], ppos[1:5]):
            key = aliases.get(candidate, candidate)
            if key in table_data:
                table_data[key] = form['word']
                break

    # Words come from an external data file and end up inside markup.
    cells = {key: html.escape(word) for key, word in table_data.items()}

    table = f"""
    <table border="1">
        <thead>
            <tr>
                <th colspan="2">Eintal</th>
                <th colspan="2">Fleirtal</th>
            </tr>
            <tr>
                <th>Óbundið</th>
                <th>Bundið</th>
                <th>Óbundið</th>
                <th>Bundið</th>
            </tr>
        </thead>
        <tbody>
            <tr>
                <td>{cells['Nsns']}</td>
                <td>{cells['Nsnst']}</td>
                <td>{cells['Npns']}</td>
                <td>{cells['Npnst']}</td>
            </tr>
            <tr>
                <td>{cells['Nsas']}</td>
                <td>{cells['Nsast']}</td>
                <td>{cells['Npas']}</td>
                <td>{cells['Npast']}</td>
            </tr>
            <tr>
                <td>{cells['Nsds']}</td>
                <td>{cells['Nsdst']}</td>
                <td>{cells['Npds']}</td>
                <td>{cells['Npdst']}</td>
            </tr>
            <tr>
                <td>{cells['Nsgs']}</td>
                <td>{cells['Nsgst']}</td>
                <td>{cells['Npgs']}</td>
                <td>{cells['Npgst']}</td>
            </tr>
        </tbody>
    </table>
    """
    return table

def search_lemma(lemma):
    """Return the HTML to display for a searched lemma.

    Args:
        lemma: The search text; it is looked up verbatim in the module-level
            ``lemmas`` dict.

    Returns:
        An HTML string: the noun table when the first stored form's PPOS
        contains ``'N'``, a notice when it does not, or a "No results"
        message when the lemma is unknown or has no stored forms.
    """
    results = lemmas.get(lemma)
    if not results:
        # The return value is rendered as HTML, so the echoed user input
        # must be escaped to keep typed markup from being interpreted.
        return f"No results found for {html.escape(str(lemma))}"

    if 'N' in results[0]['PPOS']:
        return create_noun_table(lemma, results)
    return "Only noun tables are currently supported."

# Gradio UI: a single text input is passed to search_lemma(), and the string
# it returns is displayed through the "html" output component.
iface = gr.Interface(
    fn=search_lemma,
    inputs="text",
    outputs="html",
    title="Lemma Search",
    description="Enter a lemma to search for its declensions and pronunciations."
)

# Start the web app only when this file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()