File size: 5,606 Bytes
38116a9
 
 
 
 
34202a3
 
38116a9
 
 
34202a3
 
 
 
8f7bb50
34202a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38116a9
05dab99
38116a9
 
 
 
9fcb0ad
4095e7a
fdcf728
 
 
 
 
 
 
 
4095e7a
4f5f3f9
4095e7a
971f291
4095e7a
4f5f3f9
645b14b
 
56e2fd6
 
645b14b
c78e3ff
 
9fcb0ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fdcf728
 
 
 
56e2fd6
 
fdcf728
 
 
 
56e2fd6
 
fdcf728
 
 
 
56e2fd6
 
fdcf728
 
 
 
56e2fd6
9fcb0ad
 
 
 
 
38116a9
 
 
 
9fcb0ad
971f291
4095e7a
9fcb0ad
 
 
 
38116a9
 
 
 
9fcb0ad
38116a9
 
 
 
 
56e2fd6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import html

import gradio as gr
import pandas as pd

# Load and parse the lexicon CSV (by default the RAVNlex sample on Hugging Face)
def load_data(url="https://huggingface.co/datasets/unijoh/RAVNlex/resolve/main/RAVNlex_small.csv"):
    """Read the tab-separated lexicon and group its rows by lemma.

    The rows are consumed as a sequence of entries. A row whose first field
    is ``---`` closes the current entry. The first row seen after that (or at
    the very start) only supplies the lemma name, taken from its first field
    with ``ORTO:`` removed; its remaining fields are not stored. Every
    following row, up to the next ``---``, is stored as one form of that
    lemma.

    Args:
        url: Anything ``pandas.read_csv`` accepts as its first argument: a
            URL, a local path or an open file-like object. Defaults to the
            RAVNlex sample file, so existing ``load_data()`` calls behave as
            before.

    Returns:
        dict mapping each lemma name to a list of form dicts with the keys
        ``word``, ``PPOS``, ``PHON1``, ``PHON2``, ``COMM`` and
        ``pronunciations`` (a list holding every column after the fifth).

    NOTE(review): ``read_csv`` is called without ``header=``, so pandas uses
    the first line of the file as column names and it never reaches the loop
    below. If the file has no header line, the first row is lost; confirm
    against the data.
    NOTE(review): a lemma name that occurs in more than one entry is reset to
    an empty list each time, so only the last entry's forms survive; confirm
    this is intended.
    """
    df = pd.read_csv(url, delimiter='\t', encoding='iso-8859-10', dtype=str)
    lemmas = {}
    current_lemma = None

    for row in df.itertuples(index=False, name=None):
        if len(row) < 5:
            print(f"Skipping problematic line: {row}")
            continue

        # Empty cells arrive as float NaN even with dtype=str; turn every
        # non-string cell into "" so the string methods below are safe.
        orto, ppos, phon1, phon2, comm, *pronunciations = (
            cell if isinstance(cell, str) else "" for cell in row
        )

        if orto == '---':
            # Entry separator: the next row starts a new lemma.
            current_lemma = None
        elif current_lemma is None:
            current_lemma = orto.replace("ORTO:", "")
            lemmas[current_lemma] = []
        else:
            lemmas[current_lemma].append({
                'word': orto.replace("ORTO:", ""),
                'PPOS': ppos.replace("PPOS:", ""),
                'PHON1': phon1.replace("PHON:", ""),
                'PHON2': phon2.replace("PHON:", ""),
                'COMM': comm,
                'pronunciations': pronunciations,
            })

    print("Loaded lemmas:", lemmas)  # Debugging output
    return lemmas

# Parsed once when the module is imported (this triggers the CSV download);
# search_lemma() reads this module-level dict on every query.
lemmas = load_data()

def _noun_tag(gender, number, case, article):
    """Return the lowercase PPOS tag for one declension slot.

    The tag is ``nc`` + gender + number + case + ``==`` + article + a
    two-letter tail, where the tail is ``ou`` for the genitive and ``uu`` for
    every other case.
    """
    tail = "ou" if case == "g" else "uu"
    return f"nc{gender}{number}{case}=={article}{tail}"


def create_noun_table(lemma, forms):
    """Render the declension of a noun as an HTML table.

    Each form is filed under its lowercased ``PPOS`` tag. The table has one
    row per case (n, a, d, g) and four columns: singular indefinite, singular
    definite, plural indefinite, plural definite. A cell shows the first
    non-empty word among the masculine, feminine and neuter tag for that
    slot, or nothing if none is present.

    Args:
        lemma: Lemma name; used only in the debug output.
        forms: Iterable of dicts with at least the keys ``'PPOS'`` and
            ``'word'``. If several forms share a tag, the last one wins.

    Returns:
        str: the HTML ``<table>`` markup.
    """
    genders = "mfn"
    numbers = "sp"
    cases = "nadg"
    articles = "id"

    # Generate the slots instead of listing them by hand. The hand-written
    # table had mistyped feminine/neuter plural tags ("ncfnn", "ncnna", ...)
    # and duplicate keys, so those plural forms could never be matched.
    table_data = {}
    for number in numbers:
        for case in cases:
            for gender in genders:
                for article in articles:
                    table_data[_noun_tag(gender, number, case, article)] = ''

    for form in forms:
        ppos = form['PPOS'].lower()  # Tags are matched case-insensitively
        word = form['word']
        if ppos in table_data:
            table_data[ppos] = word
        else:
            print(f"Unmatched key: {ppos} for word: {word} with PPOS: {ppos}")

    print(f"Final table data for {lemma}: {table_data}")  # Debugging output

    def cell(number, case, article):
        """Return the escaped word for one cell, trying m, f, n in order."""
        for gender in genders:
            word = table_data[_noun_tag(gender, number, case, article)]
            if word:
                # The words come from an external data file; escape them so
                # they cannot be interpreted as markup.
                return html.escape(str(word))
        return ''

    rows = []
    for case in cases:
        cells = "\n".join(
            f"                <td>{cell(number, case, article)}</td>"
            for number in numbers
            for article in articles
        )
        rows.append(f"            <tr>\n{cells}\n            </tr>")
    body = "\n".join(rows)

    table = f"""
    <table border="1">
        <thead>
            <tr>
                <th colspan="2">Eintal</th>
                <th colspan="2">Fleirtal</th>
            </tr>
            <tr>
                <th>Óbundið</th>
                <th>Bundið</th>
                <th>Óbundið</th>
                <th>Bundið</th>
            </tr>
        </thead>
        <tbody>
{body}
        </tbody>
    </table>
    """
    return table

def search_lemma(lemma):
    """Look up a lemma and return HTML for the output pane.

    Args:
        lemma: The text entered by the user.

    Returns:
        str: the noun declension table when the first stored form of the
        lemma has a tag starting with ``n``; a fixed notice when it does not;
        or a "No results found" message when the lemma is unknown or has no
        stored forms.
    """
    results = lemmas.get(lemma)
    if not results and isinstance(lemma, str):
        # Retry without surrounding whitespace so a stray space typed into
        # the text box does not hide an existing entry.
        results = lemmas.get(lemma.strip())
    if not results:
        # The return value is rendered as HTML, so the echoed query must be
        # escaped rather than inserted verbatim.
        return f"No results found for {html.escape(str(lemma))}"

    # Test the start of the tag: a plain substring test would also match any
    # non-noun tag that merely contains the letter "n". Every tag the noun
    # table can place starts with "nc", so no working case is lost.
    if results[0]['PPOS'].lower().startswith('n'):
        table = create_noun_table(lemma, results)
    else:
        table = "Only noun tables are currently supported."

    return table

# Minimal Gradio front end: one text box in, the HTML returned by
# search_lemma out.
iface = gr.Interface(
    title="Lemma Search",
    description="Enter a lemma to search for its declensions and pronunciations.",
    fn=search_lemma,
    inputs="text",
    outputs="html",
)

# Start the web UI only when this file is run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()