anzorq committed on
Commit
c746df9
·
verified ·
1 Parent(s): 222ede1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -0
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import fasttext
3
+ from huggingface_hub import hf_hub_download
4
+ import re
5
+ import string
6
+
7
def load_GlotLID():
    """Download the GlotLID v3 model file and load it with fastText.

    The file ``model_v3.bin`` is requested from the ``cis-lmu/glotlid``
    repository via ``hf_hub_download`` and the resulting path is handed to
    ``fasttext.load_model``.

    Returns:
        The model object returned by ``fasttext.load_model``.
    """
    return fasttext.load_model(
        hf_hub_download(repo_id="cis-lmu/glotlid", filename="model_v3.bin")
    )


# Built at module level; compute() reads this global for its predictions.
model = load_GlotLID()
13
+
14
def preprocess_text(text):
    """Normalize *text* before it is passed to the language identifier.

    Newlines, ASCII digits and the characters ``: • # { | }`` are each
    replaced by a space; runs of whitespace are then collapsed to a single
    space and leading/trailing whitespace is removed.

    Args:
        text: The raw input string.

    Returns:
        The cleaned, single-line string (possibly empty).
    """
    noise_chars = ':•#{|}' + string.digits
    # Map every unwanted code point to a single space for str.translate.
    to_space = dict.fromkeys(map(ord, noise_chars), " ")
    flattened = text.replace('\n', ' ').translate(to_space)
    return re.sub(r'\s+', ' ', flattened).strip()
21
+
22
def compute(sentence):
    """Return the top 3 language predictions for *sentence* as text.

    The sentence is cleaned with ``preprocess_text`` and passed to the
    module-level ``model`` with ``k=3``. Each prediction becomes one line of
    the form ``"<label>: <score>"``, where the label is the part after the
    last ``__`` in the model's label and the score has four decimals.

    Args:
        sentence: The raw text entered by the user.

    Returns:
        The formatted predictions joined by newlines.
    """
    cleaned = preprocess_text(sentence)

    # Ask the model for its three best guesses: element 0 holds the labels,
    # element 1 the matching scores.
    prediction = model.predict(cleaned, k=3)

    lines = [
        f"{raw_label.split('__')[-1]}: {score:.4f}"
        for raw_label, score in zip(prediction[0], prediction[1])
    ]
    return "\n".join(lines)
34
+
35
# One text box in, one text box out; compute() produces the displayed text.
_sentence_box = gr.Textbox(label="Enter a sentence")
_predictions_box = gr.Textbox(label="Top 3 Language Predictions")

iface = gr.Interface(
    title="GlotLID: Language Identification (v3)",
    description="This app uses GlotLID v3 to identify the top 3 most likely languages for the input text.",
    fn=compute,
    inputs=_sentence_box,
    outputs=_predictions_box,
)

# Start serving the interface.
iface.launch()