Spaces:

anzorq
/

glotlid

Running

anzorq committed on Jul 7, 2024

Commit

c746df9

verified ·

1 Parent(s): 222ede1

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import gradio as gr
+import fasttext
+from huggingface_hub import hf_hub_download
+import re
+import string
+def load_GlotLID():
+    """Fetch the GlotLID v3 model file from the Hugging Face Hub and load it.
+
+    Returns:
+        The model object produced by ``fasttext.load_model``.
+    """
+    # hf_hub_download hands back the path that fastText then loads from.
+    weights_file = hf_hub_download(repo_id="cis-lmu/glotlid", filename="model_v3.bin")
+    return fasttext.load_model(weights_file)
+# Load the model once at module level; `compute` reads this global on every call.
+model = load_GlotLID()
+def preprocess_text(text):
+    """Normalize raw text before it is passed to the language identifier.
+
+    Newlines, ASCII digits and the characters ``: • # { | }`` are each turned
+    into a space, runs of whitespace are collapsed to a single space, and
+    leading/trailing whitespace is removed.
+
+    Args:
+        text: The string to clean.
+
+    Returns:
+        The cleaned, single-line string (possibly empty).
+    """
+    # One translation table handles every character that becomes a space.
+    blanked = dict.fromkeys('\n' + ':•#{|}' + string.digits, " ")
+    spaced = text.translate(str.maketrans(blanked))
+    # Collapse whitespace runs created above (or already present), then trim.
+    return re.sub(r'\s+', ' ', spaced).strip()
+def compute(sentence):
+    """Return the top 3 language predictions for ``sentence`` as display text.
+
+    Args:
+        sentence: Raw text entered by the user.
+
+    Returns:
+        One ``label: score`` line per prediction (score formatted to four
+        decimals), joined with newlines. An empty string is returned when
+        nothing is left of the input after preprocessing.
+    """
+    sentence = preprocess_text(sentence)
+    if not sentence:
+        # Blank, digit-only or punctuation-only input has nothing to classify;
+        # skip the model rather than show predictions for an empty line.
+        return ""
+    # Get top 3 predictions
+    labels, scores = model.predict(sentence, k=3)
+    # Keep only the part after the last "__" of each label
+    # (presumably labels look like "__label__<code>" - confirm against the model).
+    return "\n".join(
+        f"{label.split('__')[-1]}: {score:.4f}"
+        for label, score in zip(labels, scores)
+    )
+# Single-textbox UI: the entered sentence goes through `compute`, and the
+# formatted predictions are shown in a second textbox.
+iface = gr.Interface(
+    title="GlotLID: Language Identification (v3)",
+    description="This app uses GlotLID v3 to identify the top 3 most likely languages for the input text.",
+    fn=compute,
+    inputs=gr.Textbox(label="Enter a sentence"),
+    outputs=gr.Textbox(label="Top 3 Language Predictions"),
+)
+# Start serving the app as soon as this script runs.
+iface.launch()