Spaces:
Running
Running
Commit
·
e7677fd
1
Parent(s):
ded4b0f
update app.py
Browse files- app.py +51 -5
- assets/{GlotLID_logo.svg → glotlid_logo.svg} +0 -0
app.py
CHANGED
|
@@ -18,6 +18,7 @@ import altair as alt
|
|
| 18 |
from altair import X, Y, Scale
|
| 19 |
import base64
|
| 20 |
import json
|
|
|
|
| 21 |
|
| 22 |
@st.cache_resource
|
| 23 |
def load_sp():
|
|
@@ -71,6 +72,34 @@ def render_svg(svg):
|
|
| 71 |
c.write(html, unsafe_allow_html=True)
|
| 72 |
|
| 73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
@st.cache_data
|
| 75 |
def convert_df(df):
|
| 76 |
# IMPORTANT: Cache the conversion to prevent computation on every rerun
|
|
@@ -93,7 +122,7 @@ def load_GlotLID_v2(model_name, file_name):
|
|
| 93 |
model_1 = load_GlotLID_v1(constants.MODEL_NAME, "model_v1.bin")
|
| 94 |
model_2 = load_GlotLID_v2(constants.MODEL_NAME, "model_v2.bin")
|
| 95 |
|
| 96 |
-
@st.cache_resource
|
| 97 |
def plot(label, prob):
|
| 98 |
|
| 99 |
ORANGE_COLOR = "#FF8000"
|
|
@@ -164,6 +193,11 @@ st.markdown("[.read())
|
| 166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
tab1, tab2 = st.tabs(["Input a Sentence", "Upload a File"])
|
| 168 |
|
| 169 |
with tab1:
|
|
@@ -192,18 +226,26 @@ with tab1:
|
|
| 192 |
clicked = st.button("Submit")
|
| 193 |
|
| 194 |
if sent:
|
| 195 |
-
sent = sent.replace('\n', '')
|
| 196 |
|
| 197 |
probs, labels = compute([sent], version=version)
|
| 198 |
prob = probs[0]
|
| 199 |
label = labels[0]
|
| 200 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
# plot
|
| 202 |
plot(label, prob)
|
| 203 |
|
| 204 |
-
|
| 205 |
-
with open("logs.txt", "a") as f:
|
| 206 |
-
f.write(sent + "\n")
|
| 207 |
with tab2:
|
| 208 |
|
| 209 |
version = st.radio(
|
|
@@ -255,3 +297,7 @@ with tab2:
|
|
| 255 |
file_name="GlotLID.csv",
|
| 256 |
mime="text/csv",
|
| 257 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
from altair import X, Y, Scale
|
| 19 |
import base64
|
| 20 |
import json
|
| 21 |
+
import os
|
| 22 |
|
| 23 |
@st.cache_resource
|
| 24 |
def load_sp():
|
|
|
|
| 72 |
c.write(html, unsafe_allow_html=True)
|
| 73 |
|
| 74 |
|
| 75 |
+
@st.cache_data
|
| 76 |
+
def render_metadata():
|
| 77 |
+
"""Renders the metadata."""
|
| 78 |
+
html = r"""<p align="center">
|
| 79 |
+
<a href="https://huggingface.co/cis-lmu/glotlid"><img alt="HuggingFace Model" src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-8A2BE2"></a>
|
| 80 |
+
<a href="https://github.com/cisnlp/GlotLID"><img alt="GitHub" src="https://img.shields.io/badge/%F0%9F%93%A6%20GitHub-orange"></a>
|
| 81 |
+
<a href="https://github.com/cisnlp/GlotLID/blob/main/LICENSE"><img alt="GitHub license" src="https://img.shields.io/github/license/cisnlp/GlotLID?logoColor=blue"></a>
|
| 82 |
+
<a href="."><img alt="GitHub stars" src="https://img.shields.io/github/stars/cisnlp/GlotLID"></a>
|
| 83 |
+
<a href="https://arxiv.org/abs/2310.16248"><img alt="arXiv" src="https://img.shields.io/badge/arXiv-2310.16248-b31b1b.svg"></a>
|
| 84 |
+
</p>"""
|
| 85 |
+
c = st.container()
|
| 86 |
+
c.write(html, unsafe_allow_html=True)
|
| 87 |
+
|
| 88 |
+
@st.cache_data
|
| 89 |
+
def citation():
|
| 90 |
+
"""Renders the metadata."""
|
| 91 |
+
_CITATION = """
|
| 92 |
+
@inproceedings{
|
| 93 |
+
kargaran2023glotlid,
|
| 94 |
+
title={GlotLID: Language Identification for Low-Resource Languages},
|
| 95 |
+
author={Kargaran, Amir Hossein and Imani, Ayyoob and Yvon, Fran{\c{c}}ois and Sch{\"u}tze, Hinrich},
|
| 96 |
+
booktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},
|
| 97 |
+
year={2023},
|
| 98 |
+
url={https://openreview.net/forum?id=dl4e3EBz5j}
|
| 99 |
+
}"""
|
| 100 |
+
st.code(_CITATION, language="python", line_numbers=False)
|
| 101 |
+
|
| 102 |
+
|
| 103 |
@st.cache_data
|
| 104 |
def convert_df(df):
|
| 105 |
# IMPORTANT: Cache the conversion to prevent computation on every rerun
|
|
|
|
| 122 |
model_1 = load_GlotLID_v1(constants.MODEL_NAME, "model_v1.bin")
|
| 123 |
model_2 = load_GlotLID_v2(constants.MODEL_NAME, "model_v2.bin")
|
| 124 |
|
| 125 |
+
# @st.cache_resource
|
| 126 |
def plot(label, prob):
|
| 127 |
|
| 128 |
ORANGE_COLOR = "#FF8000"
|
|
|
|
| 193 |
|
| 194 |
render_svg(open("assets/glotlid_logo.svg").read())
|
| 195 |
|
| 196 |
+
render_metadata()
|
| 197 |
+
|
| 198 |
+
st.markdown("**GlotLID** is an open-source language identification model with support for more than **1600 languages**.")
|
| 199 |
+
|
| 200 |
+
|
| 201 |
tab1, tab2 = st.tabs(["Input a Sentence", "Upload a File"])
|
| 202 |
|
| 203 |
with tab1:
|
|
|
|
| 226 |
clicked = st.button("Submit")
|
| 227 |
|
| 228 |
if sent:
|
| 229 |
+
sent = sent.replace('\n', ' ')
|
| 230 |
|
| 231 |
probs, labels = compute([sent], version=version)
|
| 232 |
prob = probs[0]
|
| 233 |
label = labels[0]
|
| 234 |
|
| 235 |
+
|
| 236 |
+
# Check if the file exists
|
| 237 |
+
if not os.path.exists('logs.txt'):
|
| 238 |
+
with open('logs.txt', 'w') as file:
|
| 239 |
+
pass
|
| 240 |
+
|
| 241 |
+
print(f"{sent}, {label}: {prob}")
|
| 242 |
+
with open("logs.txt", "a") as f:
|
| 243 |
+
f.write(f"{sent}, {label}: {prob}\n")
|
| 244 |
+
|
| 245 |
# plot
|
| 246 |
plot(label, prob)
|
| 247 |
|
| 248 |
+
|
|
|
|
|
|
|
| 249 |
with tab2:
|
| 250 |
|
| 251 |
version = st.radio(
|
|
|
|
| 297 |
file_name="GlotLID.csv",
|
| 298 |
mime="text/csv",
|
| 299 |
)
|
| 300 |
+
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
# citation()
|
assets/{GlotLID_logo.svg → glotlid_logo.svg}
RENAMED
|
File without changes
|