File size: 2,813 Bytes
f03ee05 73dfaab f03ee05 277e947 f03ee05 73dfaab f03ee05 277e947 f03ee05 73dfaab 896d88b 73dfaab f03ee05 d536a72 f03ee05 d536a72 e97a1f1 5b98a9c 45e3d7e 5b98a9c ce13d97 2e3fd22 793bb5a 2e3fd22 ce13d97 d7da2d0 0b739e4 793bb5a 0b739e4 fc28430 f95a7f7 f03ee05 742d18f f03ee05 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import os
import time
import requests
import streamlit as st
# Hugging Face Inference API endpoint for the pere/nb-nn-translation model;
# translate() POSTs its requests to this URL.
API_URL = "https://api-inference.huggingface.co/models/pere/nb-nn-translation"
def translate(text, wait=True, timeout=120):
    """Send *text* to the translation model at ``API_URL`` and return the result.

    The request is a JSON POST authenticated with the ``BEARER`` environment
    variable. If the API replies with an error that carries an
    ``estimated_time`` (the shape it uses while the model is still loading),
    a spinner and progress bar are shown for that many seconds and the
    request is retried exactly once.

    Args:
        text: The text sent as the request's ``inputs``.
        wait: When true, the first request is a non-blocking probe
            (``wait_for_model`` disabled) and a "model loading" reply
            triggers the countdown plus one retry. When false, the request
            asks the API to block until the model is ready and a "model
            loading" reply is reported as a failure.
        timeout: Seconds ``requests.post`` may wait for the server before
            giving up, so a stalled connection cannot hang the app forever.

    Returns:
        The translated string on success, otherwise a human-readable error
        message (this function reports API/network failures as text rather
        than raising).

    Raises:
        KeyError: If the ``BEARER`` environment variable is not set.
    """
    headers = {"Authorization": f"Bearer {os.environ['BEARER']}"}
    payload = {
        "inputs": text,
        # Intentionally inverted: the first call (wait=True) only probes so
        # that we get an estimated loading time back; the retry (wait=False)
        # lets the API block until the model is available.
        "options": {"wait_for_model": not wait},
    }

    try:
        response = requests.post(
            API_URL, headers=headers, json=payload, timeout=timeout
        )
        # .json() raises a ValueError subclass when the body is not JSON
        # (e.g. an HTML gateway error page).
        json_response = response.json()
    except (requests.RequestException, ValueError) as error:
        return f"Oops, something went wrong: {error}"

    model_is_loading = (
        isinstance(json_response, dict)
        and "error" in json_response
        and "estimated_time" in json_response
    )
    if model_is_loading:
        # Show the raw API reply on the page.
        st.write(json_response)
        if not wait:
            return "We could not load the model"
        with st.spinner(json_response["error"]):
            bar = st.progress(0)
            # +1 so a fractional estimate is rounded up, never down to zero.
            time_to_load = int(json_response["estimated_time"]) + 1
            for progress in range(time_to_load):
                bar.progress(progress / time_to_load)
                time.sleep(1)
            bar.empty()
        return translate(text, wait=False, timeout=timeout)

    # Guard against an empty list or a non-dict first element before
    # indexing; both would otherwise raise instead of reaching the fallback.
    if (
        isinstance(json_response, list)
        and json_response
        and isinstance(json_response[0], dict)
        and "translation_text" in json_response[0]
    ):
        return json_response[0]["translation_text"]

    return f"Oops, something went wrong: {str(json_response)}"
# ---- Page chrome ----------------------------------------------------------
st.set_page_config(page_title='Norwegian Bokmål to Nynorsk', page_icon='translator-icon.png')
st.title("Bokmål ⇔ Nynorsk")

# ---- Sidebar: explanation, sample picker, model note ----------------------
sidebar = st.sidebar
sidebar.title("Translation Demo")
sidebar.write("""
Here are some sample texts in Norwegian Bokmål and Norwegian Nynorsk that you can try to translate. They are here presented in pairs (Bokmål, Nynorsk, Bokmål...). This way you can also see a suggested translation of the text. The model will automatically understand if the input is in Nynorsk or Bokmål, and switch the translation.
""")

# Sample sentences offered in the select box, alternating Bokmål / Nynorsk.
masked_texts = [
    "Hvordan kan man lære maskinen å oversette?",
    "Ho vil ikkje gi bort dei personlege dataa sine.",
    "Jeg tror vi har lært maskinene både nynorsk og bokmål",
    "Aust er ikkje riktig retning.",
]
input_text = sidebar.selectbox("Select a Text", options=masked_texts)

sidebar.write("""
As you can see there are a lot of similarities between the languages. Since there also are some grammatical differences, simple dictionary replacements do not give a good result. A finetuned model on top of a pretrained t5-base from a balanced corpus, seem to solve the task with a SACREBLEU-score of 88.17.
""")

# ---- Main area: editable text box pre-filled with the chosen sample -------
text = st.text_area(
    " ",
    input_text,
    height=None,
    max_chars=None,
    key=None,
    help="Enter your text here",
)

# ---- Action: translate on click, but refuse blank input -------------------
if st.button('Translate'):
    if str(text).strip():
        st.info(str(translate(text)))
    else:
        st.warning('Please **enter text** for translation')
|