File size: 2,813 Bytes
f03ee05
73dfaab
f03ee05
 
 
 
277e947
f03ee05
73dfaab
 
f03ee05
 
 
277e947
 
 
f03ee05
73dfaab
 
 
 
 
 
 
 
 
896d88b
 
 
73dfaab
 
 
 
 
 
 
 
 
 
f03ee05
 
 
d536a72
f03ee05
 
d536a72
e97a1f1
5b98a9c
45e3d7e
5b98a9c
ce13d97
 
2e3fd22
793bb5a
2e3fd22
 
ce13d97
 
d7da2d0
0b739e4
793bb5a
0b739e4
 
fc28430
f95a7f7
f03ee05
 
 
 
 
 
742d18f
 
 
 
 
f03ee05
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import os
import time

import requests
import streamlit as st

# Inference endpoint for the "pere/nb-nn-translation" model; translate() POSTs
# its payload to this URL.
API_URL = "https://api-inference.huggingface.co/models/pere/nb-nn-translation"


def translate(text, wait=True):
    """Translate *text* through the hosted translation model.

    The first attempt (``wait=True``) sends ``wait_for_model=False`` so that a
    model which is still loading answers right away with an ``error`` and an
    ``estimated_time``.  That estimate drives a Streamlit progress bar, after
    which the request is retried exactly once with ``wait=False`` (which sends
    ``wait_for_model=True``).  If the retry still reports a loading model, the
    function gives up with a message instead of looping.

    Args:
        text: The text to translate.
        wait: If true, show a progress bar and retry once when the model
            reports that it is still loading; if false, do not retry.

    Returns:
        The translated text on success, otherwise a human-readable error
        message (network failures, non-JSON bodies and unexpected payloads
        are all reported this way rather than raised).

    Raises:
        KeyError: If the ``BEARER`` environment variable is not set.
    """
    headers = {"Authorization": f"Bearer {os.environ['BEARER']}"}
    payload = {
        "inputs": text,
        "options": {
            # Intentionally inverted: on the first pass we want the loading
            # error (with its time estimate) back immediately; only the retry
            # asks the API to wait for the model.
            "wait_for_model": not wait,
        },
    }

    try:
        response = requests.post(API_URL, headers=headers, json=payload)
        json_response = response.json()
    except (requests.RequestException, ValueError) as error:
        # Connection problems, or a body that is not JSON (e.g. an HTML
        # gateway error page), are reported like any other failure.
        return f"Oops, something went wrong: {error}"

    model_is_loading = (
        isinstance(json_response, dict)
        and "error" in json_response
        and "estimated_time" in json_response
    )
    if model_is_loading:
        st.write(json_response)
        if not wait:
            return "We could not load the model"
        with st.spinner(json_response["error"]):
            bar = st.progress(0)
            # +1 so that a fractional estimate is rounded up, not truncated.
            time_to_load = int(json_response["estimated_time"]) + 1
            for progress in range(time_to_load):
                bar.progress(progress / time_to_load)
                time.sleep(1)
            bar.empty()
        return translate(text, wait=False)

    # Guard against an empty list or a non-dict first element before indexing.
    if (isinstance(json_response, list)
            and json_response
            and isinstance(json_response[0], dict)
            and "translation_text" in json_response[0]):
        return json_response[0]["translation_text"]

    return f"Oops, something went wrong: {str(json_response)}"


# ---- Page setup (must be the first Streamlit call) --------------------------
st.set_page_config(page_title='Norwegian Bokmål to Nynorsk', page_icon='translator-icon.png')
st.title("Bokmål ⇔ Nynorsk")

# ---- Sidebar: introduction, sample picker and background note ---------------
st.sidebar.title("Translation Demo")
st.sidebar.write("""
Here are some sample texts in Norwegian Bokmål and Norwegian Nynorsk that you can try to translate. They are here presented in pairs (Bokmål, Nynorsk, Bokmål...). This way you can also see a suggested translation of the text. The model will automatically understand if the input is in Nynorsk or Bokmål, and switch the translation.
""")

# Sample sentences offered in the sidebar; the selected one pre-fills the
# text area below.
masked_texts = [
    "Hvordan kan man lære maskinen å oversette?",
    "Ho vil ikkje gi bort dei personlege dataa sine.",
    "Jeg tror vi har lært maskinene både nynorsk og bokmål",
    "Aust er ikkje riktig retning.",
]
input_text = st.sidebar.selectbox("Select a Text", options=masked_texts)

st.sidebar.write("""
As you can see there are a lot of similarities between the languages. Since there also are some grammatical differences, simple dictionary replacements do not give a good result. A finetuned model on top of a pretrained t5-base from a balanced corpus, seem to solve the task with a SACREBLEU-score of 88.17.
""")

# ---- Main area: editable input and the translate action ---------------------
text = st.text_area(" ", value=input_text, help="Enter your text here")

if st.button('Translate'):
    # Whitespace-only input counts as empty.
    if str(text).strip():
        st.info(str(translate(text)))
    else:
        st.warning('Please **enter text** for translation')