File size: 4,555 Bytes
9481a42
 
 
 
 
 
c5139c6
9481a42
 
 
 
c5139c6
 
9481a42
 
c5139c6
9481a42
 
 
 
 
 
f249f4d
9481a42
 
 
 
 
 
 
 
 
 
 
 
c5139c6
9481a42
 
 
 
 
 
 
 
c5139c6
 
 
 
 
 
 
 
9481a42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3d37e38
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import streamlit as st
from datasets import load_dataset
import csv
import datetime as dt
import random
import os
from huggingface_hub import Repository, HfApi


# Hugging Face access token, read from the HF_TOKEN environment variable
# (None when the variable is not set).
HF_API_KEY = os.environ.get("HF_TOKEN", None)

# Hub client used by store_submission() to upload the submissions file.
api = HfApi(token=HF_API_KEY)

# Dataset repository the sentences are loaded from and submissions are sent to.
REPO_ID = "imomayiz/darija-english"
DATASET_REPO_URL = f"https://huggingface.co/datasets/{REPO_ID}"
# Local path of the CSV file that store_submission() appends to; it lives
# inside the "submissions" directory that the Repository below clones into.
SUBMISSIONS_DATA_FILE = os.path.join("submissions", "submissions.csv")

# Clone the dataset repository into the local "submissions" directory.
# NOTE(review): this runs at import time and presumably needs network access
# and a valid token — confirm for the deployment environment.
submissions_repo = Repository(
    local_dir="submissions", clone_from=DATASET_REPO_URL, use_auth_token=HF_API_KEY
)

def load_data(repo_id):
    """Load the ``sentences`` split of the ``sentences`` configuration.

    Args:
        repo_id: Identifier of the dataset repository to load from.

    Returns:
        Whatever ``load_dataset`` returns for that configuration and split.
    """
    return load_dataset(f"{repo_id}", split="sentences", name="sentences")

def fetch_sentence(dataset, column_name="darija_ar"):
    """Return the ``column_name`` value of a randomly chosen row of ``dataset``.

    Args:
        dataset: Indexable collection of rows that supports ``len()``; each
            row is looked up by ``column_name``.
        column_name: Key of the field to return from the chosen row.

    Returns:
        The selected row's value for ``column_name``.
    """
    last_index = len(dataset) - 1
    # randint is inclusive on both ends, hence the ``- 1`` above.
    chosen_row = dataset[random.randint(0, last_index)]
    return chosen_row[column_name]

def store_submission(sentence: str, translation: str, translation_fr: str) -> None:
    """Append one submission to the local CSV file and upload that file.

    Nothing is written or uploaded unless ``sentence`` is non-empty and at
    least one of the two translations is non-empty.

    Args:
        sentence: Source sentence; stored in the ``darija_ar`` column.
        translation: English translation; stored in the ``eng`` column.
        translation_fr: Second translation field; stored in the ``darija``
            column.
    """
    if not sentence or not (translation or translation_fr):
        return

    # Explicit UTF-8 so non-ASCII text does not depend on the platform's
    # default encoding; newline="" is what the csv module expects so that it
    # can control line endings itself.
    with open(SUBMISSIONS_DATA_FILE, "a", encoding="utf-8", newline="") as csvfile:
        writer = csv.DictWriter(
            csvfile, fieldnames=["darija", "eng", "darija_ar", "time"]
        )
        writer.writerow(
            {
                "darija_ar": sentence,
                "eng": translation,
                "darija": translation_fr,
                "time": str(dt.datetime.now()),
            }
        )

    # Upload only after the ``with`` block has closed (and therefore flushed)
    # the file; uploading while it is still open can miss the buffered row.
    api.upload_file(
        path_or_fileobj=SUBMISSIONS_DATA_FILE,
        path_in_repo=SUBMISSIONS_DATA_FILE,
        repo_id=REPO_ID,
        repo_type="dataset",
        commit_message="New submission",
    )
    st.success(f"Submitted at {DATASET_REPO_URL}/{SUBMISSIONS_DATA_FILE}")


# Load the dataset once at module level; main() reads this global when it
# picks the sentence to display.
dataset = load_data(REPO_ID)


def _show_new_sentence(dataset):
    """Button callback: replace the displayed sentence with a random one."""
    st.session_state.sentence = fetch_sentence(dataset)


def main():
    """Render the translation page and handle its two buttons.

    The page shows one sentence from the dataset, two text inputs (English
    translation and a second translation field) and two buttons: "New
    Sentence" picks another random sentence, "Submit Translation" stores the
    entered translations through ``store_submission``.
    """
    import html  # local import: only needed to escape the displayed sentence

    if "sentence" not in st.session_state:
        st.session_state.sentence = fetch_sentence(dataset)
    if 'translation_input' not in st.session_state:
        st.session_state.translation_input = ""
    if 'translation_input_fr' not in st.session_state:
        st.session_state.translation_input_fr = ""
    if 'display_new' not in st.session_state:
        st.session_state.display_new = False

    st.title("Translate From Arabic to English")

    st.markdown(
    """This mini-app allows you to contribute to the **darija-english** dataset 
    as part of [DODa](https://darija-open-dataset.github.io/)
    project. To contribute, simply translate the given sentence from Arabic to English.
    The translated sentence will be submitted to the dataset 
    [here](https://huggingface.co/datasets/imomayiz/darija-english)."""
    )

    st.text("")

    # The sentence is dataset content rendered as raw HTML, so escape it
    # to keep characters such as "<" or "&" from being treated as markup.
    safe_sentence = html.escape(str(st.session_state.sentence))
    st.write(f"""
        <div style="
            padding: 5px;
            border: 1px solid #000000;
            border-radius: 5px;
        ">
            <p style="font-size: 20px;">{safe_sentence}.</p>
        </div>""", unsafe_allow_html=True)

    # Display new sentence button. The callback has to write the new sentence
    # into session state itself: a callback's return value is discarded, so
    # passing fetch_sentence directly would leave the displayed sentence
    # unchanged.
    st.session_state.display_new = st.button("New Sentence",
                                             on_click=_show_new_sentence,
                                             args=(dataset,))

    # Input field for the English translation
    translation_input_placeholder = st.empty()

    with translation_input_placeholder.container():
        translation_input = st.text_input("Enter translation to english: ",
                                           st.session_state.translation_input)
        st.session_state.translation_input = translation_input

    # Input field for the Darija (Latin characters) translation
    translation_input_placeholder_fr = st.empty()

    with translation_input_placeholder_fr.container():
        translation_input_fr = st.text_input(
            "Enter translation to darija in latin characters: ",
            st.session_state.translation_input_fr
            )
        st.session_state.translation_input_fr = translation_input_fr

    # Submit button
    if st.button("Submit Translation"):
        if translation_input or translation_input_fr:
            # Store first, so success is only reported once the submission
            # has actually been written and uploaded without raising.
            store_submission(st.session_state.sentence,
                             st.session_state.translation_input,
                             st.session_state.translation_input_fr)
            st.success("Translation submitted successfully!")
        else:
            st.warning("Please enter a translation before submitting.")
        
# Build the page only when this file is executed as the main script.
if __name__ == "__main__":
    main()