# File size: 4,555 Bytes
# Revisions (file-viewer blame column): 9481a42 c5139c6 9481a42 c5139c6 9481a42 c5139c6 9481a42 f249f4d 9481a42 c5139c6 9481a42 c5139c6 9481a42 3d37e38
# (file-viewer line-number gutter, 1-133, omitted)
import streamlit as st
from datasets import load_dataset
import csv
import datetime as dt
import random
import os
from huggingface_hub import Repository, HfApi
# --- Hugging Face Hub configuration ---------------------------------------
# Access token comes from the HF_TOKEN environment variable (None when unset).
HF_API_KEY = os.getenv("HF_TOKEN")
api = HfApi(token=HF_API_KEY)

# Dataset repository used both to load sentences and to receive submissions.
REPO_ID = "imomayiz/darija-english"
DATASET_REPO_URL = f"https://huggingface.co/datasets/{REPO_ID}"

# Relative path of the submissions CSV.
SUBMISSIONS_DATA_FILE = os.path.join("submissions", "submissions.csv")

# Clone of the dataset repository, checked out into the local "submissions"
# directory when this module is executed.
submissions_repo = Repository(
    clone_from=DATASET_REPO_URL,
    local_dir="submissions",
    use_auth_token=HF_API_KEY,
)
def load_data(repo_id):
    """Load the ``sentences`` split of the ``sentences`` configuration.

    Args:
        repo_id: Identifier of the dataset repository to load from.

    Returns:
        Whatever ``load_dataset`` returns for that configuration and split.
    """
    return load_dataset(str(repo_id), name="sentences", split="sentences")
def fetch_sentence(dataset, column_name="darija_ar"):
    """Return the ``column_name`` value of one randomly chosen row.

    Args:
        dataset: Sized, integer-indexable collection of row mappings.
        column_name: Key to read from the selected row.

    Returns:
        The value stored under ``column_name`` in the selected row.

    Raises:
        ValueError: If ``dataset`` is empty (there is no index to draw).
    """
    # randrange(n) draws from 0..n-1, the same range as randint(0, n - 1).
    row_index = random.randrange(len(dataset))
    return dataset[row_index][column_name]
def store_submission(sentence: str, translation: str, translation_fr: str):
    """Append one submission to the local CSV and upload the file to the Hub.

    Nothing is written or uploaded unless ``sentence`` is non-empty and at
    least one of the two translations is non-empty.

    Args:
        sentence: Source sentence; stored in the ``darija_ar`` column.
        translation: English translation; stored in the ``eng`` column.
        translation_fr: Darija in Latin characters; stored in the ``darija``
            column.
    """
    if not sentence or not (translation or translation_fr):
        return

    # encoding="utf-8": submissions can contain non-ASCII text, so do not rely
    # on the platform's default encoding.
    # newline="": required by the csv module so row terminators are written
    # as-is (otherwise Windows produces "\r\r\n").
    with open(SUBMISSIONS_DATA_FILE, "a", encoding="utf-8", newline="") as csvfile:
        writer = csv.DictWriter(
            csvfile,
            fieldnames=["darija", "eng", "darija_ar", "time"],
        )
        # No header row is written; columns follow the fieldnames order above.
        writer.writerow(
            {
                "darija_ar": sentence,
                "eng": translation,
                "darija": translation_fr,
                "time": str(dt.datetime.now()),
            }
        )

    # NOTE(review): the whole local CSV is uploaded to this path in the repo;
    # confirm the local copy already holds earlier submissions.
    api.upload_file(
        path_or_fileobj=SUBMISSIONS_DATA_FILE,
        path_in_repo=SUBMISSIONS_DATA_FILE,
        repo_id=REPO_ID,
        repo_type="dataset",
        commit_message="New submission",
    )
    st.success(f"Submitted at {DATASET_REPO_URL}/{SUBMISSIONS_DATA_FILE}")
# Load the dataset at module level; main() reads this global whenever it
# needs a sentence to display.
dataset = load_data(REPO_ID)
def _show_new_sentence():
    """Button callback: pick a new sentence and clear both translation fields."""
    st.session_state.sentence = fetch_sentence(dataset)
    # The text inputs are keyed on these session-state entries, so resetting
    # them here empties the fields on the rerun that follows the click.
    st.session_state.translation_input = ""
    st.session_state.translation_input_fr = ""


def main():
    """Render the translation page and handle the two buttons.

    Shows one sentence from the module-level ``dataset``, lets the user ask
    for another one, collects an English translation and/or a Latin-script
    Darija transcription, and passes them to ``store_submission`` on submit.
    """
    import html  # local import: only used to escape the sentence below

    # Seed session state the first time the script runs for a session.
    if "sentence" not in st.session_state:
        st.session_state.sentence = fetch_sentence(dataset)
    if "translation_input" not in st.session_state:
        st.session_state.translation_input = ""
    if "translation_input_fr" not in st.session_state:
        st.session_state.translation_input_fr = ""
    if "display_new" not in st.session_state:
        st.session_state.display_new = False

    st.title("Translate From Arabic to English")

    st.markdown(
        """This mini-app allows you to contribute to the **darija-english** dataset
as part of [DODa](https://darija-open-dataset.github.io/)
project. To contribute, simply translate the given sentence from Arabic to English.
The translated sentence will be submitted to the dataset
[here](https://huggingface.co/datasets/imomayiz/darija-english)."""
    )

    st.text("")

    # The sentence is dataset text rendered as raw HTML, so escape it first.
    safe_sentence = html.escape(str(st.session_state.sentence))
    st.write(f"""
<div style="
padding: 5px;
border: 1px solid #000000;
border-radius: 5px;
">
<p style="font-size: 20px;">{safe_sentence}.</p>
</div>""", unsafe_allow_html=True)

    # "New Sentence" button. The callback stores its result in session state;
    # passing fetch_sentence directly would discard the returned sentence.
    st.session_state.display_new = st.button(
        "New Sentence",
        on_click=_show_new_sentence,
    )

    # Input field for the English translation. It is bound to session state
    # through its key, so the value is available as
    # st.session_state.translation_input.
    translation_input = st.text_input(
        "Enter translation to english: ",
        key="translation_input",
    )

    # Input field for the Latin-script Darija transcription, bound the same way.
    translation_input_fr = st.text_input(
        "Enter translation to darija in latin characters: ",
        key="translation_input_fr",
    )

    # Submit button: store first, and report success only once storing has
    # returned without raising.
    if st.button("Submit Translation"):
        if translation_input or translation_input_fr:
            store_submission(
                st.session_state.sentence,
                translation_input,
                translation_input_fr,
            )
            st.success("Translation submitted successfully!")
        else:
            st.warning("Please enter a translation before submitting.")
# Script entry point: build the page when the file is run directly.
if __name__ == "__main__":
    main()