File size: 2,007 Bytes
d9514f5
 
 
 
 
 
 
c437348
d853c9c
c437348
d9514f5
 
c437348
d9514f5
d853c9c
c437348
d853c9c
7f57b0a
 
 
 
 
 
d9514f5
c437348
d9514f5
 
 
 
 
c437348
d9514f5
 
 
1b7e918
d9514f5
c437348
d9514f5
 
 
 
 
 
 
 
 
 
7f57b0a
d9514f5
 
 
 
28c72ae
 
d8ad733
d9514f5
 
 
 
d853c9c
d9514f5
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import streamlit as st
import datetime as dt
import random
import json 
import os
from huggingface_hub import CommitScheduler
from datasets import load_dataset

# Minute-resolution stamp taken once when the module is loaded; it is
# embedded in the submissions file name below.
today = f"{dt.datetime.now():%Y%m%d_%H%M}"

# Identifier of the dataset repository and the URL of its page.
REPO_ID = "imomayiz/darija-english"
DATASET_REPO_URL = "https://huggingface.co/datasets/" + REPO_ID

# Local folder and file that store_submission() appends to.
submissions_folder = "submissions"
submissions_file = os.path.join(submissions_folder, "submissions_" + today + ".json")

@st.cache_data
def load_data(repo_id, column_name="darija_ar"):
    """Load the ``sentences`` split of a dataset, keeping a single column.

    The function is wrapped with ``st.cache_data``.

    Args:
        repo_id: Dataset identifier handed to ``load_dataset``.
        column_name: Column to retain via ``select_columns``.

    Returns:
        The result of ``select_columns`` applied to the loaded split.
    """
    full_split = load_dataset(f"{repo_id}", name="sentences", split="sentences")
    return full_split.select_columns(column_name)

def fetch_sentence(dataset, column_name="darija_ar"):
    """Pick a random row from ``dataset`` and publish it to the session state.

    The chosen value is stored in ``st.session_state.sentence``, and both
    ``translation_input`` and ``translation_input_fr`` are reset to empty
    strings.

    Args:
        dataset: Indexable collection supporting ``len()`` whose rows can be
            looked up by ``column_name``.
        column_name: Key of the field to read from the selected row.

    Returns:
        The value of ``column_name`` in the randomly selected row.
    """
    # randint is inclusive on both ends, hence the "- 1".
    last_index = len(dataset) - 1
    picked = dataset[random.randint(0, last_index)][column_name]

    state = st.session_state
    state.sentence = picked
    state.translation_input = ""
    state.translation_input_fr = ""

    return picked

def store_submission(
        scheduler: CommitScheduler, sentence: str, translation: str, translation_fr: str
        ):
    """
    Append one submission as a JSON line to ``submissions_file``.

    The write happens while holding ``scheduler.lock``. Once the record is
    written, a success message pointing at the submissions folder of the
    dataset repository is displayed with ``st.success``.

    Args:
        scheduler: Object whose ``lock`` attribute guards the file write.
        sentence: Stored under the ``"darija_ar"`` key.
        translation: Stored under the ``"eng"`` key.
        translation_fr: Stored under the ``"darija"`` key.
    """
    record = {
        "darija": translation_fr,
        "eng": translation,
        "darija_ar": sentence,
        "timestamp": dt.datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f"),
    }

    with scheduler.lock, open(submissions_file, "a", encoding="utf-8") as out:
        # ensure_ascii=False keeps non-ASCII text readable in the file.
        out.write(json.dumps(record, ensure_ascii=False) + "\n")

    confirmation = f"""Translation submitted successfully.
        You will see your commit in a few minutes at 
        {DATASET_REPO_URL}/tree/main/{submissions_folder}.
        You can submit another translation or check the dataset."""
    st.success(confirmation)