File size: 3,578 Bytes
5bcc73a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import streamlit as st
from pathlib import Path
from uuid import uuid4
import csv
from datetime import datetime, timezone

from huggingface_hub import CommitScheduler


# Local folder holding the CSV files of flagged rows; created on import if it
# does not exist yet.
CSV_DATASET_DIR = Path("flagged_rows")
CSV_DATASET_DIR.mkdir(exist_ok=True, parents=True)

# A new UUID-based file name is generated every time this module is executed.
CSV_DATASET_PATH = CSV_DATASET_DIR.joinpath(f"train-{uuid4()}.csv")

# Set to True by write_header() once the column header has been written.
wrote_header = False


def write_header(writer):
    """Write the CSV column header and mark it as written.

    Sets the module-level ``wrote_header`` flag to ``True`` after the header
    row has been passed to ``writer``.

    Args:
        writer: An object exposing ``writerow``, such as the value returned
            by ``csv.writer``.
    """
    global wrote_header

    columns = ["date", "grascii", "longhand"]
    # One column per reason a row can be flagged for.
    columns += [
        "incorrect_grascii",
        "incorrect_longhand",
        "incorrect_shorthand",
        "improperly_cropped",
        "extraneous_marks",
    ]
    writer.writerow(columns)
    wrote_header = True


# Background scheduler that uploads the contents of CSV_DATASET_DIR to the
# "data" folder of the dataset repo named by the FEEDBACK_REPO secret.
# Writers should hold `scheduler.lock` while modifying files in that folder.
# NOTE(review): per the huggingface_hub docs, `every` is expressed in minutes
# (so 15 means one commit every 15 minutes) — confirm against the installed
# version.
scheduler = CommitScheduler(
    repo_id=st.secrets.FEEDBACK_REPO,
    repo_type="dataset",
    folder_path=CSV_DATASET_DIR,
    path_in_repo="data",
    every=15,
    token=st.secrets.HF_TOKEN,
)


def _collect_flagged_rows(report, today):
    """Build the CSV rows for every report row that should be submitted.

    A row is kept only when its "Flag" checkbox (position 3) is ticked and at
    least one of the reason checkboxes (positions 4-8) is ticked.

    Args:
        report: The edited DataFrame; columns are read by position
            (0 grascii, 1 longhand, 3 flag, 4-8 reasons).
        today: Value written to the ``date`` column of every row.

    Returns:
        A list of lists, one per submitted row, in CSV header order.
    """
    rows = []
    # Plain iteration instead of DataFrame.apply: apply is meant for computing
    # results, and pandas may invoke the callback on placeholder data (for
    # example when the frame is empty), which could record a bogus row.
    for values in report.itertuples(index=False, name=None):
        reasons = values[4:9]
        if values[3] and any(reasons):
            rows.append(
                [
                    today,
                    values[0],
                    values[1],
                    *(1 if reason else 0 for reason in reasons),
                ]
            )
    return rows


@st.dialog("Flag Results for Review", width="large")
def report_dialog(data):
    """Show the flagging dialog and append the submitted rows to the CSV.

    The rows in ``data`` are displayed in an editor with one "Flag" checkbox
    (ticked by default) and five reason checkboxes (unticked by default).
    On "Submit", every row that is still flagged and has at least one reason
    selected is appended to ``CSV_DATASET_PATH`` while holding
    ``scheduler.lock``; then ``st.session_state["report_submitted"]`` is set
    and the app is rerun.

    Args:
        data: DataFrame whose columns "0", "1" and "2" hold the Grascii
            string, the longhand string and the shorthand image. It is not
            modified.
    """
    st.write("Please select one or more reasons for flagging each row:")

    # Work on a copy so the checkbox columns are not added to the caller's
    # DataFrame.
    report_df = data.copy()
    report_df["3"] = True
    for reason_column in ("4", "5", "6", "7", "8"):
        report_df[reason_column] = False

    final_report = st.data_editor(
        report_df,
        hide_index=True,
        column_config={
            "0": "Grascii",
            "1": "Longhand",
            "2": st.column_config.ImageColumn("Shorthand", width="medium"),
            "3": st.column_config.CheckboxColumn("Flag"),
            "4": st.column_config.CheckboxColumn("Grascii is incorrect"),
            "5": st.column_config.CheckboxColumn("Longhand is incorrect"),
            "6": st.column_config.CheckboxColumn("Shorthand image is incorrect"),
            "7": st.column_config.CheckboxColumn(
                "Shorthand image is improperly cropped"
            ),
            "8": st.column_config.CheckboxColumn(
                "Shorthand image contains extraneous marks"
            ),
        },
        disabled=["0", "1", "2"],
        use_container_width=True,
    )

    st.write(
        "If you decide that a listed row does not need to be flagged, uncheck its 'Flag' box to prevent it from being included in the submission."
    )

    if st.button("Submit"):
        rows = _collect_flagged_rows(
            final_report, datetime.now(timezone.utc).date()
        )
        # Only touch the file when there is something to record, so an empty
        # submission does not leave an empty or header-only CSV behind.
        if rows:
            with scheduler.lock:
                with open(
                    CSV_DATASET_PATH, "a", newline="", encoding="utf-8"
                ) as f:
                    writer = csv.writer(f, dialect="unix")
                    if not wrote_header:
                        write_header(writer)
                    writer.writerows(rows)

        st.session_state["report_submitted"] = True
        st.rerun()