import streamlit as st
from huggingface_hub import Repository
import os
import json
import numpy as np

 
# Variables used to talk to the dataset repo.

# The token is saved as a secret key-value pair in the Space environment and
# can be accessed as shown below; the `or True` fallback tells huggingface_hub
# to use the locally cached token when the secret is not set.
auth_token = os.environ.get("space_to_dataset") or True

DATASET_REPO_URL = 'ppsingh/annotation_data'   # path to the dataset repo
DATA_FILENAME = "paralist.json"                # data file inside the repo
DATA_FILE = os.path.join("data", DATA_FILENAME)

# Clone the dataset repo and read the JSON file with the paragraphs.
# allow_output_mutation skips Streamlit's hash check on the returned
# Repository object and allows the dict to be mutated later.
@st.cache(allow_output_mutation=True)
def read_dataset():
    repo = Repository(local_dir="data", clone_from=DATASET_REPO_URL,
                      repo_type="dataset", use_auth_token=auth_token)
    with open(DATA_FILE, 'r', encoding="utf8") as json_file:
        paraList = json.load(json_file)

    return repo, paraList
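# A minimal sketch of the shape this code assumes for paralist.json, inferred
# from how paraList is indexed below (topic -> tag -> list of records); the
# concrete names and values here are illustrative, not taken from the dataset:
#
# {
#   "topic_a": {
#     "tag_1": [
#       {"textsegment": "Some paragraph of text ...", "annotation": [3, 5]},
#       {"textsegment": "Another paragraph ...",      "annotation": []}
#     ]
#   }
# }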

# Sidebar with app info and a drop-down to select from the topic keys.
st.sidebar.markdown("""
# Data Annotation Demo
This app demonstrates how to use a Space as the user interface for data
annotation/tagging. The data itself resides in a repo of type 'dataset'.
""")

topic = None
repo, paraList = read_dataset()
# The outer-level keys in the JSON are the topics.
keys = list(paraList.keys())

if keys:
    topic = st.sidebar.selectbox(label="Choose dataset topic to load", options=keys)

 
with st.container():

    if topic is not None:
        # Pick a random subtopic (tag) within the chosen topic; np.random.randint
        # excludes the upper bound, so it must be len(subtopics), not len(subtopics)-1.
        subtopics = list(paraList[topic].keys())
        val = np.random.randint(0, len(subtopics))
        tag = subtopics[val]
        st.write(tag)

        # Pick a random paragraph index for that tag.
        idx = np.random.randint(0, len(paraList[topic][tag]))
        st.write(idx)

        c1, c2, c3 = st.columns([3, 1, 1])
        with c1:
            st.header('Text')
            st.write(paraList[topic][tag][idx]['textsegment'])

        with c2:
            st.header('Tag')
            st.text(tag)

        with c3:
            st.header('Feedback')
            feedback = st.selectbox(
                '0 if the tag is not a good keyword for the text, 5 for a perfect match',
                (0, 1, 2, 3, 4, 5))
            # A score of 0 is falsy, so check against None rather than truthiness.
            if feedback is not None:
                st.write(feedback)
        # Write-back path, currently disabled: append the feedback to the record,
        # rewrite the JSON file (note 'w' mode, not 'r'), and push it to the
        # dataset repo.
        # if st.button('Submit'):
        #     paraList[topic][tag][idx]['annotation'].append(feedback)
        #     with open(DATA_FILE, 'w', encoding="utf8") as json_file:
        #         json.dump(paraList, json_file, ensure_ascii=True)
        #     repo.push_to_hub('added new annotation')
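        # Caveat: every widget interaction reruns the script, so tag/idx above are
        # resampled before a Submit click could be handled. A minimal sketch of one
        # way to pin the current pick across reruns with st.session_state (assuming
        # a Streamlit version that provides it; the 'pick' key is illustrative):
        #
        # if 'pick' not in st.session_state:
        #     st.session_state['pick'] = (tag, idx)
        # tag, idx = st.session_state['pick']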
        
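# To try this app outside the Space (assuming streamlit and huggingface_hub
# are installed and a token for the dataset repo is available), save the file
# as app.py (the name is just a convention) and run:
#
#   streamlit run app.py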