Spaces:
Sleeping
Sleeping
File size: 3,018 Bytes
46af628 14ff9c4 b62e42f 6169f27 02d9582 e2393a1 776f615 2fc2e11 02d9582 e2393a1 d70c8d8 e2393a1 85fa59b e2393a1 85fa59b e2393a1 85fa59b 44e0cdf 85fa59b 44e0cdf 85fa59b 44e0cdf 85fa59b 26fc0ba 74bf666 85fa59b 74bf666 b0e183d 74bf666 ae013ce 74bf666 49b7338 cd42a41 49b7338 517f50a cd42a41 49b7338 6ca2a04 c516297 7bb6ae5 efd14d3 043fe71 da7e215 e2393a1 e8ad8b1 c475583 da7e215 44e0cdf da7e215 85fa59b 5b25195 da7e215 b3d7164 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
import streamlit as st
import pandas as pd
from huggingface_hub import Repository
import os
from pathlib import Path
import json
import numpy as np
# Declaring the variables for later use to talk to dataset
# the token is saved as secret key-value pair in the environment which can be access as shown below
# NOTE(review): when the secret is absent this falls back to `True`, which tells
# huggingface_hub to use the locally cached token rather than failing outright.
auth_token = os.environ.get("space_to_dataset") or True
DATASET_REPO_URL = 'ppsingh/annotation_data' # path to dataset repo
DATA_FILENAME = "paralist.json"
# local path of the JSON once the dataset repo has been cloned into ./data
DATA_FILE = os.path.join("data", DATA_FILENAME)
# cloning the dataset repo
# Data file name (same file as DATA_FILENAME; used by read_dataset below)
file_name = 'paralist.json'
# Clone the dataset repo and parse the annotation JSON (cached by Streamlit).
@st.cache
def read_dataset():
    """Clone the annotation dataset repo into ./data and load the JSON file.

    Returns:
        A ``(Repository, dict)`` pair: the cloned repo handle (needed later
        for pushing annotations back) and the parsed paragraph-list payload.
    """
    dataset_repo = Repository(
        local_dir="data",
        clone_from=DATASET_REPO_URL,
        repo_type="dataset",
        use_auth_token=auth_token,
    )
    with open(f"data/{file_name}", "r", encoding="utf8") as handle:
        payload = json.load(handle)
    return dataset_repo, payload
st.sidebar.markdown("""
# Data Annotation Demo
This app is demo how to use the space to provide user interface for the data annotation/tagging. The data resides in repo_type 'dataset'.
""")
# sidebar with info and drop down to select from the keys
topic = None
repo, paraList = read_dataset()
# outer-level keys of the JSON are the selectable dataset topics
keys = paraList.keys()
# BUG FIX: `keys is not None` was always true (dict.keys() never returns None);
# test truthiness instead so an empty dataset shows no selector.
if keys:
    topic = st.sidebar.selectbox(label="Choose dataset topic to load", options=keys)
with st.container():
    if topic is not None:
        subtopics = list(paraList[topic].keys())
        # BUG FIX: np.random.randint excludes its upper bound, so the old
        # `randint(0, len(subtopics) - 1)` could never choose the last subtopic
        # and raised ValueError when there was exactly one subtopic.
        val = np.random.randint(0, len(subtopics))
        tag = subtopics[val]
        st.write(tag)
        # Pick one of the first few paragraphs under the chosen subtopic.
        # NOTE(review): the upper bound 3 is hard-coded; presumably every
        # subtopic holds at least 3 entries -- confirm against the dataset.
        idx = np.random.randint(0, 3)
        st.write(idx)
        c1, c2, c3 = st.columns([3, 1, 1])
        with c1:
            st.header('Text')
            st.write(paraList[topic][tag][idx]['textsegment'])
        with c2:
            st.header('Tag')
            st.text(tag)
        with c3:
            st.header('Feedback')
            feedback = st.selectbox('0 If Tag is not a good keyword for text, 5 for perfect match', (0, 1, 2, 3, 4, 5))
            # BUG FIX: `if feedback:` treated a legitimate score of 0 as
            # "no feedback"; compare against None so 0 is displayed too.
            if feedback is not None:
                st.write(feedback)
# TODO: persist the annotation back to the dataset repo, e.g.:
# if st.button('Submit'):
#     paraList[topic][tag][idx]['annotation'].append(feedback)
#     with open('data/{}'.format(file_name), 'w', encoding="utf8") as json_file:
#         json.dump(paraList, json_file, ensure_ascii=True)
#     repo.push_to_hub('added new annotation')
|