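# Spaces: Runtime error
"""Read an Excel sheet of key-phrase (KP) matches and let the user rate them.

The user evaluates the quality of the matching by ticking the matches shown
for each key-phrase. The original description mentions 2 Excel sheets; the
upload of the second file is currently commented out.
"""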
import streamlit as st | |
import ast | |
import pandas as pd | |
import random | |
from time import sleep | |
# Input widgets: similarity threshold (selects the worksheet to read), the
# number of key-phrases to sample, and the Excel file to evaluate.
threshold = st.radio(
    "Select threshold",
    options=["0.7", "0.8", "0.85", "0.87", "0.9", "0.95"],
    index=2,  # pre-select "0.85"
)
num_kp = st.slider(
    "Number of key-phrases to select",
    min_value=10,
    max_value=100,
    value=50,
    step=5,
)
xl1 = st.file_uploader("Choose first file", key="xl1")
# xl2 = st.file_uploader("Choose second file", key="xl2")
def merge_dicts(x):
    """Merge every dict in the Series ``x`` into one dict.

    Missing entries are skipped via ``dropna()``. When the same key occurs in
    several dicts, the value from the later dict wins.
    """
    merged = {}
    for mapping in x.dropna():
        for key, value in mapping.items():
            merged[key] = value
    return merged
def clean_dict(x):
    """Return ``x`` with every single quote replaced by a double quote."""
    single_quote, double_quote = "'", '"'
    return x.replace(single_quote, double_quote)
if xl1 is not None:
    # Read the worksheet that belongs to the selected similarity threshold.
    df1 = pd.read_excel(xl1, sheet_name=f"{threshold} Threshold")

    # "Matched KPs" cells hold dict literals as text: normalise the quotes,
    # then parse each cell into a real dict.
    df1["Matched KPs"] = df1["Matched KPs"].apply(clean_dict).apply(ast.literal_eval)

    # Drop "null" placeholders and direct matches (score >= 0.99) in one pass.
    # The "null" test comes first so the string is never compared to a float.
    df1["Matched KPs"] = df1["Matched KPs"].apply(
        lambda matches: {
            kp: score
            for kp, score in matches.items()
            if score != "null" and score < 0.99
        }
    )
    # Keep only rows that still have at least one candidate match (an empty
    # dict is falsy).
    df1 = df1[df1["Matched KPs"].map(bool)]

    # Merge the dicts of rows that share the same KP, then order the KPs by
    # how many matches each one has (most first).
    new_df = df1[["KP", "Matched KPs"]].groupby("KP").agg(merge_dicts)
    new_df["dict len"] = new_df["Matched KPs"].apply(len)
    new_df = new_df.sort_values(by="dict len", ascending=False)
    new_df.reset_index(inplace=True)

    with st.form("First excel file"):
        choices = []  # one bool per rendered checkbox
        i = 0  # number of checkboxes rendered; also used as the widget key
        # Never ask for more rows than are available.
        num_kp = min(num_kp, new_df.shape[0])
        for _, r1 in new_df.sample(n=num_kp, random_state=42).iterrows():
            kps1 = r1["Matched KPs"]
            if not kps1:
                continue
            col1, col2 = st.columns(2)
            with col1:
                st.write(r1["KP"])
            with col2:
                curr_keys = list(kps1)
                # Show at most 5 matches per KP; the fixed seed keeps the
                # selection identical across reruns.
                if len(curr_keys) > 5:
                    random.Random(42).shuffle(curr_keys)
                    curr_keys = curr_keys[:5]
                for kp1 in curr_keys:
                    choices.append(
                        st.checkbox(f"{kp1}: {kps1[kp1]:0.2f}", key=i)
                    )
                    i += 1
            st.markdown("""---""")
        submitted = st.form_submit_button("Submit")

    if submitted:
        selected = sum(1 for choice in choices if choice)
        if i:
            # Ticked count, total shown, and the ticked fraction.
            st.write(selected, i, f"{selected / i: 0.3f}")
        else:
            # No checkbox was rendered, so there is no ratio to compute;
            # dividing by i here would raise ZeroDivisionError.
            st.warning("No key-phrase matches were available to evaluate.")