Spaces:
Sleeping
Sleeping
File size: 5,743 Bytes
d9d1579 93aa2e5 d9d1579 cc4b5e3 d9d1579 cc4b5e3 d9d1579 d216e64 d9d1579 85ad75f d9d1579 85ad75f cc4b5e3 d9d1579 0610a27 d9d1579 170551b d9d1579 cc4b5e3 85ad75f d9d1579 cc4b5e3 b549483 cc4b5e3 e0138d6 cc4b5e3 e571d6c b149d51 d9d1579 cc4b5e3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
# Import from 3rd party libraries
import streamlit as st
import streamlit.components.v1 as components
# import streamlit_analytics
import pandas as pd
import numpy as np
import re
from sklearn.metrics.pairwise import cosine_similarity
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
nltk.download("stopwords")
nltk.download('wordnet')
from sentence_transformers import SentenceTransformer
import plotly.express as px
import pandas as pd
from sklearn.decomposition import PCA
import time
# Set the page title and page icon of the Streamlit app.
# NOTE(review): the page_icon string below does not look like a single emoji;
# it may have been mis-encoded at some point -- confirm against the original file.
st.set_page_config(page_title="Mental disorder by description", page_icon="π€")
# Matches a signed decimal number with optional fraction and exponent, e.g.
# "-0.5", "3.", ".25", "7", "1.2e-05".  The lookbehind prevents a match from
# starting in the middle of an identifier or another number (for example the
# "32" in "float32").
_NUMBER_RE = re.compile(r'(?<![\w.])[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?')


def convert_string_to_numpy_array(s):
    """Convert the textual form of a numeric vector into a NumPy array.

    Every number found in ``s`` is extracted in order of appearance; brackets,
    commas and whitespace between numbers are ignored.  Scientific notation
    (``1.2e-05``) and numbers written without fractional digits (``3.``,
    ``7``) are parsed as whole values rather than being truncated or skipped.

    Args:
        s: string containing the numbers, e.g. ``"[ 0.1 -2.5e-03  3. ]"``.

    Returns:
        A 1-D ``np.float64`` array; empty when ``s`` contains no numbers.
    """
    return np.array(_NUMBER_RE.findall(s), dtype=np.float64)
# Load the embedding model and the lemmatizer
@st.cache_resource
def get_models():
    """Create the sentence-embedding model and the WordNet lemmatizer.

    A status message is written to the page before the model is created and
    another one once it is available.

    Returns:
        tuple: ``(model, lemmatizer)`` -- the ``SentenceTransformer`` instance
        and a ``WordNetLemmatizer``.
    """
    st.write('*Loading the model...*')
    embedding_model = SentenceTransformer("stsb-bert-large")
    st.write("*The app is loaded and ready to use! :tada:*")
    return embedding_model, WordNetLemmatizer()
# Obtain the embedding model and lemmatizer from get_models(), which is
# wrapped in st.cache_resource.
model, lemmatizer = get_models()
# Set of English stop words taken from the NLTK stopwords corpus.
stop_words = set(stopwords.words('english'))
# Load the dataframe with disorder embeddings
@st.cache_data  # Streamlit caching decorator
def load_data():
    """Read ``icd_embedded.csv`` and decode the embeddings stored in it.

    The ``Embeddings`` column holds each vector as text; every entry is parsed
    with ``convert_string_to_numpy_array`` and kept in a new ``numpy_array``
    column.

    Returns:
        tuple: ``(df_icd, icd_embeddings)`` -- the dataframe including the new
        column, and a NumPy array built from the list of parsed vectors.
    """
    table = pd.read_csv('icd_embedded.csv')
    parsed_vectors = table['Embeddings'].apply(convert_string_to_numpy_array)
    table['numpy_array'] = parsed_vectors
    embedding_matrix = np.array(parsed_vectors.tolist())
    return table, embedding_matrix
# Disorder table and the array of its parsed embeddings, as returned by load_data().
df_icd, icd_embeddings = load_data()
# Create a list of disease names
@st.cache_data  # Streamlit caching decorator
def create_disease_list():
    """Return the values of the ``Disease`` column of ``df_icd`` as a list."""
    return list(df_icd["Disease"])
disease_names = create_disease_list()

# Start the session with an empty list of descriptions if none exists yet
if 'descriptions' not in st.session_state:
    st.session_state['descriptions'] = []
def similarity_top(descr_emb, disorder_embs):
    """Rank disorders by cosine similarity to a description embedding.

    Args:
        descr_emb: embedding of the description; it is reshaped to one row
            before the comparison.
        disorder_embs: disorder embeddings, matched by position with the
            module-level ``disease_names``.

    Returns:
        list: at most five ``(disease_name, similarity_score)`` tuples,
        ordered from the highest score to the lowest.
    """
    # cosine_similarity compares rows, so the query vector becomes a single row
    query_row = descr_emb.reshape(1, -1)
    # the result has one column (the single query); take it as a flat sequence
    score_column = cosine_similarity(disorder_embs, query_row)[:, 0]
    ranked = sorted(
        zip(disease_names, score_column),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return ranked[:5]
def vis_results_2d(input_embed):
    """Show the disorders and the described character in a 2-D PCA projection.

    PCA is fitted on the module-level ``icd_embeddings``; ``input_embed`` is
    then projected with the same fitted components, so both share one
    coordinate system.

    Args:
        input_embed: embedding of the user's description (reshaped to one row).

    Returns:
        None. The scatter plot is rendered in the Streamlit page.
    """
    # performing dimensionality reduction using PCA
    pca = PCA(n_components=2)
    disease_points = pca.fit_transform(icd_embeddings)

    # DataFrame for the disease embeddings
    disease_df = pd.DataFrame(disease_points, columns=['PC1', 'PC2'])
    disease_df['Type'] = 'Disease'
    disease_df['Name'] = disease_names

    # DataFrame for the character embedding
    character_point = pca.transform(input_embed.reshape(1, -1))
    character_df = pd.DataFrame(character_point, columns=['PC1', 'PC2'])
    character_df['Type'] = 'Character'
    # The label must live in 'Name': that is the column the plot uses for text.
    character_df['Name'] = 'Your character'

    combined_2d = pd.concat([disease_df, character_df], ignore_index=True)

    # interactive 2D scatter plot
    fig = px.scatter(combined_2d, x='PC1', y='PC2', text='Name', color='Type',
                     symbol='Type', width=800, height=800)
    # Embed the figure in the app; fig.show() would hand it to Plotly's own
    # renderer instead of the Streamlit page.
    st.plotly_chart(fig)
def vis_results_3d(input_embed):
    """Show the disorders and the described character in a 3-D PCA projection.

    PCA is fitted on the module-level ``icd_embeddings``; ``input_embed`` is
    then projected with the same fitted components, so both share one
    coordinate system.

    Args:
        input_embed: embedding of the user's description (reshaped to one row).

    Returns:
        None. The scatter plot is rendered in the Streamlit page.
    """
    # performing dimensionality reduction using PCA
    pca = PCA(n_components=3)
    disease_points = pca.fit_transform(icd_embeddings)

    # DataFrame for the disease embeddings
    disease_df = pd.DataFrame(disease_points, columns=['PC1', 'PC2', 'PC3'])
    disease_df['Type'] = 'Disease'
    disease_df['Name'] = disease_names

    # DataFrame for the character embedding
    character_point = pca.transform(input_embed.reshape(1, -1))
    character_df = pd.DataFrame(character_point, columns=['PC1', 'PC2', 'PC3'])
    character_df['Type'] = 'Character'
    # The label must live in 'Name': that is the column the plot uses for text.
    character_df['Name'] = 'Your character'

    combined_3d = pd.concat([disease_df, character_df], ignore_index=True)

    # interactive 3D scatter plot
    fig = px.scatter_3d(combined_3d, x='PC1', y='PC2', z='PC3', text='Name',
                        color='Type', symbol='Type', width=800, height=800)
    # Embed the figure in the app; fig.show() would hand it to Plotly's own
    # renderer instead of the Streamlit page.
    st.plotly_chart(fig)
# Page header and introduction
st.title("Detect your character's mental disorder! :books: :mag:")
st.markdown(
    "This mini-app predicts top-5 most likely mental disorders based on your description. The more information you provide, the more informative the results will be."
)
st.caption("Note that this app can't be used for diagnostic purposes.")

# Free-text description entered by the user (named so that it does not shadow
# the built-in ``input``).
description = st.text_input(label="Your description", placeholder="Insert a description of your character")

# NOTE(review): the original indentation was lost; everything below is assumed
# to belong to the ``if`` body because it depends on the description embedding.
if description:
    input_embed = model.encode(description)
    sim_score = similarity_top(input_embed, icd_embeddings)

    # Emoji shortcode names for the rank shown next to each prediction
    # (":one:", ":two:", ...); similarity_top returns at most five results.
    nums = {1: 'one', 2: 'two', 3: 'three', 4: 'four', 5: 'five'}
    for rank, (dis, value) in enumerate(sim_score, start=1):
        st.write(f":green[*Prediction number*] :{nums[rank]}: :")
        st.write(f"{dis} (similarity score:", value, ")")

    text_spinner_placeholder = st.empty()
    # Visualizations are currently disabled:
    # with st.spinner("Please wait while your visualizations are being generated..."):
    #     time.sleep(5)
    #     vis_results_2d(input_embed)
    #     vis_results_3d(input_embed)
|