# Spaces: Sleeping (appears to be page-status text captured along with the source; not part of the program)
import streamlit as st | |
import json | |
import pandas as pd | |
import plotly.express as px | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
import streamlit.components.v1 as components | |
# Search results shown in the grid; an empty frame until a search has been run.
filtered_data = pd.DataFrame()
# Index label of the row to read aloud; None means no row has been picked.
selected_row_index = None
# Function to load JSONL file into a DataFrame
def load_jsonl(file_path):
    """Load a JSON Lines file into a DataFrame, one row per record.

    Args:
        file_path: Path of a text file holding one JSON document per line.

    Returns:
        A ``pandas.DataFrame`` built from the decoded records. It is empty
        when the file contains no records.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's default encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        # Blank lines (typically a trailing newline at end of file) carry no
        # record and would make json.loads raise, so they are skipped.
        records = [json.loads(line) for line in f if line.strip()]
    return pd.DataFrame(records)
# Function to filter DataFrame by keyword
def filter_by_keyword(df, keyword, *, case=True):
    """Return the rows of *df* in which any cell contains *keyword*.

    Every cell is compared through its ``str()`` form, so numbers and nested
    values are searchable as they would be printed.

    Args:
        df: DataFrame to search.
        keyword: Text to look for. It is matched literally, not as a regular
            expression, so characters such as ``(``, ``+`` or ``?`` are safe.
            An empty keyword matches every row.
        case: When True (the default) the match is case-sensitive.

    Returns:
        A DataFrame holding the matching rows, with their original index
        labels. It has no rows when nothing matches or *df* is empty.
    """
    if df.empty:
        return df.iloc[0:0]

    # Build the row mask one column at a time: each column is matched with a
    # single vectorised call instead of running a Python lambda per row.
    mask = pd.Series(False, index=df.index, dtype=bool)
    for _, column in df.items():  # items() also copes with duplicate column labels
        hits = column.astype(str).str.contains(
            keyword, case=case, regex=False, na=False
        )
        mask |= hits.astype(bool)
    return df[mask]
# Function to generate HTML5 code with embedded text
def generate_html(question_text, answer_text):
    """Build a standalone HTML page that can read a question and answer aloud.

    The page shows the two texts, each followed by a button that passes the
    paragraph's text to the browser's ``speechSynthesis`` API.

    Args:
        question_text: Text placed in the ``questionArea`` paragraph.
        answer_text: Text placed in the ``answerArea`` paragraph.

    Returns:
        The HTML document as a string. Both texts are converted with ``str``
        and HTML-escaped, so characters such as ``<`` and ``&`` are displayed
        (and read) literally instead of being parsed as markup.
    """
    # Imported here so the function carries its own dependency.
    from html import escape

    safe_question = escape(str(question_text))
    safe_answer = escape(str(answer_text))
    # Doubled braces are literal braces of the JavaScript function body.
    return f'''
<!DOCTYPE html>
<html>
<head>
<title>Read It Aloud</title>
<script type="text/javascript">
function readAloud(id) {{
const text = document.getElementById(id).innerText;
const speech = new SpeechSynthesisUtterance(text);
window.speechSynthesis.speak(speech);
}}
</script>
</head>
<body>
<h1>π Read It Aloud</h1>
<p id="questionArea">{safe_question}</p>
<button onclick="readAloud('questionArea')">π Read Question Aloud</button>
<p id="answerArea">{safe_answer}</p>
<button onclick="readAloud('answerArea')">π Read Answer Aloud</button>
</body>
</html>
'''
# Streamlit App
st.title("Medical Licensing Exam Explorer with Speech Synthesis, Plotly and Seaborn π")

# Menu label -> dataset file. The size embedded in each file name decides
# which one is "small"; the two were previously wired the wrong way round.
dataset_paths = {
    "small_file.jsonl": "usmle_2.08MB.jsonl",
    "large_file.jsonl": "usmle_16.2MB.jsonl",
}

# Dropdown for file selection
file_option = st.selectbox("Select file:", list(dataset_paths))
st.write(f"You selected: {file_option}")

# Load only the dataset that was picked instead of parsing both files on
# every run.
data = load_jsonl(dataset_paths[file_option])

# Text input for search keyword
search_keyword = st.text_input("Enter a keyword to filter data (e.g., Heart, Lung, Pain, Memory):")

# Streamlit re-executes the script on every widget interaction, so the search
# result is kept in st.session_state; held in a plain variable it would be
# gone again by the time "Read Selected Row" is clicked.
if st.button("Search"):
    st.session_state["search_result"] = {
        "file": file_option,
        "keyword": search_keyword,
        "rows": filter_by_keyword(data, search_keyword),
    }

search_result = st.session_state.get("search_result")
# A stored result computed against the other dataset is ignored.
if search_result is not None and search_result["file"] == file_option:
    filtered_data = search_result["rows"]
    shown_keyword = search_result["keyword"]
    st.write(f"Filtered Dataset by '{shown_keyword}'")
    st.dataframe(filtered_data)
else:
    filtered_data = pd.DataFrame()

# Row picker: without it no row index is ever set and the read-aloud button
# could only show its warning.
selected_row_index = None
if not filtered_data.empty:
    selected_row_index = st.selectbox(
        "Select a row to read aloud:", filtered_data.index.tolist()
    )

# Button to read selected row aloud
if st.button("Read Selected Row"):
    if selected_row_index is not None:
        selected_row = filtered_data.loc[selected_row_index]
        question_text = selected_row.get("question", "No question field")
        answer_text = selected_row.get("answer", "No answer field")
        documentHTML5 = generate_html(question_text, answer_text)
        components.html(documentHTML5, width=1280, height=1024)
    else:
        st.warning("Please select a row first.")


def _show_matplotlib(fig):
    """Render a Matplotlib figure, then close it so open figures do not pile up."""
    st.pyplot(fig)
    plt.close(fig)


def _render_charts(frame):
    """Draw the Plotly and Seaborn charts for the first two columns of *frame*.

    Charts that need numeric input are skipped with a notice when the columns
    involved are not numeric, instead of raising.
    """
    if len(frame.columns) < 2:
        st.warning("At least two columns are needed to generate charts.")
        return

    x_col, y_col = frame.columns[0], frame.columns[1]
    numeric_frame = frame.select_dtypes(include="number")
    x_numeric = x_col in numeric_frame.columns
    y_numeric = y_col in numeric_frame.columns

    st.subheader("Plotly Charts π")
    # 1. Scatter Plot
    st.plotly_chart(px.scatter(frame, x=x_col, y=y_col))
    # 2. Line Plot
    st.plotly_chart(px.line(frame, x=x_col, y=y_col))
    # 3. Bar Plot
    st.plotly_chart(px.bar(frame, x=x_col, y=y_col))
    # 4. Histogram
    st.plotly_chart(px.histogram(frame, x=x_col))
    # 5. Box Plot
    st.plotly_chart(px.box(frame, x=x_col, y=y_col))

    st.subheader("Seaborn Charts π")
    if x_numeric or y_numeric:
        # 6. Violin Plot
        fig, ax = plt.subplots()
        sns.violinplot(x=x_col, y=y_col, data=frame, ax=ax)
        _show_matplotlib(fig)
        # 7. Swarm Plot
        fig, ax = plt.subplots()
        sns.swarmplot(x=x_col, y=y_col, data=frame, ax=ax)
        _show_matplotlib(fig)
    else:
        st.info("Violin and swarm plots skipped: neither of the first two columns is numeric.")

    if not numeric_frame.columns.empty:
        # 8. Pair Plot
        grid = sns.pairplot(numeric_frame)
        _show_matplotlib(grid.figure)
        # 9. Heatmap (correlation is only defined for the numeric columns)
        fig, ax = plt.subplots()
        sns.heatmap(numeric_frame.corr(), annot=True, ax=ax)
        _show_matplotlib(fig)
    else:
        st.info("Pair plot and heatmap skipped: the dataset has no numeric columns.")

    if x_numeric and y_numeric:
        # 10. Regplot (Regression Plot)
        fig, ax = plt.subplots()
        sns.regplot(x=x_col, y=y_col, data=frame, ax=ax)
        _show_matplotlib(fig)
    else:
        st.info("Regression plot skipped: the first two columns must both be numeric.")


# Plotly and Seaborn charts for EDA
if st.button("Generate Charts"):
    _render_charts(data)