Spaces:
Sleeping
Sleeping
File size: 3,171 Bytes
4bce033 af59780 4bce033 9412e3a 3fddc37 9412e3a 3fddc37 9412e3a 4bce033 9412e3a 4bce033 9412e3a 725e817 9412e3a 725e817 9412e3a 0b8c05e fefca88 9412e3a fefca88 9412e3a fefca88 3fddc37 9412e3a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import streamlit as st
import json
import pandas as pd
import streamlit.components.v1 as components
# Function to load JSONL file into a DataFrame
def load_jsonl(file_path):
    """Load a JSON Lines file into a pandas DataFrame.

    Each non-blank line of the file is parsed as one JSON document and
    becomes one row of the result.

    Args:
        file_path: Path of the ``.jsonl`` file to read.

    Returns:
        A ``pandas.DataFrame`` with one row per JSON record.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        # Blank lines (typically a trailing newline at the end of the file)
        # are not records; skipping them avoids a spurious JSONDecodeError.
        records = [json.loads(line) for line in f if line.strip()]
    return pd.DataFrame(records)
# Function to filter DataFrame by keyword
def filter_by_keyword(df, keyword, *, case=True, regex=False):
    """Return the rows of *df* in which any cell contains *keyword*.

    Every cell is converted to its string form before matching, so numeric
    and other non-text columns are searched as well.

    Args:
        df: DataFrame to search.
        keyword: Text to look for in each cell.
        case: When True (the default) the match is case-sensitive.
        regex: When False (the default) *keyword* is matched literally, so
            user input such as ``"("`` or ``"C++"`` cannot raise a regex
            compilation error. Pass True to treat *keyword* as a pattern.

    Returns:
        A DataFrame holding only the matching rows, with the original
        index labels and columns preserved.
    """
    # Build the row mask one column at a time: this is vectorised (unlike a
    # row-wise ``apply``) and also behaves correctly on an empty frame.
    mask = pd.Series(False, index=df.index, dtype=bool)
    for _, column in df.items():
        hits = column.astype(str).str.contains(keyword, case=case, regex=regex)
        mask = mask | hits
    return df[mask]
# Function to generate HTML with textarea
def generate_html_with_textarea(text_to_speak):
    """Build a standalone HTML page that can read *text_to_speak* aloud.

    The page contains a textarea pre-filled with the text and a button
    whose click handler passes the textarea's current value to the
    browser's ``speechSynthesis`` API.

    Args:
        text_to_speak: Content for the textarea. Non-string values are
            converted with ``str()``.

    Returns:
        The HTML document as a string.
    """
    from html import escape

    # The text comes from dataset rows, so escape markup characters; otherwise
    # a value containing "</textarea>" or "<script>" would break out of the
    # textarea. The browser decodes the entities again, so the spoken text is
    # unchanged. Quotes are harmless inside a textarea and are left alone.
    safe_text = escape(str(text_to_speak), quote=False)
    return f'''
<!DOCTYPE html>
<html>
<head>
<title>Read It Aloud</title>
<script type="text/javascript">
function readAloud() {{
const text = document.getElementById("textArea").value;
const speech = new SpeechSynthesisUtterance(text);
window.speechSynthesis.speak(speech);
}}
</script>
</head>
<body>
<h1>π Read It Aloud</h1>
<textarea id="textArea" rows="10" cols="80">
{safe_text}
</textarea>
<br>
<button onclick="readAloud()">π Read Aloud</button>
</body>
</html>
'''
# Streamlit App π
st.title("USMLE Medical Questions Explorer with Speech Synthesis π")

# Dropdown for file selection
file_option = st.selectbox("Select file:", ["usmle_16.2MB.jsonl", "usmle_2.08MB.jsonl"])
st.write(f"You selected: {file_option}")

# Load only the dataset that was actually selected. (The previous code read
# both files on every rerun and then used the small one for either choice.)
data = load_jsonl(file_option)

# Streamlit reruns this script from the top on every widget interaction, and a
# button only reports True during the run triggered by its own click. The
# filtered result is therefore kept in session state so that "Read All Rows"
# can still see it on a later run. It is reset when another file is selected.
if st.session_state.get("filtered_source") != file_option:
    st.session_state["filtered_source"] = file_option
    st.session_state["filtered_data"] = None

# Top 20 healthcare terms for USMLE
top_20_terms = ['Heart', 'Lung', 'Pain', 'Memory', 'Kidney', 'Diabetes', 'Cancer', 'Infection', 'Virus', 'Bacteria', 'Neurology', 'Psychiatry', 'Gastrointestinal', 'Pediatrics', 'Oncology', 'Skin', 'Blood', 'Surgery', 'Epidemiology', 'Genetics']

# Create Expander and Columns UI for terms
with st.expander("Search by Common Terms π"):
    cols = st.columns(4)
    # enumerate gives the position directly instead of a list.index() scan
    for position, term in enumerate(top_20_terms):
        with cols[position % 4]:
            if st.button(f"{term}"):
                filtered_data = filter_by_keyword(data, term)
                st.session_state["filtered_data"] = filtered_data
                st.write(f"Filtered Dataset by '{term}' π")
                st.dataframe(filtered_data)

# Text input for search keyword
search_keyword = st.text_input("Or, enter a keyword to filter data:")
if st.button("Search π΅οΈββοΈ"):
    filtered_data = filter_by_keyword(data, search_keyword)
    st.session_state["filtered_data"] = filtered_data
    st.write(f"Filtered Dataset by '{search_keyword}' π")
    st.dataframe(filtered_data)

# Button to read all filtered rows
if st.button("Read All Rows π"):
    filtered_data = st.session_state["filtered_data"]
    if filtered_data is not None and not filtered_data.empty:
        questions = [
            str(row.get("question", "No question field"))
            for _, row in filtered_data.iterrows()
        ]
        # Render one document holding all questions. Concatenating one full
        # HTML page per row repeated the same element id and function name,
        # so every button ended up reading only the first question.
        all_html = generate_html_with_textarea("\n\n".join(questions))
        components.html(all_html, width=1280, height=1024)
    else:
        st.warning("No rows to read. π¨")
|