Spaces:

awacke1
/

RT-MLE

Sleeping

App Files Files Community

RT-MLE / app.py

awacke1

Update app.py

9412e3a almost 2 years ago

raw

history blame

3.17 kB

	import streamlit as st
	import json
	import pandas as pd
	import streamlit.components.v1 as components

	# Function to load JSONL file into a DataFrame
	def load_jsonl(file_path):
	    """Load a JSON Lines file into a pandas DataFrame.

	    Every non-blank line of the file is parsed as one JSON document and
	    becomes one row of the result, in file order.

	    Args:
	        file_path: Path of the JSONL file to read.

	    Returns:
	        A ``pandas.DataFrame`` built from the parsed records.

	    Raises:
	        OSError: If the file cannot be opened.
	        json.JSONDecodeError: If a non-blank line is not valid JSON.
	    """
	    # Read as UTF-8 explicitly instead of relying on the platform default,
	    # which would garble non-ASCII text on some systems.
	    with open(file_path, 'r', encoding='utf-8') as f:
	        # Blank or whitespace-only lines carry no record and would make
	        # json.loads raise, so they are skipped.
	        records = [json.loads(line) for line in f if line.strip()]
	    return pd.DataFrame(records)

	# Function to filter DataFrame by keyword
	def filter_by_keyword(df, keyword):
	    """Return the rows of *df* in which any cell contains *keyword*.

	    Every cell is converted to its string form before matching. The
	    keyword is matched as a literal, case-sensitive substring, so
	    characters such as ``(``, ``+`` or ``.`` in user input are searched
	    for verbatim rather than being interpreted as a regular expression.

	    Args:
	        df: DataFrame to search.
	        keyword: Substring to look for.

	    Returns:
	        A DataFrame holding only the matching rows, with the original
	        index labels and columns.
	    """
	    # Build the mask column by column: each column is scanned with the
	    # vectorised string method instead of running a Python lambda per row.
	    matches = df.astype(str).apply(
	        lambda column: column.str.contains(keyword, regex=False)
	    )
	    return df[matches.any(axis=1)]

	# Function to generate HTML with textarea
	def generate_html_with_textarea(text_to_speak):
	    """Build an HTML page with a textarea and a "Read Aloud" button.

	    The button passes the textarea's current content to the browser's
	    ``speechSynthesis`` API.

	    Args:
	        text_to_speak: Initial content of the textarea. Non-string values
	            are converted with ``str()``.

	    Returns:
	        The HTML document as a string.
	    """
	    import html

	    # Escape the text so markup inside it (for example "</textarea>" or
	    # "<script>") is shown as text instead of being parsed as HTML.
	    escaped_text = html.escape(str(text_to_speak))
	    # The button finds the textarea through its own parent element rather
	    # than through a fixed id, so several generated fragments can be
	    # concatenated into one page and each button still reads its own text.
	    return f'''
	<!DOCTYPE html>
	<html>
	<head>
	<title>Read It Aloud</title>
	<script type="text/javascript">
	function readAloud(button) {{
	    const area = button.parentNode.querySelector("textarea");
	    const speech = new SpeechSynthesisUtterance(area.value);
	    window.speechSynthesis.speak(speech);
	}}
	</script>
	</head>
	<body>
	<h1>🔊 Read It Aloud</h1>
	<div>
	<textarea rows="10" cols="80">{escaped_text}</textarea>
	<br>
	<button onclick="readAloud(this)">🔊 Read Aloud</button>
	</div>
	</body>
	</html>
	'''

	# Streamlit App 🚀
	st.title("USMLE Medical Questions Explorer with Speech Synthesis 🎙")

	# Dropdown for file selection
	file_option = st.selectbox("Select file:", ["usmle_16.2MB.jsonl", "usmle_2.08MB.jsonl"])
	st.write(f"You selected: {file_option}")

	# Load only the file that was selected. Streamlit re-runs this script on
	# every interaction, so loading both files each time is wasted work, and
	# the selection must actually decide which dataset is searched.
	data = load_jsonl(file_option)

	# Top 20 healthcare terms for USMLE
	top_20_terms = ['Heart', 'Lung', 'Pain', 'Memory', 'Kidney', 'Diabetes', 'Cancer', 'Infection', 'Virus', 'Bacteria', 'Neurology', 'Psychiatry', 'Gastrointestinal', 'Pediatrics', 'Oncology', 'Skin', 'Blood', 'Surgery', 'Epidemiology', 'Genetics']


	def _show_filtered(keyword):
	    """Filter ``data`` by *keyword*, display the result, and remember it."""
	    result = filter_by_keyword(data, keyword)
	    # Keep the result in session state: a later button click re-runs the
	    # script from the top, and a plain local variable would be gone by then.
	    st.session_state["filtered_data"] = result
	    st.write(f"Filtered Dataset by '{keyword}' 📊")
	    st.dataframe(result)


	# Create Expander and Columns UI for terms
	with st.expander("Search by Common Terms 📚"):
	    cols = st.columns(4)
	    # Spread the term buttons round-robin over the four columns.
	    for position, term in enumerate(top_20_terms):
	        with cols[position % 4]:
	            if st.button(f"{term}"):
	                _show_filtered(term)

	# Text input for search keyword
	search_keyword = st.text_input("Or, enter a keyword to filter data:")
	if st.button("Search 🕵️‍♀️"):
	    _show_filtered(search_keyword)

	# Button to read all filtered rows
	if st.button("Read All Rows 📖"):
	    # On this re-run none of the filter buttons is pressed, so the rows to
	    # read come from the result stored by the most recent filter action.
	    filtered_data = st.session_state.get("filtered_data")
	    if filtered_data is not None and not filtered_data.empty:
	        html_blocks = [
	            generate_html_with_textarea(row.get("question", "No question field"))
	            for _, row in filtered_data.iterrows()
	        ]
	        # Scrolling is enabled because many rows will not fit in the fixed
	        # 1024px frame.
	        components.html(''.join(html_blocks), width=1280, height=1024, scrolling=True)
	    else:
	        st.warning("No rows to read. 🚨")