Spaces:

awacke1
/

RT-MLE

Sleeping

File size: 3,171 Bytes

4bce033
 
 
af59780
4bce033
 
 
 
 
 
 
 
 
 
 
 
 
9412e3a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3fddc37
 
9412e3a
3fddc37
 
9412e3a
 
 
4bce033
9412e3a
4bce033
9412e3a
 
725e817
9412e3a
 
 
 
 
 
 
 
 
725e817
9412e3a
 
 
 
 
 
0b8c05e
fefca88
9412e3a
fefca88
 
 
 
9412e3a
fefca88
 
 
3fddc37
9412e3a

import streamlit as st
import json
import pandas as pd
import streamlit.components.v1 as components

# Function to load JSONL file into a DataFrame
def load_jsonl(file_path):
    """Load a JSON Lines file into a pandas DataFrame.

    Every non-blank line of the file is parsed as one JSON value and
    becomes one row of the result, in file order.

    Args:
        file_path: Path of the JSONL file to read.

    Returns:
        A ``pandas.DataFrame`` built from the parsed records.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Read explicitly as UTF-8 instead of the platform default encoding, so
    # non-ASCII text decodes the same way on every system.
    with open(file_path, 'r', encoding='utf-8') as f:
        # Skip blank lines (e.g. a trailing newline at end of file), which
        # would otherwise make json.loads raise.
        records = [json.loads(line) for line in f if line.strip()]
    return pd.DataFrame(records)

# Function to filter DataFrame by keyword
def filter_by_keyword(df, keyword):
    """Return the rows of ``df`` in which any cell contains ``keyword``.

    Every cell is converted to its string form and searched for ``keyword``
    as a literal, case-sensitive substring.

    Args:
        df: DataFrame to search.
        keyword: Text to look for. It is matched literally, so characters
            such as ``(``, ``+`` or ``?`` have no special meaning.

    Returns:
        A DataFrame holding the matching rows (original index preserved).
        An empty keyword matches every row.
    """
    as_text = df.astype(str)
    # Start with "no match" for every row, then OR in each column's hits.
    # Working column by column keeps the scan vectorised instead of running
    # a Python lambda once per row.
    mask = pd.Series(False, index=df.index)
    for _, column in as_text.items():
        # regex=False: the keyword comes from free-text user input and must
        # not be interpreted as a regular expression (e.g. "(" or "C++"
        # would raise re.error).
        mask |= column.str.contains(keyword, regex=False, na=False).astype(bool)
    return df[mask]

# Function to generate HTML with textarea
def generate_html_with_textarea(text_to_speak):
    """Build an HTML page with a textarea and a "Read Aloud" button.

    The page embeds ``text_to_speak`` in a textarea. Clicking the button
    passes the textarea's current value to the browser's
    ``SpeechSynthesisUtterance`` / ``speechSynthesis.speak`` API.

    Args:
        text_to_speak: Text to place in the textarea. Non-string values are
            converted with ``str()``.

    Returns:
        The HTML document as a string.
    """
    # Function-scope import so this block does not depend on a file-level
    # import being present.
    from html import escape

    # Escape &, < and > so that content such as "</textarea>" or "pH < 7"
    # cannot terminate the textarea or inject markup. The browser decodes
    # the entities again inside the textarea, so the spoken text is unchanged.
    safe_text = escape(str(text_to_speak), quote=False)
    return f'''
    <!DOCTYPE html>
    <html>
    <head>
        <title>Read It Aloud</title>
        <script type="text/javascript">
            function readAloud() {{
                const text = document.getElementById("textArea").value;
                const speech = new SpeechSynthesisUtterance(text);
                window.speechSynthesis.speak(speech);
            }}
        </script>
    </head>
    <body>
        <h1>🔊 Read It Aloud</h1>
        <textarea id="textArea" rows="10" cols="80">
    {safe_text}
        </textarea>
        <br>
        <button onclick="readAloud()">🔊 Read Aloud</button>
    </body>
    </html>
    '''

# Streamlit App 🚀
st.title("USMLE Medical Questions Explorer with Speech Synthesis 🎙")

# Dropdown for file selection
file_option = st.selectbox("Select file:", ["usmle_16.2MB.jsonl", "usmle_2.08MB.jsonl"])
st.write(f"You selected: {file_option}")

# Load data
# Only the selected file is parsed. Previously both files were loaded on
# every rerun and the selection always resolved to the small dataset.
data = load_jsonl(file_option)

# Streamlit reruns this script from the top on every interaction, and a
# button returns True only during the rerun caused by its own click. The
# latest filter result is therefore kept in session state so that it is
# still available when "Read All Rows" is clicked.
# Drop a stored result that was computed against a different file.
if st.session_state.get("filtered_source") != file_option:
    st.session_state["filtered_source"] = file_option
    st.session_state["filtered_data"] = None
    st.session_state["filtered_keyword"] = None

# Top 20 healthcare terms for USMLE
top_20_terms = ['Heart', 'Lung', 'Pain', 'Memory', 'Kidney', 'Diabetes', 'Cancer', 'Infection', 'Virus', 'Bacteria', 'Neurology', 'Psychiatry', 'Gastrointestinal', 'Pediatrics', 'Oncology', 'Skin', 'Blood', 'Surgery', 'Epidemiology', 'Genetics']

# Create Expander and Columns UI for terms
with st.expander("Search by Common Terms 📚"):
    cols = st.columns(4)
    # Lay the term buttons out round-robin across the four columns.
    for position, term in enumerate(top_20_terms):
        with cols[position % 4]:
            if st.button(f"{term}"):
                st.session_state["filtered_data"] = filter_by_keyword(data, term)
                st.session_state["filtered_keyword"] = term

# Text input for search keyword
search_keyword = st.text_input("Or, enter a keyword to filter data:")
if st.button("Search 🕵️‍♀️"):
    st.session_state["filtered_data"] = filter_by_keyword(data, search_keyword)
    st.session_state["filtered_keyword"] = search_keyword

# Show the most recent filter result (from either kind of search)
filtered_data = st.session_state["filtered_data"]
if filtered_data is not None:
    st.write(f"Filtered Dataset by '{st.session_state['filtered_keyword']}' 📊")
    st.dataframe(filtered_data)

# Button to read all filtered rows
if st.button("Read All Rows 📖"):
    if filtered_data is not None and not filtered_data.empty:
        if "question" in filtered_data.columns:
            questions = filtered_data["question"].astype(str).tolist()
        else:
            questions = ["No question field"] * len(filtered_data)
        # Render ONE document containing every question. Concatenating one
        # full HTML document per row repeats the "textArea" id, so each
        # button would read only the first textarea.
        all_html = generate_html_with_textarea("\n\n".join(questions))
        components.html(all_html, width=1280, height=1024)
    else:
        st.warning("No rows to read. 🚨")