File size: 3,171 Bytes
4bce033
 
 
af59780
4bce033
 
 
 
 
 
 
 
 
 
 
 
 
9412e3a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3fddc37
 
9412e3a
3fddc37
 
9412e3a
 
 
4bce033
9412e3a
4bce033
9412e3a
 
725e817
9412e3a
 
 
 
 
 
 
 
 
725e817
9412e3a
 
 
 
 
 
0b8c05e
fefca88
9412e3a
fefca88
 
 
 
9412e3a
fefca88
 
 
3fddc37
9412e3a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import streamlit as st
import json
import pandas as pd
import streamlit.components.v1 as components

# Function to load JSONL file into a DataFrame
def load_jsonl(file_path):
    """Load a JSON Lines file into a pandas DataFrame.

    Each non-blank line of the file is parsed as one JSON document and
    becomes one row of the result.

    Args:
        file_path: Path of the ``.jsonl`` file to read.

    Returns:
        A ``pandas.DataFrame`` with one row per JSON line; an empty
        DataFrame when the file has no records.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    records = []
    # Decode explicitly as UTF-8 instead of relying on the platform's
    # locale encoding, which mangles non-ASCII text on some systems.
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            stripped = line.strip()
            # Blank lines (e.g. a trailing newline at end of file) are not
            # records; json.loads would raise on them.
            if not stripped:
                continue
            records.append(json.loads(stripped))
    return pd.DataFrame(records)

# Function to filter DataFrame by keyword
def filter_by_keyword(df, keyword, *, case=True):
    """Return the rows of ``df`` in which any cell contains ``keyword``.

    Every cell is compared through its string representation. The keyword
    is matched literally, so characters such as ``(``, ``+`` or ``?`` in
    user input are searched for as-is rather than being interpreted as a
    regular expression.

    Args:
        df: DataFrame to search.
        keyword: Substring to look for; converted with ``str()``.
        case: When True (the default) matching is case-sensitive; pass
            False for a case-insensitive search.

    Returns:
        A DataFrame holding the matching rows of ``df`` in their original
        order; empty when nothing matches or ``df`` has no rows.
    """
    needle = str(keyword)
    # Build one boolean mask column by column instead of running a Python
    # lambda once per row.
    mask = pd.Series(False, index=df.index, dtype=bool)
    # items() yields each column as a Series, even if labels are duplicated.
    for _, column in df.items():
        hits = column.astype(str).str.contains(
            needle, case=case, regex=False, na=False
        )
        mask = mask | hits.astype(bool)
    return df[mask]

# Function to generate HTML with textarea
def generate_html_with_textarea(text_to_speak):
    """Build an HTML page with a textarea and a "Read Aloud" button.

    The page shows ``text_to_speak`` in an editable textarea. Clicking the
    button passes the textarea's current content to the browser's speech
    synthesis (``SpeechSynthesisUtterance`` / ``speechSynthesis.speak``).

    Args:
        text_to_speak: Text to pre-fill the textarea with. Non-string
            values are converted with ``str()``.

    Returns:
        The HTML document as a string.
    """
    import html  # function-scope import keeps this block self-contained

    # Escape the text so data containing markup (e.g. "</textarea>" or a
    # <script> tag) is shown verbatim instead of breaking out of the
    # textarea.
    safe_text = html.escape(str(text_to_speak))
    return f'''
    <!DOCTYPE html>
    <html>
    <head>
        <title>Read It Aloud</title>
        <script type="text/javascript">
            function readAloud(button) {{
                // Several generated snippets may share one page, all with
                // the same textarea id. Read the textarea that precedes
                // the clicked button; fall back to the id lookup.
                let node = button ? button.previousElementSibling : null;
                while (node && node.tagName !== "TEXTAREA") {{
                    node = node.previousElementSibling;
                }}
                const source = node || document.getElementById("textArea");
                const speech = new SpeechSynthesisUtterance(source.value);
                window.speechSynthesis.speak(speech);
            }}
        </script>
    </head>
    <body>
        <h1>🔊 Read It Aloud</h1>
        <textarea id="textArea" rows="10" cols="80">
    {safe_text}
        </textarea>
        <br>
        <button onclick="readAloud(this)">🔊 Read Aloud</button>
    </body>
    </html>
    '''

# Streamlit App 🚀
st.title("USMLE Medical Questions Explorer with Speech Synthesis 🎙")

# Dropdown for file selection
file_option = st.selectbox("Select file:", ["usmle_16.2MB.jsonl", "usmle_2.08MB.jsonl"])
st.write(f"You selected: {file_option}")

# Load data: parse only the file the user selected. The options in the
# dropdown are the file names themselves, so the choice maps directly to
# the path to load.
data = load_jsonl(file_option)

# Streamlit re-runs this whole script on every interaction, and a button
# only reports True on the run triggered by its own click. The latest
# search result is therefore kept in st.session_state so that
# "Read All Rows" can still see it on the following run.
# Drop a stored result that was computed from a different file.
if "filtered_source" in st.session_state and st.session_state["filtered_source"] != file_option:
    del st.session_state["filtered_data"]
    del st.session_state["filtered_source"]

# Top 20 healthcare terms for USMLE
top_20_terms = ['Heart', 'Lung', 'Pain', 'Memory', 'Kidney', 'Diabetes', 'Cancer', 'Infection', 'Virus', 'Bacteria', 'Neurology', 'Psychiatry', 'Gastrointestinal', 'Pediatrics', 'Oncology', 'Skin', 'Blood', 'Surgery', 'Epidemiology', 'Genetics']

# Create Expander and Columns UI for terms
with st.expander("Search by Common Terms 📚"):
    cols = st.columns(4)
    # enumerate gives each term's position directly, laying the buttons
    # out across the four columns in round-robin order.
    for position, term in enumerate(top_20_terms):
        with cols[position % 4]:
            if st.button(f"{term}"):
                filtered_data = filter_by_keyword(data, term)
                st.session_state["filtered_data"] = filtered_data
                st.session_state["filtered_source"] = file_option
                st.write(f"Filtered Dataset by '{term}' 📊")
                st.dataframe(filtered_data)

# Text input for search keyword
search_keyword = st.text_input("Or, enter a keyword to filter data:")
if st.button("Search 🕵️‍♀️"):
    filtered_data = filter_by_keyword(data, search_keyword)
    st.session_state["filtered_data"] = filtered_data
    st.session_state["filtered_source"] = file_option
    st.write(f"Filtered Dataset by '{search_keyword}' 📊")
    st.dataframe(filtered_data)

# Button to read all filtered rows
if st.button("Read All Rows 📖"):
    # Use the result remembered from the most recent search, if any.
    if "filtered_data" in st.session_state:
        filtered_data = st.session_state["filtered_data"]
    else:
        filtered_data = None

    if filtered_data is not None and not filtered_data.empty:
        html_blocks = []
        for idx, row in filtered_data.iterrows():
            question_text = row.get("question", "No question field")
            documentHTML5 = generate_html_with_textarea(question_text)
            html_blocks.append(documentHTML5)
        all_html = ''.join(html_blocks)
        # The combined blocks are usually taller than the fixed frame, so
        # allow scrolling to keep every block reachable.
        components.html(all_html, width=1280, height=1024, scrolling=True)
    else:
        st.warning("No rows to read. 🚨")