File size: 5,229 Bytes
ce2d794
 
 
 
 
 
4a66f10
 
 
 
 
 
 
ce2d794
0ac3298
ce2d794
 
 
 
 
 
 
 
 
 
 
 
 
4a66f10
ce2d794
 
 
 
 
4a66f10
 
 
 
ce2d794
 
 
 
 
 
8cebd63
 
 
 
 
 
 
4a66f10
 
0ac3298
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a66f10
0ac3298
 
ce2d794
0ac3298
 
 
 
 
 
ce2d794
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a66f10
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import streamlit as st
import json
import pandas as pd
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
import streamlit.components.v1 as components

# Rows produced by the most recent keyword search; empty until a search runs
filtered_data = pd.DataFrame()

# Index of the currently selected row (None means nothing is selected)
selected_row_index = None


# Function to load JSONL file into a DataFrame
def load_jsonl(file_path):
    """Load a JSON Lines file into a DataFrame.

    Every non-blank line of the file is parsed as one JSON document and
    becomes one row of the result.

    Args:
        file_path: Path of the JSONL file to read.

    Returns:
        A ``pandas.DataFrame`` with one row per parsed line (an empty frame
        when the file holds no records).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Read as UTF-8 explicitly instead of relying on the platform's default
    # encoding, which would garble non-ASCII characters on some systems.
    with open(file_path, 'r', encoding='utf-8') as f:
        # Blank or whitespace-only lines carry no record and would make
        # json.loads raise, so they are skipped.
        records = [json.loads(line) for line in f if line.strip()]
    return pd.DataFrame(records)

# Function to filter DataFrame by keyword
def filter_by_keyword(df, keyword, *, case=True, regex=True):
    """Return the rows of *df* in which any cell contains *keyword*.

    Each cell is converted to its string form before matching, so numbers
    and other non-string values are searched by their text representation.

    Args:
        df: DataFrame to search.
        keyword: Text to look for. It is interpreted as a regular expression
            when ``regex`` is true and the text is a valid pattern.
        case: When true (the default) the match is case-sensitive.
        regex: When true (the default) ``keyword`` is treated as a regular
            expression. A keyword that is not a valid pattern, such as one
            with an unbalanced parenthesis, is matched literally instead of
            raising.

    Returns:
        A new DataFrame holding only the matching rows, with the same
        columns as ``df``.
    """
    import re  # local import so the function does not depend on file-level imports

    # Row-wise apply cannot produce a boolean mask for a frame with no rows
    # or no columns; nothing can match there, so return an empty slice.
    if df.empty:
        return df.iloc[0:0].copy()

    # Free-text user input is often not a valid pattern; fall back to a plain
    # substring search rather than letting re.error escape to the caller.
    if regex:
        try:
            re.compile(keyword)
        except re.error:
            regex = False

    def row_matches(row):
        return row.astype(str).str.contains(keyword, case=case, regex=regex).any()

    return df[df.apply(row_matches, axis=1)]

# Streamlit App
st.title("Medical Licensing Exam Explorer with Speech Synthesis, Plotly and Seaborn 📊")

# Dropdown for file selection
file_option = st.selectbox("Select file:", ["small_file.jsonl", "large_file.jsonl"])
st.write(f"You selected: {file_option}")

# Map each menu label to the file on disk. The 2.08MB file is the small
# dataset and the 16.2MB file is the large one.
dataset_paths = {
    "small_file.jsonl": "usmle_2.08MB.jsonl",
    "large_file.jsonl": "usmle_16.2MB.jsonl",
}

# Load only the selected dataset; the script reruns on every interaction, so
# parsing the file that is not shown would be wasted work.
data = load_jsonl(dataset_paths[file_option])

# Streamlit reruns the whole script on each widget interaction, which resets
# module-level variables. Restore the last search result from session state so
# that other buttons still see it, but only if it came from the dataset that
# is currently selected.
stored_result = st.session_state.get("filtered_data")
if stored_result is not None and st.session_state.get("filtered_source") == file_option:
    filtered_data = stored_result

# Text input for search keyword
search_keyword = st.text_input("Enter a keyword to filter data (e.g., Heart, Lung, Pain, Memory):")

# Button to trigger search
if st.button("Search"):
    filtered_data = filter_by_keyword(data, search_keyword)
    # Remember the result and which dataset produced it for later reruns.
    st.session_state["filtered_data"] = filtered_data
    st.session_state["filtered_source"] = file_option
    st.write(f"Filtered Dataset by '{search_keyword}'")
    selected_data = st.dataframe(filtered_data)



def generate_html_with_textarea(text_to_speak):
    """Build a stand-alone HTML page that reads a passage aloud.

    The page contains a textarea pre-filled with the passage and a button
    that speaks the textarea's current content through the browser's
    ``speechSynthesis`` API.

    Args:
        text_to_speak: Passage to place in the textarea. Non-string values
            are converted with ``str()``.

    Returns:
        The complete HTML document as a string.
    """
    import html  # local import so the function does not depend on file-level imports

    # Escape the passage before embedding it: raw "<", "&" or a literal
    # "</textarea>" in the text would otherwise end the textarea early and be
    # parsed as markup. The browser decodes the entities inside a textarea,
    # so the user still sees and hears the original text.
    safe_text = html.escape(str(text_to_speak))
    return f'''
<!DOCTYPE html>
<html>
<head>
    <title>Read It Aloud</title>
    <script type="text/javascript">
        function readAloud() {{
            const text = document.getElementById("textArea").value;
            const speech = new SpeechSynthesisUtterance(text);
            window.speechSynthesis.speak(speech);
        }}
    </script>
</head>
<body>
    <h1>🔊 Read It Aloud</h1>
    <textarea id="textArea" rows="10" cols="80">
{safe_text}
    </textarea>
    <br>
    <button onclick="readAloud()">🔊 Read Aloud</button>
</body>
</html>
    '''

# Sample exam vignette used as the fixed passage for the read-aloud page
text_passage = "A 60-year-old man is brought to the emergency department by police officers because he was acting strangely in public. The patient was found talking nonsensically to characters on cereal boxes in the store. Past medical history is significant for multiple hospitalizations for alcohol-related injuries and seizures. The patient’s vital signs are within normal limits. Physical examination shows a disheveled male who is oriented to person, but not time or place. Neurologic examination shows nystagmus and severe gait ataxia. A T1/T2 MRI is performed and demonstrates evidence of damage to the mammillary bodies. The patient is given the appropriate treatment for recovering most of his cognitive functions. However, significant short-term memory deficits persist. The patient remembers events from his past such as the school and college he attended, his current job, and the names of family members quite well. Which of the following is the most likely diagnosis in this patient?"

# Build the HTML page (textarea plus read-aloud button) for the sample passage
documentHTML5 = generate_html_with_textarea(text_passage)

    
# Button to read all filtered rows
if st.button("Read All Rows"):
    if not filtered_data.empty:
        # Collect the question text of every filtered row; rows without a
        # "question" field contribute a placeholder instead.
        questions = [
            str(row.get("question", "No question field"))
            for _, row in filtered_data.iterrows()
        ]
        # Render one page holding all questions, separated by blank lines.
        # Concatenating one full HTML document per row would repeat the same
        # element id and script function, so every button would read only
        # the first textarea.
        all_rows_html = generate_html_with_textarea("\n\n".join(questions))
        components.html(all_rows_html, width=1280, height=1024)
    else:
        st.warning("No rows to read.")


# Embed the prepared read-aloud page once its button has been pressed
read_aloud_clicked = st.button("Read Aloud Text")
if read_aloud_clicked:
    components.html(documentHTML5, height=1024, width=1280)

        
# Plotly and Seaborn charts for EDA
if st.button("Generate Charts"):
    if len(data.columns) < 2:
        # Every two-axis chart below plots the first column against the
        # second, so indexing data.columns[1] would fail on a narrower frame.
        st.warning("At least two columns are required to generate charts.")
    else:
        x_col, y_col = data.columns[0], data.columns[1]
        # Correlation-based and regression charts only work on numeric data.
        numeric_data = data.select_dtypes(include="number")
        has_numeric = not numeric_data.empty

        st.subheader("Plotly Charts 📈")

        # 1. Scatter Plot
        fig = px.scatter(data, x=x_col, y=y_col)
        st.plotly_chart(fig)

        # 2. Line Plot
        fig = px.line(data, x=x_col, y=y_col)
        st.plotly_chart(fig)

        # 3. Bar Plot
        fig = px.bar(data, x=x_col, y=y_col)
        st.plotly_chart(fig)

        # 4. Histogram
        fig = px.histogram(data, x=x_col)
        st.plotly_chart(fig)

        # 5. Box Plot
        fig = px.box(data, x=x_col, y=y_col)
        st.plotly_chart(fig)

        st.subheader("Seaborn Charts 📊")

        # Each Matplotlib figure is closed after rendering; pyplot otherwise
        # keeps every figure alive and they accumulate across script reruns.

        # 6. Violin Plot
        fig, ax = plt.subplots()
        sns.violinplot(x=x_col, y=y_col, data=data, ax=ax)
        st.pyplot(fig)
        plt.close(fig)

        # 7. Swarm Plot
        fig, ax = plt.subplots()
        sns.swarmplot(x=x_col, y=y_col, data=data, ax=ax)
        st.pyplot(fig)
        plt.close(fig)

        if has_numeric:
            # 8. Pair Plot
            grid = sns.pairplot(data)
            # pairplot returns a PairGrid; hand its underlying Matplotlib
            # figure to Streamlit.
            st.pyplot(grid.fig)
            plt.close(grid.fig)

            # 9. Heatmap
            fig, ax = plt.subplots()
            # Correlate numeric columns only; text columns cannot be
            # converted to numbers for the correlation matrix.
            sns.heatmap(numeric_data.corr(), annot=True, ax=ax)
            st.pyplot(fig)
            plt.close(fig)
        else:
            st.info("Pair plot and heatmap skipped: the data has no numeric columns.")

        # 10. Regplot (Regression Plot)
        if x_col in numeric_data.columns and y_col in numeric_data.columns:
            fig, ax = plt.subplots()
            sns.regplot(x=x_col, y=y_col, data=data, ax=ax)
            st.pyplot(fig)
            plt.close(fig)
        else:
            st.info("Regression plot skipped: the first two columns are not both numeric.")