making plots better at handling initial state
Browse files
@@ -9,34 +9,46 @@ from plotly.subplots import make_subplots
9 |
from matplotlib import pyplot as plt
10 |
from wordcloud import WordCloud
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
Helper functions for Plotly charts
33 |
34 |
35 |
36 |
37 |
if potus
38 |
# Filter on the potus
39 |
potus_df =
40 |
# Create a counter generator for the n-grams
41 |
trigrams = (
42 |
@@ -63,59 +75,58 @@ def plotly_ngrams(n_grams, potus):
63 |
return fig4
64 |
65 |
66 |
def plotly_word_and_ari(president):
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
return fig6
115 |
116 |
117 |
# Create a Gradio interface with blocks
118 |
with gr.Blocks() as demo:
119 |
# Build out the top level static charts and content
120 |
121 |
@@ -214,23 +225,24 @@ with gr.Blocks() as demo:
214 |
215 |
# get all unique president names
216 |
presidents = df["potus"].unique()
217 |
# convert presidents to a list
218 |
presidents = presidents.tolist()
219 |
# create a dropdown to select a president
220 |
president = gr.Dropdown(label="Select a President", choices=presidents)
221 |
# create a slider for number of word grams
222 |
grams = gr.Slider(
223 |
224 |
# show a bar chart of the top n-grams for a selected president
225 |
226 |
gr.Plot(plotly_ngrams, inputs=[grams, president])
227 |
228 |
229 |
gr.Plot(plt_wordcloud, scale=2, inputs=[president])
230 |
231 |
# show a line chart of word count and ARI for a selected president
232 |
233 |
gr.Plot(plotly_word_and_ari, inputs=[president])
234 |
235 |
236 |
9 |
from matplotlib import pyplot as plt
10 |
from wordcloud import WordCloud
11 |
12 |
13 |
def load_transform_dataset():
14 |
# Load the dataset and convert it to a Pandas dataframe
15 |
sotu_dataset = "jsulz/state-of-the-union-addresses"
16 |
dataset = load_dataset(sotu_dataset)
17 |
df = dataset["train"].to_pandas()
18 |
# Do some on-the-fly calculations
19 |
# calculate the number of words in each address
20 |
df["word_count"] = df["speech_html"].apply(lambda x: len(x.split()))
21 |
# calculate the automated readability index (ARI) score for each address
22 |
# automated readability index = 4.71 * (characters/words) + 0.5 * (words/sentences) - 21.43
23 |
df["ari"] = df["no-contractions"].apply(
24 |
lambda x: (4.71 * (len(x.replace(" ", "")) / len(x.split())))
25 |
+ (0.5 * (len(x.split()) / len(x.split("."))))
26 |
- 21.43
27 |
28 |
# Sort the dataframe by date because Plotly doesn't do any of this automatically
29 |
df = df.sort_values(by="date")
30 |
written = df[df["categories"] == "Written"]
31 |
spoken = df[df["categories"] == "Spoken"]
32 |
return df, written, spoken
33 |
34 |
35 |
36 |
Helper functions for Plotly charts
37 |
38 |
39 |
40 |
def filter_potus(potus, _df):
41 |
if potus != "All":
42 |
# Filter on the potus
43 |
potus_df = _df[_df["potus"] == potus]
44 |
45 |
potus_df = _df
46 |
return potus_df
47 |
48 |
49 |
def plotly_ngrams(n_grams, potus, _df):
50 |
if potus is not None:
51 |
potus_df = filter_potus(potus, _df)
52 |
# Create a counter generator for the n-grams
53 |
trigrams = (
54 |
75 |
return fig4
76 |
77 |
78 |
def plotly_word_and_ari(president, _df):
79 |
potus_df = filter_potus(president, _df)
80 |
fig5 = make_subplots(specs=[[{"secondary_y": True}]])
81 |
82 |
83 |
84 |
85 |
name="Word Count",
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
# Add figure title
98 |
fig5.update_layout(title_text="Address Word Count and ARI")
99 |
100 |
# Set x-axis title
101 |
fig5.update_xaxes(title_text="Date of Address")
102 |
103 |
# Set y-axes titles
104 |
fig5.update_yaxes(title_text="Word Count", secondary_y=False)
105 |
fig5.update_yaxes(title_text="ARI", secondary_y=True)
106 |
return fig5
107 |
108 |
109 |
def plt_wordcloud(president, _df):
110 |
potus_df = filter_potus(president, _df)
111 |
lemmatized = potus_df["lemmatized"].apply(lambda x: " ".join(x))
112 |
# build a single string from lemmatized
113 |
lemmatized = " ".join(lemmatized)
114 |
# create a wordcloud from the lemmatized column of the dataframe
115 |
wordcloud = WordCloud(background_color="white", width=800, height=400).generate(
116 |
117 |
118 |
# create a matplotlib figure
119 |
fig6 = plt.figure(figsize=(8, 4))
120 |
# add the wordcloud to the figure
121 |
122 |
plt.imshow(wordcloud, interpolation="bilinear")
123 |
124 |
return fig6
125 |
126 |
127 |
# Create a Gradio interface with blocks
128 |
with gr.Blocks() as demo:
129 |
df, written, spoken = load_transform_dataset()
130 |
# Build out the top level static charts and content
131 |
132 |
225 |
226 |
# get all unique president names
227 |
presidents = df["potus"].unique()
228 |
presidents = presidents.tolist()
229 |
230 |
# create a dropdown to select a president
231 |
president = gr.Dropdown(label="Select a President", choices=presidents, value="All")
232 |
# create a slider for number of word grams
233 |
grams = gr.Slider(
234 |
minimum=1, maximum=4, step=1, label="N-grams", interactive=True, value=1
235 |
236 |
237 |
df_state = gr.State(df)
238 |
239 |
# show a bar chart of the top n-grams for a selected president
240 |
gr.Plot(plotly_ngrams, inputs=[grams, president, df_state])
241 |
242 |
gr.Plot(plt_wordcloud, scale=2, inputs=[president, df_state])
243 |
244 |
# show a line chart of word count and ARI for a selected president
245 |
gr.Plot(plotly_word_and_ari, inputs=[president, df_state])
246 |
247 |
248 |