Spaces:
Sleeping
Sleeping
updating to qwen model
Browse files
app.py
CHANGED
@@ -163,26 +163,26 @@ def summarization(speech_key, _df):
|
|
163 |
def streaming(speech_key, _df):
|
164 |
client = InferenceClient(token=os.environ["HF_TOKEN"])
|
165 |
speech = _df[_df["speech_key"] == speech_key]["speech_html"].values[0]
|
166 |
-
|
167 |
messages = []
|
168 |
for message in client.chat_completion(
|
169 |
-
model="
|
170 |
messages=[
|
171 |
{
|
172 |
"role": "system",
|
173 |
-
"content": "You are a
|
174 |
},
|
175 |
{
|
176 |
"role": "user",
|
177 |
-
"content": f"The following speech is a State of the Union address from {
|
178 |
},
|
179 |
],
|
180 |
-
max_tokens=
|
181 |
-
stream=
|
182 |
):
|
183 |
# yield message.choices[0].delta.content
|
184 |
-
print(message)
|
185 |
-
|
186 |
return "".join(messages)
|
187 |
|
188 |
|
@@ -195,10 +195,14 @@ with gr.Blocks() as demo:
|
|
195 |
# Build out the top level static charts and content
|
196 |
gr.Markdown(
|
197 |
"""
|
198 |
-
#
|
199 |
This dashboard provides an analysis of all State of the Union (SOTU) addresses from 1790 to 2020 including written and spoken addresses. The data is sourced from the [State of the Union Addresses dataset](https://huggingface.co/datasets/jsulz/state-of-the-union-addresses) on the Hugging Face Datasets Hub. You can read more about how the data was gathered and cleaned on the dataset card. To read the speeches, you can visit the [The American Presidency Project's State of the Union page](https://www.presidency.ucsb.edu/documents/presidential-documents-archive-guidebook/annual-messages-congress-the-state-the-union) where this data was sourced.
|
200 |
"""
|
201 |
)
|
|
|
|
|
|
|
|
|
202 |
# Basic line chart showing the total number of words in each address
|
203 |
with gr.Row():
|
204 |
gr.Markdown(
|
@@ -273,14 +277,6 @@ with gr.Blocks() as demo:
|
|
273 |
The drop off is quite noticeable, don't you think? ;)
|
274 |
"""
|
275 |
)
|
276 |
-
gr.Markdown("## Summarize a Speech")
|
277 |
-
speeches = df["speech_key"].unique()
|
278 |
-
speeches = speeches.tolist()
|
279 |
-
speech = gr.Dropdown(label="Select a Speech", choices=speeches)
|
280 |
-
# create a dropdown to select a speech from a president
|
281 |
-
run_summarization = gr.Button(value="Summarize")
|
282 |
-
fin_speech = gr.Textbox(label="Summarized Speech", type="text", lines=10)
|
283 |
-
run_summarization.click(streaming, inputs=[speech, df_state], outputs=[fin_speech])
|
284 |
gr.Markdown(
|
285 |
"""
|
286 |
## Dive Deeper on Each President
|
@@ -317,5 +313,19 @@ with gr.Blocks() as demo:
|
|
317 |
# show a line chart of word count and ARI for a selected president
|
318 |
gr.Plot(plotly_word_and_ari, inputs=[president, df_state])
|
319 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
320 |
|
321 |
demo.launch()
|
|
|
163 |
def streaming(speech_key, _df):
    """Summarize one State of the Union address with a hosted chat model.

    Despite the name, the summary is not streamed to the caller: the
    response is requested with ``stream=True``, the streamed chunks are
    collected, and the complete text is returned in one piece.

    Args:
        speech_key: Value of the ``speech_key`` column identifying the speech.
            It is split on ``" - "`` and the first two parts are placed in
            the prompt as "<first> on <second>".
        _df: DataFrame with at least ``speech_key`` and ``speech_html``
            columns.

    Returns:
        The generated summary as a single string, or ``""`` when no speech
        is selected.

    Raises:
        KeyError: If the ``HF_TOKEN`` environment variable is not set.
        IndexError: If ``speech_key`` matches no row of ``_df``.
    """
    # The UI can invoke this before anything is picked in the dropdown;
    # there is nothing to look up or summarize in that case.
    if not speech_key:
        return ""

    client = InferenceClient(token=os.environ["HF_TOKEN"])
    speech = _df[_df["speech_key"] == speech_key]["speech_html"].values[0]

    # For a well-formed "<first> - <second>" key this yields exactly
    # "<first> on <second>"; a key without the separator degrades to the
    # key itself instead of raising IndexError.
    origin = " on ".join(speech_key.split(" - ")[:2])

    pieces = []
    for chunk in client.chat_completion(
        model="Qwen/Qwen2.5-72B-Instruct",
        messages=[
            {
                "role": "system",
                "content": "You are a political scholar with a deep knowledge of State of the Union addresses. You are tasked with summarizing a speech from a given president. The speech is a mix of written and spoken addresses. The goal is to provide a concise summary of the speech with the proper historical and political context.",
            },
            {
                "role": "user",
                "content": f"The following speech is a State of the Union address from {origin}. Summarize it: {speech}",
            },
        ],
        max_tokens=700,
        stream=True,
    ):
        # Some streamed chunks carry no choices or a delta whose content is
        # None (e.g. role-only or final chunks); joining None would raise
        # TypeError, so keep only real text.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece:
            pieces.append(piece)
    return "".join(pieces)
|
187 |
|
188 |
|
|
|
195 |
# Build out the top level static charts and content
|
196 |
gr.Markdown(
|
197 |
"""
|
198 |
+
# An Interactive Dashboard for State of the Union Addresses
|
199 |
This dashboard provides an analysis of all State of the Union (SOTU) addresses from 1790 to 2020 including written and spoken addresses. The data is sourced from the [State of the Union Addresses dataset](https://huggingface.co/datasets/jsulz/state-of-the-union-addresses) on the Hugging Face Datasets Hub. You can read more about how the data was gathered and cleaned on the dataset card. To read the speeches, you can visit the [The American Presidency Project's State of the Union page](https://www.presidency.ucsb.edu/documents/presidential-documents-archive-guidebook/annual-messages-congress-the-state-the-union) where this data was sourced.
|
200 |
"""
|
201 |
)
|
202 |
+
|
203 |
+
gr.Markdown(
|
204 |
+
"In addition to analyzing the content, this space also leverages the Qwen/2.5-72B-Instruct model to summarize a speech. The model is tasked with providing a concise summary of a speech from a given president. The speech is a mix of written and spoken addresses. The goal is to provide a concise summary of the speech with the proper historical and political context."
|
205 |
+
)
|
206 |
# Basic line chart showing the total number of words in each address
|
207 |
with gr.Row():
|
208 |
gr.Markdown(
|
|
|
277 |
The drop off is quite noticeable, don't you think? ;)
|
278 |
"""
|
279 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
280 |
gr.Markdown(
|
281 |
"""
|
282 |
## Dive Deeper on Each President
|
|
|
313 |
# show a line chart of word count and ARI for a selected president
|
314 |
gr.Plot(plotly_word_and_ari, inputs=[president, df_state])
|
315 |
|
316 |
+
gr.Markdown("## Summarize a Speech")
|
317 |
+
gr.HTML("<div id=summarize-anchor></div>")
|
318 |
+
gr.Markdown(
|
319 |
+
"""
|
320 |
+
Use the dropdown to select a speech from a president and click the button to summarize the speech. The model will provide a concise summary of the speech with the proper historical and political context.
|
321 |
+
"""
|
322 |
+
)
|
323 |
+
speeches = df["speech_key"].unique()
|
324 |
+
speeches = speeches.tolist()
|
325 |
+
speech = gr.Dropdown(label="Select a Speech", choices=speeches)
|
326 |
+
# create a dropdown to select a speech from a president
|
327 |
+
run_summarization = gr.Button(value="Summarize")
|
328 |
+
fin_speech = gr.Textbox(label="Summarized Speech", type="text", lines=10)
|
329 |
+
run_summarization.click(streaming, inputs=[speech, df_state], outputs=[fin_speech])
|
330 |
|
331 |
demo.launch()
|