jsulz HF staff committed on
Commit
d736278
·
1 Parent(s): 65c7ea9

updating to qwen model

Browse files
Files changed (1) hide show
  1. app.py +27 -17
app.py CHANGED
@@ -163,26 +163,26 @@ def summarization(speech_key, _df):
163
  def streaming(speech_key, _df):
164
  client = InferenceClient(token=os.environ["HF_TOKEN"])
165
  speech = _df[_df["speech_key"] == speech_key]["speech_html"].values[0]
166
- potus = speech_key.split(" - ")[0]
167
  messages = []
168
  for message in client.chat_completion(
169
- model="meta-llama/Llama-3.1-8B-Instruct",
170
  messages=[
171
  {
172
  "role": "system",
173
- "content": "You are a legal scholar speaking to a class.",
174
  },
175
  {
176
  "role": "user",
177
- "content": f"The following speech is a State of the Union address from {potus}. Summarize it: {speech}",
178
  },
179
  ],
180
- max_tokens=1000,
181
- stream=False,
182
  ):
183
  # yield message.choices[0].delta.content
184
- print(message)
185
- # messages.append(message.choices[0].delta.content)
186
  return "".join(messages)
187
 
188
 
@@ -195,10 +195,14 @@ with gr.Blocks() as demo:
195
  # Build out the top level static charts and content
196
  gr.Markdown(
197
  """
198
- # A Dashboard to Analyze the State of the Union Addresses
199
  This dashboard provides an analysis of all State of the Union (SOTU) addresses from 1790 to 2020 including written and spoken addresses. The data is sourced from the [State of the Union Addresses dataset](https://huggingface.co/datasets/jsulz/state-of-the-union-addresses) on the Hugging Face Datasets Hub. You can read more about how the data was gathered and cleaned on the dataset card. To read the speeches, you can visit the [The American Presidency Project's State of the Union page](https://www.presidency.ucsb.edu/documents/presidential-documents-archive-guidebook/annual-messages-congress-the-state-the-union) where this data was sourced.
200
  """
201
  )
 
 
 
 
202
  # Basic line chart showing the total number of words in each address
203
  with gr.Row():
204
  gr.Markdown(
@@ -273,14 +277,6 @@ with gr.Blocks() as demo:
273
  The drop off is quite noticeable, don't you think? ;)
274
  """
275
  )
276
- gr.Markdown("## Summarize a Speech")
277
- speeches = df["speech_key"].unique()
278
- speeches = speeches.tolist()
279
- speech = gr.Dropdown(label="Select a Speech", choices=speeches)
280
- # create a dropdown to select a speech from a president
281
- run_summarization = gr.Button(value="Summarize")
282
- fin_speech = gr.Textbox(label="Summarized Speech", type="text", lines=10)
283
- run_summarization.click(streaming, inputs=[speech, df_state], outputs=[fin_speech])
284
  gr.Markdown(
285
  """
286
  ## Dive Deeper on Each President
@@ -317,5 +313,19 @@ with gr.Blocks() as demo:
317
  # show a line chart of word count and ARI for a selected president
318
  gr.Plot(plotly_word_and_ari, inputs=[president, df_state])
319
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
320
 
321
  demo.launch()
 
def streaming(speech_key, _df):
    """Summarize one State of the Union address with a hosted chat model.

    Despite its name, this function does not yield partial output: it
    consumes the streamed completion and returns the full summary at once.

    Args:
        speech_key: Identifier of the speech to summarize. The text before
            the first " - " is used as the president and the text after it
            as the date in the prompt (presumably "<president> - <date>";
            verify against the dataset).
        _df: Table with "speech_key" and "speech_html" columns; indexed with
            a boolean mask, so presumably a pandas DataFrame.

    Returns:
        The model's summary as a single string, or a short explanatory
        message when ``speech_key`` matches no row in ``_df``.

    Raises:
        KeyError: If the ``HF_TOKEN`` environment variable is not set.
    """
    # Look the speech up first so an empty or unknown selection fails softly
    # instead of raising IndexError from ``.values[0]``.
    matches = _df[_df["speech_key"] == speech_key]["speech_html"].values
    if len(matches) == 0:
        return "No speech found for the selected key. Please choose a speech from the dropdown."
    speech = matches[0]

    # Keys without a " - " separator have no date part; leave it out of the
    # prompt rather than indexing past the end of the split result.
    potus, *rest = speech_key.split(" - ")
    source = f"{potus} on {rest[0]}" if rest else potus

    client = InferenceClient(token=os.environ["HF_TOKEN"])
    pieces = []
    for chunk in client.chat_completion(
        model="Qwen/Qwen2.5-72B-Instruct",
        messages=[
            {
                "role": "system",
                "content": "You are a political scholar with a deep knowledge of State of the Union addresses. You are tasked with summarizing a speech from a given president. The speech is a mix of written and spoken addresses. The goal is to provide a concise summary of the speech with the proper historical and political context.",
            },
            {
                "role": "user",
                "content": f"The following speech is a State of the Union address from {source}. Summarize it: {speech}",
            },
        ],
        max_tokens=700,
        stream=True,
    ):
        # Streamed chunks may carry no choices or a delta without text
        # (content is None); joining None would raise TypeError.
        if not chunk.choices:
            continue
        text = chunk.choices[0].delta.content
        if text:
            pieces.append(text)
    return "".join(pieces)
187
 
188
 
 
195
  # Build out the top level static charts and content
196
  gr.Markdown(
197
  """
198
+ # An Interactive Dashboard for State of the Union Addresses
199
  This dashboard provides an analysis of all State of the Union (SOTU) addresses from 1790 to 2020 including written and spoken addresses. The data is sourced from the [State of the Union Addresses dataset](https://huggingface.co/datasets/jsulz/state-of-the-union-addresses) on the Hugging Face Datasets Hub. You can read more about how the data was gathered and cleaned on the dataset card. To read the speeches, you can visit the [The American Presidency Project's State of the Union page](https://www.presidency.ucsb.edu/documents/presidential-documents-archive-guidebook/annual-messages-congress-the-state-the-union) where this data was sourced.
200
  """
201
  )
202
+
203
+ gr.Markdown(
204
+ "In addition to analyzing the content, this space also leverages the Qwen/2.5-72B-Instruct model to summarize a speech. The model is tasked with providing a concise summary of a speech from a given president. The speech is a mix of written and spoken addresses. The goal is to provide a concise summary of the speech with the proper historical and political context."
205
+ )
206
  # Basic line chart showing the total number of words in each address
207
  with gr.Row():
208
  gr.Markdown(
 
277
  The drop off is quite noticeable, don't you think? ;)
278
  """
279
  )
 
 
 
 
 
 
 
 
280
  gr.Markdown(
281
  """
282
  ## Dive Deeper on Each President
 
313
  # show a line chart of word count and ARI for a selected president
314
  gr.Plot(plotly_word_and_ari, inputs=[president, df_state])
315
 
316
+ gr.Markdown("## Summarize a Speech")
317
+ gr.HTML("<div id=summarize-anchor></div>")
318
+ gr.Markdown(
319
+ """
320
+ Use the dropdown to select a speech from a president and click the button to summarize the speech. The model will provide a concise summary of the speech with the proper historical and political context.
321
+ """
322
+ )
323
+ speeches = df["speech_key"].unique()
324
+ speeches = speeches.tolist()
325
+ speech = gr.Dropdown(label="Select a Speech", choices=speeches)
326
+ # create a dropdown to select a speech from a president
327
+ run_summarization = gr.Button(value="Summarize")
328
+ fin_speech = gr.Textbox(label="Summarized Speech", type="text", lines=10)
329
+ run_summarization.click(streaming, inputs=[speech, df_state], outputs=[fin_speech])
330
 
331
  demo.launch()