Shreyas094 committed on
Commit
7d6eec9
1 Parent(s): d60fab0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -74
app.py CHANGED
@@ -42,7 +42,7 @@ def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_
42
  start = 0
43
  all_results = []
44
  max_chars_per_page = 8000 # Limit the number of characters from each webpage to stay under the token limit
45
-
46
  with requests.Session() as session:
47
  while start < num_results:
48
  print(f"Fetching search results starting from: {start}")
@@ -53,7 +53,7 @@ def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_
53
  'User-Agent': user_agent
54
  }
55
  print(f"Using User-Agent: {headers['User-Agent']}")
56
-
57
  resp = session.get(
58
  url="https://www.google.com/search",
59
  headers=headers,
@@ -71,7 +71,7 @@ def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_
71
  except requests.exceptions.RequestException as e:
72
  print(f"Error fetching search results: {e}")
73
  break
74
-
75
  soup = BeautifulSoup(resp.text, "html.parser")
76
  result_block = soup.find_all("div", attrs={"class": "g"})
77
  if not result_block:
@@ -106,9 +106,9 @@ def format_prompt(query, search_results, instructions):
106
  link = result["link"]
107
  text = result["text"]
108
  if link:
109
- formatted_results += f"URL: {link}\nContent: {text}\n{'-'*80}\n"
110
  else:
111
- formatted_results += "No link found.\n" + '-'*80 + '\n'
112
 
113
  prompt = f"{instructions}User Query: {query}\n\nWeb Search Results:\n{formatted_results}\n\nAssistant:"
114
  return prompt
@@ -232,101 +232,55 @@ def save_text_to_pdf(text, output_path):
232
  doc.save(output_path) # Save the PDF to the specified path
233
  print("PDF saved successfully.")
234
 
235
- def get_predefined_queries(company):
236
- return [
237
- f"Recent earnings for {company}",
238
- f"Recent News on {company}",
239
- f"Recent Credit rating of {company}",
240
- f"Recent conference call transcript of {company}"
241
- ]
242
-
243
-
244
  # Integrated function to perform web scraping, formatting, and text generation
245
- def scrape_and_display(query, num_results, earnings_instructions, news_instructions,
246
- credit_rating_instructions, conference_call_instructions, final_instructions,
247
- web_search=True, temperature=0.7, repetition_penalty=1.0, top_p=0.9):
248
  print(f"Scraping and displaying results for query: {query} with num_results: {num_results}")
249
-
250
  if web_search:
251
- company = query.strip()
252
- predefined_queries = get_predefined_queries(company)
253
- all_results = []
254
- all_summaries = []
255
-
256
- instructions = [earnings_instructions, news_instructions, credit_rating_instructions, conference_call_instructions]
257
-
258
- for pq, instruction in zip(predefined_queries, instructions):
259
- search_results = google_search(pq, num_results=num_results // len(predefined_queries))
260
- all_results.extend(search_results)
261
-
262
- # Generate a summary for each predefined query
263
- formatted_prompt = format_prompt(pq, search_results, instruction)
264
- summary = generate_text(formatted_prompt, temperature=temperature, repetition_penalty=repetition_penalty, top_p=top_p)
265
- all_summaries.append(summary)
266
-
267
- # Combine all summaries
268
- combined_summary = "\n\n".join(all_summaries)
269
-
270
- # Generate final summary using the combined results and final instructions
271
- final_prompt = f"{final_instructions}\n\nHere are the summaries for each aspect of {company}:\n\n{combined_summary}\n\nPlease provide a comprehensive summary based on the above information:"
272
- generated_summary = generate_text(final_prompt, temperature=temperature, repetition_penalty=repetition_penalty, top_p=top_p)
273
  else:
274
- formatted_prompt = format_prompt_with_instructions(query, final_instructions)
275
  generated_summary = generate_text(formatted_prompt, temperature=temperature, repetition_penalty=repetition_penalty, top_p=top_p)
276
-
277
  print("Scraping and display complete.")
278
  if generated_summary:
 
279
  assistant_index = generated_summary.find("Assistant:")
280
  if assistant_index != -1:
281
  generated_summary = generated_summary[assistant_index:]
282
  else:
283
  generated_summary = "Assistant: No response generated."
284
- print(f"Generated summary: {generated_summary}")
285
  return generated_summary
286
 
287
-
288
  # Main Gradio interface function
289
- def gradio_interface(query, use_pdf, pdf, num_results, earnings_instructions, news_instructions,
290
- credit_rating_instructions, conference_call_instructions, final_instructions,
291
- temperature, repetition_penalty, top_p):
292
  if use_pdf and pdf is not None:
293
  pdf_text = read_pdf(pdf)
294
- generated_summary = scrape_and_display(pdf_text, num_results=0, instructions=final_instructions,
295
- web_search=False, temperature=temperature,
296
- repetition_penalty=repetition_penalty, top_p=top_p)
297
  else:
298
- generated_summary = scrape_and_display(query, num_results=num_results,
299
- earnings_instructions=earnings_instructions,
300
- news_instructions=news_instructions,
301
- credit_rating_instructions=credit_rating_instructions,
302
- conference_call_instructions=conference_call_instructions,
303
- final_instructions=final_instructions,
304
- web_search=True, temperature=temperature,
305
- repetition_penalty=repetition_penalty, top_p=top_p)
306
-
307
  output_pdf_path = "output_summary.pdf"
308
  save_text_to_pdf(generated_summary, output_pdf_path)
309
-
310
  return generated_summary, output_pdf_path
311
 
312
- # Update the Gradio Interface
313
  gr.Interface(
314
  fn=gradio_interface,
315
  inputs=[
316
- gr.Textbox(label="Company Name"),
317
  gr.Checkbox(label="Use PDF"),
318
  gr.File(label="Upload PDF"),
319
- gr.Slider(minimum=4, maximum=40, step=4, value=20, label="Number of Results (total for all queries)"),
320
- gr.Textbox(label="Earnings Instructions", lines=2, placeholder="Instructions for recent earnings query..."),
321
- gr.Textbox(label="News Instructions", lines=2, placeholder="Instructions for recent news query..."),
322
- gr.Textbox(label="Credit Rating Instructions", lines=2, placeholder="Instructions for credit rating query..."),
323
- gr.Textbox(label="Conference Call Instructions", lines=2, placeholder="Instructions for conference call transcript query..."),
324
- gr.Textbox(label="Final Summary Instructions", lines=2, placeholder="Instructions for the final summary..."),
325
- gr.Slider(minimum=0.1, maximum=1.0, value=0.7, label="Temperature"),
326
- gr.Slider(minimum=1.0, maximum=2.0, value=1.0, label="Repetition Penalty"),
327
- gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top p")
328
  ],
329
- outputs=["text", "file"],
330
  title="Financial Analyst AI Assistant",
331
- description="Enter a company name and provide specific instructions for each query. The AI will use these instructions to gather and summarize information on recent earnings, news, credit ratings, and conference call transcripts.",
332
- )
 
42
  start = 0
43
  all_results = []
44
  max_chars_per_page = 8000 # Limit the number of characters from each webpage to stay under the token limit
45
+
46
  with requests.Session() as session:
47
  while start < num_results:
48
  print(f"Fetching search results starting from: {start}")
 
53
  'User-Agent': user_agent
54
  }
55
  print(f"Using User-Agent: {headers['User-Agent']}")
56
+
57
  resp = session.get(
58
  url="https://www.google.com/search",
59
  headers=headers,
 
71
  except requests.exceptions.RequestException as e:
72
  print(f"Error fetching search results: {e}")
73
  break
74
+
75
  soup = BeautifulSoup(resp.text, "html.parser")
76
  result_block = soup.find_all("div", attrs={"class": "g"})
77
  if not result_block:
 
106
  link = result["link"]
107
  text = result["text"]
108
  if link:
109
+ formatted_results += f"URL: {link}\nContent: {text}\n{'-'*80}\n"
110
  else:
111
+ formatted_results += "No link found.\n" + '-'*80 + '\n'
112
 
113
  prompt = f"{instructions}User Query: {query}\n\nWeb Search Results:\n{formatted_results}\n\nAssistant:"
114
  return prompt
 
232
  doc.save(output_path) # Save the PDF to the specified path
233
  print("PDF saved successfully.")
234
 
 
 
 
 
 
 
 
 
 
235
  # Integrated function to perform web scraping, formatting, and text generation
236
+ def scrape_and_display(query, num_results, instructions, web_search=True, temperature=0.7, repetition_penalty=1.0, top_p=0.9):
 
 
237
  print(f"Scraping and displaying results for query: {query} with num_results: {num_results}")
 
238
  if web_search:
239
+ search_results = google_search(query, num_results)
240
+ formatted_prompt = format_prompt(query, search_results, instructions)
241
+ generated_summary = generate_text(formatted_prompt, temperature=temperature, repetition_penalty=repetition_penalty, top_p=top_p)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
  else:
243
+ formatted_prompt = format_prompt_with_instructions(query, instructions)
244
  generated_summary = generate_text(formatted_prompt, temperature=temperature, repetition_penalty=repetition_penalty, top_p=top_p)
 
245
  print("Scraping and display complete.")
246
  if generated_summary:
247
+ # Extract and return text starting from "Assistant:"
248
  assistant_index = generated_summary.find("Assistant:")
249
  if assistant_index != -1:
250
  generated_summary = generated_summary[assistant_index:]
251
  else:
252
  generated_summary = "Assistant: No response generated."
253
+ print(f"Generated summary: {generated_summary}") # Debugging line
254
  return generated_summary
255
 
 
256
  # Main Gradio interface function
257
+ def gradio_interface(query, use_pdf, pdf, num_results, instructions, temperature, repetition_penalty, top_p):
 
 
258
  if use_pdf and pdf is not None:
259
  pdf_text = read_pdf(pdf)
260
+ generated_summary = scrape_and_display(pdf_text, num_results=0, instructions=instructions, web_search=False, temperature=temperature, repetition_penalty=repetition_penalty, top_p=top_p)
 
 
261
  else:
262
+ generated_summary = scrape_and_display(query, num_results=num_results, instructions=instructions, web_search=True, temperature=temperature, repetition_penalty=repetition_penalty, top_p=top_p)
263
+
264
+ # Save the generated summary to a PDF
 
 
 
 
 
 
265
  output_pdf_path = "output_summary.pdf"
266
  save_text_to_pdf(generated_summary, output_pdf_path)
267
+
268
  return generated_summary, output_pdf_path
269
 
270
+ # Deploy Gradio Interface
271
  gr.Interface(
272
  fn=gradio_interface,
273
  inputs=[
274
+ gr.Textbox(label="Query"),
275
  gr.Checkbox(label="Use PDF"),
276
  gr.File(label="Upload PDF"),
277
+ gr.Slider(minimum=1, maximum=20, label="Number of Results"), # Added Slider for num_results
278
+ gr.Textbox(label="Instructions"),
279
+ gr.Slider(minimum=0.1, maximum=1.0, label="Temperature"),
280
+ gr.Slider(minimum=0.1, maximum=1.0, label="Repetition Penalty"),
281
+ gr.Slider(minimum=0.1, maximum=1.0, label="Top p")
 
 
 
 
282
  ],
283
+ outputs=["text", "file"], # Updated to return text and a file
284
  title="Financial Analyst AI Assistant",
285
+ description="Enter your query about a company's financials to get valuable insights. Optionally, upload a PDF for analysis.Please instruct me for curating your output template, also for web search you can modify my search results but its advisable to restrict the same at 10. You can also adjust my parameters like Temperature, Repetition Penalty and Top_P, its adivsable to set repetition penalty at 1 and other two parameters at 0.1.",
286
+ ).launch(share=True)