Shreyas094 committed on
Commit
5239e89
·
verified ·
1 Parent(s): 3a81b27

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -4
app.py CHANGED
@@ -30,6 +30,7 @@ from newspaper import Article
30
  import PyPDF2
31
  import io
32
  import requests
 
33
 
34
  # Load environment variables from a .env file
35
  load_dotenv()
@@ -53,6 +54,43 @@ client = InferenceClient(
53
  similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
54
 
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  # Set up a session with retry mechanism
57
  def requests_retry_session(
58
  retries=0,
@@ -438,7 +476,8 @@ Your response should be detailed, informative, accurate, and directly relevant t
438
 
439
  def search_and_scrape(query, chat_history, num_results=5, scraper="bs4", max_chars=3000, time_range="", language="all", category="",
440
  engines=[], safesearch=2, method="GET", llm_temperature=0.2, timeout=5):
441
- try:
 
442
  # Step 1: Rephrase the Query
443
  rephrased_query = rephrase_query(chat_history, query, temperature=llm_temperature)
444
  logger.info(f"Rephrased Query: {rephrased_query}")
@@ -447,6 +486,10 @@ def search_and_scrape(query, chat_history, num_results=5, scraper="bs4", max_cha
447
  logger.info("No need to perform search based on the rephrased query.")
448
  return "No search needed for the provided input."
449
 
 
 
 
 
450
  # Search query parameters
451
  params = {
452
  'q': rephrased_query,
@@ -612,7 +655,7 @@ def search_and_scrape(query, chat_history, num_results=5, scraper="bs4", max_cha
612
  return f"An unexpected error occurred during the search and scrape process: {e}"
613
 
614
 
615
- def chat_function(message, history, num_results, scraper, max_chars, time_range, language, category, engines, safesearch, method, llm_temperature):
616
  chat_history = "\n".join([f"{role}: {msg}" for role, msg in history])
617
 
618
  response = search_and_scrape(
@@ -627,14 +670,15 @@ def chat_function(message, history, num_results, scraper, max_chars, time_range,
627
  engines=engines,
628
  safesearch=safesearch,
629
  method=method,
630
- llm_temperature=llm_temperature
 
631
  )
632
 
633
  yield response
634
 
635
  iface = gr.ChatInterface(
636
  chat_function,
637
- title="SearXNG Scraper for Financial News",
638
  description="Enter your query, and I'll search the web for the most recent and relevant financial news, scrape content, and provide summarized results.",
639
  theme=gr.Theme.from_hub("allenai/gradio-theme"),
640
  additional_inputs=[
@@ -653,6 +697,7 @@ iface = gr.ChatInterface(
653
  gr.Slider(0, 2, value=2, step=1, label="Safe Search Level"),
654
  gr.Radio(["GET", "POST"], value="POST", label="HTTP Method"),
655
  gr.Slider(0, 1, value=0.2, step=0.1, label="LLM Temperature"),
 
656
  ],
657
  additional_inputs_accordion=gr.Accordion("⚙️ Advanced Parameters", open=True),
658
  retry_btn="Retry",
 
30
  import PyPDF2
31
  import io
32
  import requests
33
+ from duckduckgo_search import DDGS
34
 
35
  # Load environment variables from a .env file
36
  load_dotenv()
 
54
  similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
55
 
56
 
57
# Lookup tables translating this app's option values into the strings that
# are passed to ``DDGS.text``. Unknown values fall back to the defaults
# applied in ``duckduckgo_search`` (no time limit / "strict").
_DDG_TIMELIMITS = {"day": "d", "week": "w", "month": "m", "year": "y"}
_DDG_SAFESEARCH = {0: "off", 1: "moderate"}


def duckduckgo_search(query, num_results=10, time_range="", language="", safesearch=2):
    """Run a DuckDuckGo text search and return simplified result records.

    Args:
        query: Search string passed to ``DDGS.text``.
        num_results: Passed through as ``max_results``.
        time_range: One of "day", "week", "month" or "year"; any other
            value (including the empty default) means no time limit.
        language: Accepted but currently unused; the search always uses
            ``region='wt-wt'``.
        safesearch: 0 -> "off", 1 -> "moderate", anything else -> "strict".

    Returns:
        A list of ``{"url": ..., "title": ...}`` dicts, one per usable
        result. Returns an empty list if the search fails; the error is
        logged rather than raised.
    """
    try:
        ddgs = DDGS()

        timelimit = _DDG_TIMELIMITS.get(time_range)
        safesearch_setting = _DDG_SAFESEARCH.get(safesearch, "strict")

        results = ddgs.text(
            query,
            region='wt-wt',
            safesearch=safesearch_setting,
            timelimit=timelimit,
            max_results=num_results,
        )

        # Skip entries without a link instead of letting one malformed
        # record raise KeyError and discard every other result; a missing
        # title degrades to an empty string.
        return [
            {"url": result["href"], "title": result.get("title", "")}
            for result in results or []
            if result.get("href")
        ]
    except Exception as e:
        # Best-effort search: callers receive an empty list on any failure.
        logger.error(f"Error in DuckDuckGo search: {e}")
        return []
93
+
94
  # Set up a session with retry mechanism
95
  def requests_retry_session(
96
  retries=0,
 
476
 
477
  def search_and_scrape(query, chat_history, num_results=5, scraper="bs4", max_chars=3000, time_range="", language="all", category="",
478
  engines=[], safesearch=2, method="GET", llm_temperature=0.2, timeout=5):
479
+
480
+ try:
481
  # Step 1: Rephrase the Query
482
  rephrased_query = rephrase_query(chat_history, query, temperature=llm_temperature)
483
  logger.info(f"Rephrased Query: {rephrased_query}")
 
486
  logger.info("No need to perform search based on the rephrased query.")
487
  return "No search needed for the provided input."
488
 
489
+ # Step 2: Perform search
490
+ if use_duckduckgo:
491
+ search_results = duckduckgo_search(rephrased_query, num_results, time_range, language, safesearch)
492
+ else:
493
  # Search query parameters
494
  params = {
495
  'q': rephrased_query,
 
655
  return f"An unexpected error occurred during the search and scrape process: {e}"
656
 
657
 
658
+ def chat_function(message, history, num_results, scraper, max_chars, time_range, language, category, engines, safesearch, method, llm_temperature, use_duckduckgo):
659
  chat_history = "\n".join([f"{role}: {msg}" for role, msg in history])
660
 
661
  response = search_and_scrape(
 
670
  engines=engines,
671
  safesearch=safesearch,
672
  method=method,
673
+ llm_temperature=llm_temperature,
674
+ use_duckduckgo=use_duckduckgo
675
  )
676
 
677
  yield response
678
 
679
  iface = gr.ChatInterface(
680
  chat_function,
681
+ title="Web Scraper for Financial News",
682
  description="Enter your query, and I'll search the web for the most recent and relevant financial news, scrape content, and provide summarized results.",
683
  theme=gr.Theme.from_hub("allenai/gradio-theme"),
684
  additional_inputs=[
 
697
  gr.Slider(0, 2, value=2, step=1, label="Safe Search Level"),
698
  gr.Radio(["GET", "POST"], value="POST", label="HTTP Method"),
699
  gr.Slider(0, 1, value=0.2, step=0.1, label="LLM Temperature"),
700
+ gr.Checkbox(label="Use DuckDuckGo Search", value=False),
701
  ],
702
  additional_inputs_accordion=gr.Accordion("⚙️ Advanced Parameters", open=True),
703
  retry_btn="Retry",