Shreyas094
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -30,6 +30,7 @@ from newspaper import Article
|
|
30 |
import PyPDF2
|
31 |
import io
|
32 |
import requests
|
|
|
33 |
|
34 |
# Load environment variables from a .env file
|
35 |
load_dotenv()
|
@@ -53,6 +54,43 @@ client = InferenceClient(
|
|
53 |
similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
|
54 |
|
55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
# Set up a session with retry mechanism
|
57 |
def requests_retry_session(
|
58 |
retries=0,
|
@@ -438,7 +476,8 @@ Your response should be detailed, informative, accurate, and directly relevant t
|
|
438 |
|
439 |
def search_and_scrape(query, chat_history, num_results=5, scraper="bs4", max_chars=3000, time_range="", language="all", category="",
|
440 |
engines=[], safesearch=2, method="GET", llm_temperature=0.2, timeout=5):
|
441 |
-
|
|
|
442 |
# Step 1: Rephrase the Query
|
443 |
rephrased_query = rephrase_query(chat_history, query, temperature=llm_temperature)
|
444 |
logger.info(f"Rephrased Query: {rephrased_query}")
|
@@ -447,6 +486,10 @@ def search_and_scrape(query, chat_history, num_results=5, scraper="bs4", max_cha
|
|
447 |
logger.info("No need to perform search based on the rephrased query.")
|
448 |
return "No search needed for the provided input."
|
449 |
|
|
|
|
|
|
|
|
|
450 |
# Search query parameters
|
451 |
params = {
|
452 |
'q': rephrased_query,
|
@@ -612,7 +655,7 @@ def search_and_scrape(query, chat_history, num_results=5, scraper="bs4", max_cha
|
|
612 |
return f"An unexpected error occurred during the search and scrape process: {e}"
|
613 |
|
614 |
|
615 |
-
def chat_function(message, history, num_results, scraper, max_chars, time_range, language, category, engines, safesearch, method, llm_temperature):
|
616 |
chat_history = "\n".join([f"{role}: {msg}" for role, msg in history])
|
617 |
|
618 |
response = search_and_scrape(
|
@@ -627,14 +670,15 @@ def chat_function(message, history, num_results, scraper, max_chars, time_range,
|
|
627 |
engines=engines,
|
628 |
safesearch=safesearch,
|
629 |
method=method,
|
630 |
-
llm_temperature=llm_temperature
|
|
|
631 |
)
|
632 |
|
633 |
yield response
|
634 |
|
635 |
iface = gr.ChatInterface(
|
636 |
chat_function,
|
637 |
-
title="
|
638 |
description="Enter your query, and I'll search the web for the most recent and relevant financial news, scrape content, and provide summarized results.",
|
639 |
theme=gr.Theme.from_hub("allenai/gradio-theme"),
|
640 |
additional_inputs=[
|
@@ -653,6 +697,7 @@ iface = gr.ChatInterface(
|
|
653 |
gr.Slider(0, 2, value=2, step=1, label="Safe Search Level"),
|
654 |
gr.Radio(["GET", "POST"], value="POST", label="HTTP Method"),
|
655 |
gr.Slider(0, 1, value=0.2, step=0.1, label="LLM Temperature"),
|
|
|
656 |
],
|
657 |
additional_inputs_accordion=gr.Accordion("⚙️ Advanced Parameters", open=True),
|
658 |
retry_btn="Retry",
|
|
|
30 |
import PyPDF2
|
31 |
import io
|
32 |
import requests
|
33 |
+
from duckduckgo_search import DDGS
|
34 |
|
35 |
# Load environment variables from a .env file
|
36 |
load_dotenv()
|
|
|
54 |
similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
|
55 |
|
56 |
|
57 |
+
def duckduckgo_search(query, num_results=10, time_range="", language="", safesearch=2):
    """Run a DuckDuckGo text search and return the hits as url/title dicts.

    Args:
        query: Search string passed to ``DDGS.text``.
        num_results: Forwarded as ``max_results``.
        time_range: One of "day", "week", "month" or "year"; any other
            value (including the default "") applies no time limit.
        language: Accepted because the caller passes it, but currently
            unused -- the region is hard-coded to 'wt-wt'.
        safesearch: 0 selects "off", 1 selects "moderate", and any other
            value selects "strict".

    Returns:
        A list of ``{"url": ..., "title": ...}`` dicts, one per usable
        result. Returns an empty list if the search raises; the error is
        logged rather than propagated.
    """
    # Translation tables from this app's option values to the strings
    # handed to DDGS.text.
    timelimit_by_range = {"day": "d", "week": "w", "month": "m", "year": "y"}
    safesearch_by_level = {0: "off", 1: "moderate"}

    try:
        ddgs = DDGS()

        # Unknown or empty time ranges fall back to "no limit" (None).
        timelimit = timelimit_by_range.get(time_range)
        # Anything other than 0 or 1 is treated as the strictest setting.
        safesearch_setting = safesearch_by_level.get(safesearch, "strict")

        results = ddgs.text(
            query,
            region='wt-wt',
            safesearch=safesearch_setting,
            timelimit=timelimit,
            max_results=num_results,
        )

        hits = []
        # `or []` tolerates a None return; a single malformed entry is
        # skipped instead of raising KeyError and discarding every result.
        for result in results or []:
            url = result.get("href")
            if not url:
                continue
            hits.append({"url": url, "title": result.get("title", "")})
        return hits
    except Exception as e:
        # Best-effort search: report the failure and let the caller
        # continue with an empty result set.
        logger.error(f"Error in DuckDuckGo search: {e}")
        return []
|
93 |
+
|
94 |
# Set up a session with retry mechanism
|
95 |
def requests_retry_session(
|
96 |
retries=0,
|
|
|
476 |
|
477 |
def search_and_scrape(query, chat_history, num_results=5, scraper="bs4", max_chars=3000, time_range="", language="all", category="",
|
478 |
engines=[], safesearch=2, method="GET", llm_temperature=0.2, timeout=5):
|
479 |
+
|
480 |
+
try:
|
481 |
# Step 1: Rephrase the Query
|
482 |
rephrased_query = rephrase_query(chat_history, query, temperature=llm_temperature)
|
483 |
logger.info(f"Rephrased Query: {rephrased_query}")
|
|
|
486 |
logger.info("No need to perform search based on the rephrased query.")
|
487 |
return "No search needed for the provided input."
|
488 |
|
489 |
+
# Step 2: Perform search
|
490 |
+
if use_duckduckgo:
|
491 |
+
search_results = duckduckgo_search(rephrased_query, num_results, time_range, language, safesearch)
|
492 |
+
else:
|
493 |
# Search query parameters
|
494 |
params = {
|
495 |
'q': rephrased_query,
|
|
|
655 |
return f"An unexpected error occurred during the search and scrape process: {e}"
|
656 |
|
657 |
|
658 |
+
def chat_function(message, history, num_results, scraper, max_chars, time_range, language, category, engines, safesearch, method, llm_temperature, use_duckduckgo):
|
659 |
chat_history = "\n".join([f"{role}: {msg}" for role, msg in history])
|
660 |
|
661 |
response = search_and_scrape(
|
|
|
670 |
engines=engines,
|
671 |
safesearch=safesearch,
|
672 |
method=method,
|
673 |
+
llm_temperature=llm_temperature,
|
674 |
+
use_duckduckgo=use_duckduckgo
|
675 |
)
|
676 |
|
677 |
yield response
|
678 |
|
679 |
iface = gr.ChatInterface(
|
680 |
chat_function,
|
681 |
+
title="Web Scraper for Financial News",
|
682 |
description="Enter your query, and I'll search the web for the most recent and relevant financial news, scrape content, and provide summarized results.",
|
683 |
theme=gr.Theme.from_hub("allenai/gradio-theme"),
|
684 |
additional_inputs=[
|
|
|
697 |
gr.Slider(0, 2, value=2, step=1, label="Safe Search Level"),
|
698 |
gr.Radio(["GET", "POST"], value="POST", label="HTTP Method"),
|
699 |
gr.Slider(0, 1, value=0.2, step=0.1, label="LLM Temperature"),
|
700 |
+
gr.Checkbox(label="Use DuckDuckGo Search", value=False),
|
701 |
],
|
702 |
additional_inputs_accordion=gr.Accordion("⚙️ Advanced Parameters", open=True),
|
703 |
retry_btn="Retry",
|