Shreyas094
committed on
Commit
•
69ddd17
1
Parent(s):
90e8b4f
Update app.py
Browse files
app.py
CHANGED
@@ -22,6 +22,8 @@ from dotenv import load_dotenv
|
|
22 |
import certifi
|
23 |
from bs4 import BeautifulSoup
|
24 |
import requests
|
|
|
|
|
25 |
|
26 |
# Load environment variables from a .env file
|
27 |
load_dotenv()
|
@@ -253,7 +255,7 @@ Remember to focus on financial aspects and implications in your assessment and s
|
|
253 |
logger.error(f"Error assessing relevance and summarizing with LLM: {e}")
|
254 |
return "Error: Unable to assess relevance and summarize"
|
255 |
|
256 |
-
def scrape_full_content(url, scraper="
|
257 |
try:
|
258 |
logger.info(f"Scraping full content from: {url}")
|
259 |
|
@@ -319,12 +321,7 @@ Your response should be detailed, informative, accurate, and directly relevant t
|
|
319 |
logger.error(f"Error in LLM summarization: {e}")
|
320 |
return "Error: Unable to generate a summary. Please try again."
|
321 |
|
322 |
-
|
323 |
-
from trafilatura import extract
|
324 |
-
from trafilatura.settings import use_config
|
325 |
-
from urllib.request import urlopen, Request
|
326 |
-
|
327 |
-
def search_and_scrape(query, chat_history, num_results=5, scraper="trafilatura", max_chars=3000, time_range="", language="all", category="",
|
328 |
engines=[], safesearch=2, method="GET", llm_temperature=0.2, timeout=5):
|
329 |
try:
|
330 |
# Step 1: Rephrase the Query
|
@@ -561,7 +558,7 @@ iface = gr.ChatInterface(
|
|
561 |
description="Enter your query, and I'll search the web for the most recent and relevant financial news, scrape content, and provide summarized results.",
|
562 |
additional_inputs=[
|
563 |
gr.Slider(5, 20, value=10, step=1, label="Number of initial results"),
|
564 |
-
gr.Dropdown(["bs4", "trafilatura"], value="
|
565 |
gr.Slider(500, 10000, value=1500, step=100, label="Max characters to retrieve"),
|
566 |
gr.Dropdown(["", "day", "week", "month", "year"], value="year", label="Time Range"),
|
567 |
gr.Dropdown(["all", "en", "fr", "de", "es", "it", "nl", "pt", "pl", "ru", "zh"], value="en", label="Language"),
|
|
|
22 |
import certifi
|
23 |
from bs4 import BeautifulSoup
|
24 |
import requests
|
25 |
+
from trafilatura.settings import use_config
|
26 |
+
from urllib.request import urlopen, Request
|
27 |
|
28 |
# Load environment variables from a .env file
|
29 |
load_dotenv()
|
|
|
255 |
logger.error(f"Error assessing relevance and summarizing with LLM: {e}")
|
256 |
return "Error: Unable to assess relevance and summarize"
|
257 |
|
258 |
+
def scrape_full_content(url, scraper="bs4", max_chars=3000, timeout=5):
|
259 |
try:
|
260 |
logger.info(f"Scraping full content from: {url}")
|
261 |
|
|
|
321 |
logger.error(f"Error in LLM summarization: {e}")
|
322 |
return "Error: Unable to generate a summary. Please try again."
|
323 |
|
324 |
+
def search_and_scrape(query, chat_history, num_results=5, scraper="bs4", max_chars=3000, time_range="", language="all", category="",
|
|
|
|
|
|
|
|
|
|
|
325 |
engines=[], safesearch=2, method="GET", llm_temperature=0.2, timeout=5):
|
326 |
try:
|
327 |
# Step 1: Rephrase the Query
|
|
|
558 |
description="Enter your query, and I'll search the web for the most recent and relevant financial news, scrape content, and provide summarized results.",
|
559 |
additional_inputs=[
|
560 |
gr.Slider(5, 20, value=10, step=1, label="Number of initial results"),
|
561 |
+
gr.Dropdown(["bs4", "trafilatura"], value="bs4", label="Scraping Method"),
|
562 |
gr.Slider(500, 10000, value=1500, step=100, label="Max characters to retrieve"),
|
563 |
gr.Dropdown(["", "day", "week", "month", "year"], value="year", label="Time Range"),
|
564 |
gr.Dropdown(["all", "en", "fr", "de", "es", "it", "nl", "pt", "pl", "ru", "zh"], value="en", label="Language"),
|