Update app.py
Browse files
app.py
CHANGED
@@ -7,6 +7,7 @@ import urllib.parse
|
|
7 |
from datetime import datetime, timedelta
|
8 |
import re
|
9 |
import os
|
|
|
10 |
# List of user agents to rotate through
|
11 |
_useragent_list = [
|
12 |
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
@@ -138,6 +139,44 @@ def filter_relevant_content(text):
|
|
138 |
|
139 |
return filtered_text
|
140 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
def summarize_financial_news(query):
|
142 |
"""Search for financial news, extract relevant content, and summarize"""
|
143 |
search_results = google_search(query, num_results=3)
|
@@ -151,15 +190,29 @@ def summarize_financial_news(query):
|
|
151 |
if not all_filtered_text:
|
152 |
return "No relevant financial information found."
|
153 |
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
|
|
|
|
|
|
158 |
Provide a detailed, coherent summary focusing on financial implications and analysis."""
|
159 |
|
160 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
|
162 |
-
return
|
163 |
|
164 |
# Gradio Interface
|
165 |
iface = gr.Interface(
|
|
|
7 |
from datetime import datetime, timedelta
|
8 |
import re
|
9 |
import os
|
10 |
+
|
11 |
# List of user agents to rotate through
|
12 |
_useragent_list = [
|
13 |
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
|
|
139 |
|
140 |
return filtered_text
|
141 |
|
142 |
+
def chunk_text(text, max_chunk_size=1000, overlap=100):
    """Split text into sentence-aligned chunks with optional character overlap.

    The text is split into sentences at whitespace that follows '.', '!' or
    '?'. Sentences are accumulated into a chunk until either adding the next
    sentence would push the chunk past ``max_chunk_size`` characters, or the
    next sentence contains one of the section keywords (case-insensitive
    substring match); in both cases the next sentence starts a new chunk.
    A single sentence longer than ``max_chunk_size`` is kept whole as its
    own chunk rather than being cut.

    Args:
        text: The text to split. ``None``, empty or whitespace-only input
            yields an empty list.
        max_chunk_size: Soft upper bound, in characters, for a chunk before
            overlap is added.
        overlap: Number of characters borrowed from the end of the previous
            chunk and from the start of the next chunk. The borrowed text is
            concatenated directly, without a separator. Values <= 0 disable
            overlap.

    Returns:
        A list of non-empty chunk strings in document order.
    """
    # List of keywords that might indicate new sections
    section_keywords = (
        "revenue", "income", "profit", "loss", "expenses",
        "outlook", "forecast", "quarter", "year",
    )

    if not text or not text.strip():
        return []

    # Split text into sentences; stripping first and dropping empty pieces
    # prevents blank "sentences" from turning into empty chunks.
    sentences = [s for s in re.split(r'(?<=[.!?])\s+', text.strip()) if s]

    chunks = []
    current = []
    # Length of the pending chunk, counting one joining space per sentence.
    current_len = 0

    for sentence in sentences:
        # Only flush when something is pending: an oversized or keyword-bearing
        # first sentence must not emit an empty chunk ahead of itself.
        if current and (
            current_len + len(sentence) > max_chunk_size
            or any(keyword in sentence.lower() for keyword in section_keywords)
        ):
            chunks.append(" ".join(current))
            current = []
            current_len = 0
        current.append(sentence)
        current_len += len(sentence) + 1

    # Add the last chunk if it's not empty
    if current:
        chunks.append(" ".join(current))

    # s[-0:] is the whole string, so a zero overlap must be handled
    # explicitly instead of being passed to the slices below.
    if overlap <= 0:
        return chunks

    # Add overlap, always taken from the original (non-overlapped) neighbours
    overlapped_chunks = []
    last_index = len(chunks) - 1
    for i, chunk in enumerate(chunks):
        prefix = chunks[i - 1][-overlap:] if i > 0 else ""
        suffix = chunks[i + 1][:overlap] if i < last_index else ""
        overlapped_chunks.append(prefix + chunk + suffix)

    return overlapped_chunks
|
179 |
+
|
180 |
def summarize_financial_news(query):
|
181 |
"""Search for financial news, extract relevant content, and summarize"""
|
182 |
search_results = google_search(query, num_results=3)
|
|
|
190 |
if not all_filtered_text:
|
191 |
return "No relevant financial information found."
|
192 |
|
193 |
+
# Chunk the filtered text
|
194 |
+
chunks = chunk_text(all_filtered_text, max_chunk_size=3000, overlap=200)
|
195 |
+
|
196 |
+
summaries = []
|
197 |
+
for chunk in chunks:
|
198 |
+
prompt = f"""You are a financial analyst. Summarize the following text from a financial perspective:
|
199 |
+
{chunk}
|
200 |
Provide a detailed, coherent summary focusing on financial implications and analysis."""
|
201 |
|
202 |
+
summary = query_llama({"inputs": prompt, "parameters": {"max_length": 1000}})
|
203 |
+
summaries.append(summary[0]['generated_text'])
|
204 |
+
|
205 |
+
# Combine summaries
|
206 |
+
combined_summary = "\n\n".join(summaries)
|
207 |
+
|
208 |
+
# Final summarization of combined summaries
|
209 |
+
final_prompt = f"""As a financial analyst, provide a coherent and comprehensive summary of the following financial information:
|
210 |
+
{combined_summary}
|
211 |
+
Focus on the most important financial implications and analysis."""
|
212 |
+
|
213 |
+
final_summary = query_llama({"inputs": final_prompt, "parameters": {"max_length": 2000}})
|
214 |
|
215 |
+
return final_summary[0]['generated_text']
|
216 |
|
217 |
# Gradio Interface
|
218 |
iface = gr.Interface(
|