Shreyas094 committed on
Commit
ecb4d0c
·
verified ·
1 Parent(s): 0f41ad8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -6
app.py CHANGED
@@ -7,6 +7,7 @@ import urllib.parse
7
  from datetime import datetime, timedelta
8
  import re
9
  import os
 
10
  # List of user agents to rotate through
11
  _useragent_list = [
12
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
@@ -138,6 +139,44 @@ def filter_relevant_content(text):
138
 
139
  return filtered_text
140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  def summarize_financial_news(query):
142
  """Search for financial news, extract relevant content, and summarize"""
143
  search_results = google_search(query, num_results=3)
@@ -151,15 +190,29 @@ def summarize_financial_news(query):
151
  if not all_filtered_text:
152
  return "No relevant financial information found."
153
 
154
- prompt = f"""You are a financial analyst. Summarize the following text from a financial perspective:
155
-
156
- {all_filtered_text}
157
-
 
 
 
158
  Provide a detailed, coherent summary focusing on financial implications and analysis."""
159
 
160
- summary = query_llama({"inputs": prompt, "parameters": {"max_length": 5000}})
 
 
 
 
 
 
 
 
 
 
 
161
 
162
- return summary[0]['generated_text']
163
 
164
  # Gradio Interface
165
  iface = gr.Interface(
 
7
  from datetime import datetime, timedelta
8
  import re
9
  import os
10
+
11
  # List of user agents to rotate through
12
  _useragent_list = [
13
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
 
139
 
140
  return filtered_text
141
 
142
def chunk_text(text, max_chunk_size=1000, overlap=100):
    """Split *text* into sentence-aligned chunks with character overlap.

    The text is split into sentences on whitespace that follows ``.``,
    ``!`` or ``?``. Sentences are accumulated into a chunk until either
    adding the next sentence would push the chunk past ``max_chunk_size``
    characters, or the next sentence contains one of the section keywords
    (case-insensitive substring match); in both cases the sentence starts a
    new chunk. A single sentence longer than ``max_chunk_size`` is kept
    whole as its own chunk.

    After chunking, each chunk is extended with the last ``overlap``
    characters of the previous chunk and the first ``overlap`` characters
    of the next chunk. The overlap is a raw character slice concatenated
    without a separator, so it may start or end mid-word.

    Args:
        text: The text to split.
        max_chunk_size: Soft upper bound on chunk length, in characters,
            before overlap is added.
        overlap: Number of characters borrowed from each neighbouring
            chunk. Values ``<= 0`` disable overlap.

    Returns:
        A list of chunk strings. Empty or whitespace-only input yields an
        empty list; no empty chunks are ever emitted.
    """
    # Keywords that might indicate the start of a new section.
    section_keywords = (
        "revenue", "income", "profit", "loss", "expenses",
        "outlook", "forecast", "quarter", "year",
    )

    chunks = []
    current_chunk = ""

    # Split text into sentences.
    for sentence in re.split(r'(?<=[.!?])\s+', text):
        if not sentence.strip():
            # re.split yields empty pieces for empty or trailing-whitespace
            # input; they carry no content and must not create chunks.
            continue

        too_long = len(current_chunk) + len(sentence) > max_chunk_size
        if too_long or any(
            keyword in sentence.lower() for keyword in section_keywords
        ):
            # Close the chunk in progress, but never emit an empty one
            # (e.g. when the very first sentence is already oversized).
            finished = current_chunk.strip()
            if finished:
                chunks.append(finished)
            current_chunk = sentence + " "
        else:
            current_chunk += sentence + " "

    # Add the last chunk if it has any content.
    finished = current_chunk.strip()
    if finished:
        chunks.append(finished)

    # Guard against overlap == 0: s[-0:] is the whole string, which would
    # prepend the entire previous chunk instead of nothing.
    if overlap <= 0:
        return chunks

    overlapped_chunks = []
    last_index = len(chunks) - 1
    for i, chunk in enumerate(chunks):
        if i > 0:
            chunk = chunks[i - 1][-overlap:] + chunk
        if i < last_index:
            chunk = chunk + chunks[i + 1][:overlap]
        overlapped_chunks.append(chunk)

    return overlapped_chunks
179
+
180
  def summarize_financial_news(query):
181
  """Search for financial news, extract relevant content, and summarize"""
182
  search_results = google_search(query, num_results=3)
 
190
  if not all_filtered_text:
191
  return "No relevant financial information found."
192
 
193
+ # Chunk the filtered text
194
+ chunks = chunk_text(all_filtered_text, max_chunk_size=3000, overlap=200)
195
+
196
+ summaries = []
197
+ for chunk in chunks:
198
+ prompt = f"""You are a financial analyst. Summarize the following text from a financial perspective:
199
+ {chunk}
200
  Provide a detailed, coherent summary focusing on financial implications and analysis."""
201
 
202
+ summary = query_llama({"inputs": prompt, "parameters": {"max_length": 1000}})
203
+ summaries.append(summary[0]['generated_text'])
204
+
205
+ # Combine summaries
206
+ combined_summary = "\n\n".join(summaries)
207
+
208
+ # Final summarization of combined summaries
209
+ final_prompt = f"""As a financial analyst, provide a coherent and comprehensive summary of the following financial information:
210
+ {combined_summary}
211
+ Focus on the most important financial implications and analysis."""
212
+
213
+ final_summary = query_llama({"inputs": final_prompt, "parameters": {"max_length": 2000}})
214
 
215
+ return final_summary[0]['generated_text']
216
 
217
  # Gradio Interface
218
  iface = gr.Interface(