Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,10 +3,8 @@ import gradio as gr
|
|
3 |
import pandas as pd
|
4 |
import torch
|
5 |
import logging
|
6 |
-
import gc
|
7 |
-
import threading
|
8 |
-
import concurrent.futures
|
9 |
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
|
|
|
10 |
|
11 |
# Setup logging
|
12 |
logging.basicConfig(
|
@@ -25,452 +23,193 @@ def clear_gpu_memory():
|
|
25 |
torch.cuda.empty_cache()
|
26 |
gc.collect()
|
27 |
|
28 |
-
class ModelLoadingError(Exception):
    """Raised when a model cannot be loaded, including load timeouts."""


class ModelManager:
    """Handles model loading, caching and lookup by model name."""

    def __init__(self):
        self.device = DEVICE
        # Currently loaded models / tokenizers, keyed by model name.
        self.models = {}
        self.tokenizers = {}
        # Insertion-ordered caches; the oldest entry is evicted first.
        self.model_cache = {}
        self.tokenizer_cache = {}
        self.max_cache_size = 2

    def load_model(self, model_name, model_type="sentiment", timeout=300):
        """Load a model (and tokenizer for sentiment models) with a timeout.

        Args:
            model_name: Hub identifier or path passed to ``from_pretrained``
                / ``pipeline``.
            model_type: ``"sentiment"`` loads a sequence-classification model
                plus tokenizer; any other value builds a text-generation
                pipeline.
            timeout: Seconds to wait for loading before giving up.

        Raises:
            ModelLoadingError: If loading fails or exceeds ``timeout``.
        """
        try:
            if model_name in self.model_cache:
                self.models[model_name] = self.model_cache[model_name]
                # unload_model() drops the tokenizer too, so restore it here;
                # otherwise get_tokenizer() returns None after a cache hit.
                cached_tokenizer = self.tokenizer_cache.get(model_name)
                if cached_tokenizer is not None:
                    self.tokenizers[model_name] = cached_tokenizer
                logger.info(f"Loaded {model_name} from cache")
                return

            dtype = torch.float16 if self.device == "cuda" else torch.float32

            def load_model_task():
                # Return the loaded objects instead of mutating self, so a
                # load that finishes after the timeout cannot register a
                # model the caller already gave up on.
                if model_type == "sentiment":
                    tokenizer = AutoTokenizer.from_pretrained(
                        model_name,
                        use_fast=True,
                    )
                    model = AutoModelForSequenceClassification.from_pretrained(
                        model_name,
                        torch_dtype=dtype,
                    ).to(self.device)
                    return model, tokenizer
                generator = pipeline(
                    "text-generation",
                    model=model_name,
                    device_map="auto" if self.device == "cuda" else None,
                    torch_dtype=dtype,
                )
                return generator, None

            # Not used as a context manager: leaving a `with` block calls
            # shutdown(wait=True), which would block until the load finishes
            # and silently defeat the timeout.
            executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
            try:
                future = executor.submit(load_model_task)
                try:
                    model, tokenizer = future.result(timeout=timeout)
                except concurrent.futures.TimeoutError:
                    raise ModelLoadingError(
                        f"Model loading timed out after {timeout} seconds"
                    )
            finally:
                # The worker thread cannot be killed; it is left to finish in
                # the background and its result is discarded.
                executor.shutdown(wait=False)

            self.models[model_name] = model
            if tokenizer is not None:
                self.tokenizers[model_name] = tokenizer

            # Cache the model
            self.cache_model(model_name, model, tokenizer)
            logger.info(f"Successfully loaded model: {model_name}")

        except Exception as e:
            logger.error(f"Error loading model {model_name}: {str(e)}")
            raise ModelLoadingError(
                f"Failed to load model {model_name}: {str(e)}"
            ) from e

    def cache_model(self, model_name, model, tokenizer=None):
        """Cache a model (and optionally its tokenizer) for faster reloading.

        When the cache is full and ``model_name`` is not already cached, the
        oldest cached entry is evicted first.
        """
        is_new_entry = model_name not in self.model_cache
        if is_new_entry and len(self.model_cache) >= self.max_cache_size:
            oldest_model = next(iter(self.model_cache))
            del self.model_cache[oldest_model]
            self.tokenizer_cache.pop(oldest_model, None)
        self.model_cache[model_name] = model
        if tokenizer is not None:
            self.tokenizer_cache[model_name] = tokenizer

    def unload_model(self, model_name):
        """Drop the active model and tokenizer; cached copies are kept."""
        try:
            self.models.pop(model_name, None)
            self.tokenizers.pop(model_name, None)
            clear_gpu_memory()
            logger.info(f"Unloaded model: {model_name}")
        except Exception as e:
            logger.error(f"Error unloading model {model_name}: {str(e)}")

    def get_model(self, model_name):
        """Return the loaded model for ``model_name``, or None."""
        return self.models.get(model_name)

    def get_tokenizer(self, model_name):
        """Return the loaded tokenizer for ``model_name``, or None."""
        return self.tokenizers.get(model_name)
|
110 |
-
|
111 |
class FinancialAnalyzer:
|
112 |
-
"""
|
113 |
|
114 |
def __init__(self):
|
115 |
-
|
116 |
-
self.
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
# Load sentiment model at initialization with longer timeout
|
123 |
try:
|
124 |
-
|
125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
except Exception as e:
|
127 |
-
logger.error(f"
|
128 |
raise
|
129 |
|
130 |
-
def
|
131 |
-
"""
|
132 |
try:
|
133 |
if file_obj is None:
|
134 |
raise ValueError("No file provided")
|
135 |
|
136 |
-
|
137 |
-
df = pd.read_csv(file_obj, encoding='utf-8', on_bad_lines='skip')
|
138 |
|
139 |
if df.empty:
|
140 |
raise ValueError("Empty CSV file")
|
141 |
-
|
142 |
-
#
|
143 |
-
logger.info(f"CSV Preview:\n{df.head()}")
|
144 |
-
logger.info(f"CSV Columns: {df.columns.tolist()}")
|
145 |
-
|
146 |
-
# Validate numeric columns
|
147 |
numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
|
148 |
if len(numeric_cols) == 0:
|
149 |
raise ValueError("No numeric columns found in CSV")
|
150 |
-
|
151 |
-
#
|
152 |
summary = df[numeric_cols].describe()
|
153 |
-
logger.info(f"Statistical Summary:\n{summary}")
|
154 |
|
155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
|
157 |
except Exception as e:
|
158 |
-
logger.error(f"Error
|
159 |
raise
|
160 |
|
161 |
-
def
|
162 |
-
"""
|
163 |
-
try:
|
164 |
-
model_name = self.models["sentiment"]
|
165 |
-
model = self.model_manager.get_model(model_name)
|
166 |
-
tokenizer = self.model_manager.get_tokenizer(model_name)
|
167 |
-
|
168 |
-
# Validate input
|
169 |
-
if not text or not isinstance(text, str):
|
170 |
-
raise ValueError("Invalid input text")
|
171 |
-
|
172 |
-
# Preprocess text
|
173 |
-
text = text.strip()
|
174 |
-
if len(text) == 0:
|
175 |
-
raise ValueError("Empty text input")
|
176 |
-
|
177 |
-
# Truncate text if too long
|
178 |
-
max_length = 512
|
179 |
-
if len(text.split()) > max_length:
|
180 |
-
logger.warning(f"Text length exceeds {max_length} tokens. Truncating...")
|
181 |
-
|
182 |
-
# Tokenize with proper padding and truncation
|
183 |
-
inputs = tokenizer(
|
184 |
-
text,
|
185 |
-
return_tensors="pt",
|
186 |
-
truncation=True,
|
187 |
-
max_length=max_length,
|
188 |
-
padding=True
|
189 |
-
).to(DEVICE)
|
190 |
-
|
191 |
-
# Get prediction
|
192 |
-
with torch.no_grad():
|
193 |
-
outputs = model(**inputs)
|
194 |
-
probabilities = torch.nn.functional.softmax(outputs.logits, dim=1)
|
195 |
-
|
196 |
-
# Process results
|
197 |
-
labels = ['negative', 'neutral', 'positive']
|
198 |
-
scores = probabilities[0].cpu().tolist()
|
199 |
-
|
200 |
-
results = [
|
201 |
-
{'label': label, 'score': score}
|
202 |
-
for label, score in zip(labels, scores)
|
203 |
-
]
|
204 |
-
|
205 |
-
logger.info(f"Sentiment analysis results: {results}")
|
206 |
-
return [results]
|
207 |
-
|
208 |
-
except Exception as e:
|
209 |
-
logger.error(f"Sentiment analysis error: {str(e)}")
|
210 |
-
return [{"label": "error", "score": 1.0}]
|
211 |
-
|
212 |
-
def generate_analysis(self, financial_data):
    """Generate strategic analysis with improved prompting and error handling

    Loads the model registered under ``self.models["analysis"]``, prompts it
    with ``financial_data`` (cut to the first 1000 whitespace-separated words)
    and returns the generated text passed through
    ``self.format_analysis_text``. On any failure the string
    ``"Error in analysis generation"`` is returned instead of raising.
    """
    # Bound before the try block so the finally clause never hits an
    # UnboundLocalError when the model-name lookup itself fails.
    model_name = None
    try:
        model_name = self.models["analysis"]
        self.model_manager.load_model(model_name, "generation", timeout=600)

        # Truncate financial data if too long
        max_data_length = 1000
        words = financial_data.split()
        if len(words) > max_data_length:
            logger.warning(f"Financial data too long. Truncating to {max_data_length} tokens...")
            financial_data = ' '.join(words[:max_data_length])

        prompt = f"""[INST] As a senior financial analyst, provide a detailed analysis of these financial statements:

Financial Data:
{financial_data}

Please provide a comprehensive analysis covering:

1. Business Health Assessment
- Current financial position
- Key performance indicators
- Trend analysis

2. Key Strategic Insights
- Major financial trends
- Performance drivers
- Areas of concern

3. Market Position
- Competitive advantages
- Market share indicators
- Industry comparison

4. Growth Opportunities
- Expansion potential
- Investment opportunities
- Revenue growth areas

5. Risk Factors
- Financial risks
- Operational risks
- Market risks

Provide specific metrics and detailed explanations for each section. [/INST]"""

        logger.info("Generating analysis...")
        generator = self.model_manager.get_model(model_name)
        response = generator(
            prompt,
            max_length=2000,
            min_length=800,
            temperature=0.7,
            do_sample=True,
            num_return_sequences=1,
            truncation=True,
            repetition_penalty=1.2,
            no_repeat_ngram_size=3,
            # Without this the pipeline returns prompt + completion, and the
            # prompt's own numbered headings would be formatted as analysis.
            return_full_text=False,
        )

        analysis_text = response[0]['generated_text']
        return self.format_analysis_text(analysis_text)

    except Exception as e:
        logger.error(f"Analysis generation error: {str(e)}")
        return "Error in analysis generation"
    finally:
        if model_name is not None:
            self.model_manager.unload_model(model_name)
|
279 |
-
|
280 |
-
def format_analysis_text(self, text):
|
281 |
-
"""Format the analysis text for better readability"""
|
282 |
try:
|
283 |
-
|
284 |
-
|
|
|
285 |
|
286 |
-
|
287 |
-
|
288 |
-
section = section.strip()
|
289 |
-
if not section:
|
290 |
-
continue
|
291 |
-
|
292 |
-
# Check if this is a new section
|
293 |
-
if any(section.startswith(str(i)) for i in range(1, 6)):
|
294 |
-
current_section = f"### {section}"
|
295 |
-
formatted_sections.append(current_section)
|
296 |
-
elif current_section:
|
297 |
-
# Add bullet points to content under sections
|
298 |
-
lines = section.split('\n')
|
299 |
-
formatted_lines = []
|
300 |
-
for line in lines:
|
301 |
-
line = line.strip()
|
302 |
-
if line:
|
303 |
-
if not line.startswith('- '):
|
304 |
-
line = f"- {line}"
|
305 |
-
formatted_lines.append(line)
|
306 |
-
formatted_sections.append('\n'.join(formatted_lines))
|
307 |
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
"""Generate recommendations with improved prompting and error handling"""
|
315 |
-
try:
|
316 |
-
model_name = self.models["recommendation"]
|
317 |
-
self.model_manager.load_model(model_name, "generation", timeout=600)
|
318 |
-
|
319 |
-
# Truncate analysis if too long
|
320 |
-
max_analysis_length = 1000
|
321 |
-
if len(analysis.split()) > max_analysis_length:
|
322 |
-
logger.warning(f"Analysis too long. Truncating to {max_analysis_length} tokens...")
|
323 |
-
analysis = ' '.join(analysis.split()[:max_analysis_length])
|
324 |
-
|
325 |
-
prompt = f"""Based on this financial analysis, provide detailed strategic recommendations:
|
326 |
-
|
327 |
-
Analysis Context:
|
328 |
-
{analysis}
|
329 |
-
|
330 |
-
Please provide specific, actionable recommendations for each area:
|
331 |
-
|
332 |
-
1. Strategic Initiatives
|
333 |
-
- Detail specific actions for business growth
|
334 |
-
- Identify market expansion opportunities
|
335 |
-
- Outline product/service development strategies
|
336 |
-
|
337 |
-
2. Operational Improvements
|
338 |
-
- Specify efficiency enhancement measures
|
339 |
-
- Recommend process optimization steps
|
340 |
-
- Suggest cost reduction strategies
|
341 |
-
|
342 |
-
3. Financial Management
|
343 |
-
- Provide cash flow optimization tactics
|
344 |
-
- Prioritize investment opportunities
|
345 |
-
- Detail risk management approaches
|
346 |
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
|
|
|
|
|
|
|
|
351 |
|
352 |
-
|
353 |
-
|
354 |
-
- Detail expansion plans
|
355 |
-
- Specify resource requirements
|
356 |
|
357 |
-
|
358 |
-
- Include implementation timeline
|
359 |
-
- Specify resource requirements
|
360 |
-
- Define success metrics
|
361 |
-
- List potential challenges
|
362 |
|
363 |
-
|
|
|
364 |
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
max_length=2000,
|
369 |
-
min_length=800,
|
370 |
temperature=0.7,
|
371 |
-
|
372 |
-
num_return_sequences=1,
|
373 |
-
truncation=True,
|
374 |
-
repetition_penalty=1.2,
|
375 |
-
no_repeat_ngram_size=3
|
376 |
)
|
377 |
|
378 |
-
|
379 |
-
return self.format_recommendation_text(recommendations_text)
|
380 |
|
381 |
except Exception as e:
|
382 |
-
logger.error(f"
|
383 |
-
return "Error generating
|
384 |
-
finally:
|
385 |
-
self.model_manager.unload_model(model_name)
|
386 |
|
387 |
-
def
|
388 |
-
"""Format the
|
389 |
try:
|
390 |
-
|
391 |
-
|
|
|
|
|
|
|
|
|
|
|
392 |
|
|
|
393 |
current_section = None
|
394 |
for section in sections:
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
|
|
|
|
|
|
414 |
|
415 |
-
return '\n\n'.join(formatted_sections)
|
416 |
except Exception as e:
|
417 |
-
logger.error(f"Error formatting
|
418 |
-
return
|
419 |
|
420 |
-
def
|
421 |
-
"""Main
|
422 |
try:
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
# Validate inputs
|
428 |
-
if not income_statement or not balance_sheet:
|
429 |
-
return "Error: Please provide both income statement and balance sheet files"
|
430 |
|
431 |
-
#
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
|
437 |
-
|
438 |
-
|
439 |
-
{income_summary.to_string()}
|
440 |
|
441 |
-
|
442 |
-
|
443 |
-
|
|
|
|
|
|
|
444 |
|
|
|
|
|
|
|
|
|
|
|
|
|
445 |
# Generate analysis
|
446 |
-
logger.info("
|
447 |
-
|
448 |
-
if "Error" in analysis:
|
449 |
-
logger.error("Strategic analysis generation failed")
|
450 |
-
return "Error: Failed to generate strategic analysis. Please try again."
|
451 |
-
|
452 |
-
# Analyze sentiment
|
453 |
-
logger.info("Starting sentiment analysis...")
|
454 |
-
sentiment = analyzer.analyze_sentiment(analysis)
|
455 |
-
if sentiment[0][0]['label'] == "error":
|
456 |
-
logger.error("Sentiment analysis failed")
|
457 |
-
return "Error: Failed to analyze sentiment. Please try again."
|
458 |
-
|
459 |
-
# Generate recommendations
|
460 |
-
logger.info("Starting recommendations generation...")
|
461 |
-
recommendations = analyzer.generate_recommendations(analysis)
|
462 |
-
if "Error" in recommendations:
|
463 |
-
logger.error("Recommendations generation failed")
|
464 |
-
return "Error: Failed to generate recommendations. Please try again."
|
465 |
-
|
466 |
-
# Format results
|
467 |
-
logger.info("Formatting final results...")
|
468 |
-
result = format_results(analysis, sentiment, recommendations)
|
469 |
-
clear_gpu_memory()
|
470 |
|
471 |
-
|
472 |
return result
|
473 |
-
|
474 |
except Exception as e:
|
475 |
logger.error(f"Analysis error: {str(e)}")
|
476 |
return f"""Analysis Error:
|
@@ -478,99 +217,58 @@ def analyze_financial_statements(income_statement, balance_sheet):
|
|
478 |
{str(e)}
|
479 |
|
480 |
Please verify:
|
481 |
-
1. Files are
|
482 |
-
2. Files contain
|
483 |
-
3.
|
484 |
-
4. Data contains numeric columns
|
485 |
-
5. Files are not corrupted"""
|
486 |
-
|
487 |
-
def format_results(analysis, sentiment, recommendations):
|
488 |
-
"""Format analysis results with improved validation and formatting"""
|
489 |
-
try:
|
490 |
-
if not isinstance(analysis, str) or not isinstance(recommendations, str):
|
491 |
-
raise ValueError("Invalid input types")
|
492 |
-
|
493 |
-
output = [
|
494 |
-
"# Financial Analysis Report\n\n",
|
495 |
-
"## Strategic Analysis\n\n",
|
496 |
-
f"{analysis.strip()}\n\n",
|
497 |
-
"## Market Sentiment\n\n"
|
498 |
-
]
|
499 |
-
|
500 |
-
if isinstance(sentiment, list) and sentiment:
|
501 |
-
for score in sentiment[0]:
|
502 |
-
if isinstance(score, dict) and 'label' in score and 'score' in score:
|
503 |
-
output.append(f"- {score['label']}: {score['score']:.2%}\n")
|
504 |
-
output.append("\n")
|
505 |
-
|
506 |
-
output.append("## Strategic Recommendations\n\n")
|
507 |
-
output.append(f"{recommendations.strip()}")
|
508 |
|
509 |
-
|
510 |
-
except Exception as e:
|
511 |
-
logger.error(f"Formatting error: {str(e)}")
|
512 |
-
return "Error formatting results"
|
513 |
-
|
514 |
-
# Create Gradio interface with improved error handling and guidance
|
515 |
iface = gr.Interface(
|
516 |
-
fn=
|
517 |
inputs=[
|
518 |
gr.File(
|
519 |
-
label="Income Statement (CSV)",
|
520 |
-
file_types=[".csv"]
|
|
|
521 |
),
|
522 |
gr.File(
|
523 |
-
label="Balance Sheet (CSV)",
|
524 |
-
file_types=[".csv"]
|
|
|
525 |
)
|
526 |
],
|
527 |
outputs=gr.Markdown(),
|
528 |
-
title="
|
529 |
-
description="""## Financial
|
|
|
|
|
|
|
|
|
|
|
530 |
|
531 |
-
|
532 |
-
-
|
533 |
-
-
|
534 |
-
- Strategic Recommendations
|
535 |
|
536 |
Requirements:
|
537 |
- Files must be in CSV format
|
538 |
- Must contain numeric data columns
|
539 |
-
-
|
540 |
-
- Standard financial statement format preferred
|
541 |
-
|
542 |
-
Note: Analysis may take a few minutes to complete.""",
|
543 |
-
article="""### Usage Tips:
|
544 |
-
1. Ensure your CSV files have clear column headers
|
545 |
-
2. Verify that numeric data is properly formatted
|
546 |
-
3. Wait for the analysis to complete - it may take several minutes
|
547 |
-
4. The more detailed your financial data, the better the analysis
|
548 |
-
|
549 |
-
For optimal results, include key financial metrics such as:
|
550 |
-
- Revenue
|
551 |
-
- Expenses
|
552 |
-
- Profits/Losses
|
553 |
-
- Assets
|
554 |
-
- Liabilities
|
555 |
-
- Equity""",
|
556 |
examples=[
|
557 |
["example_income_statement.csv", "example_balance_sheet.csv"]
|
558 |
],
|
559 |
flagging_mode="never"
|
560 |
)
|
561 |
|
562 |
-
# Launch the interface with
|
563 |
if __name__ == "__main__":
|
564 |
try:
|
565 |
-
# Enable
|
566 |
-
iface.queue()
|
567 |
-
|
568 |
-
# Launch with specific server configuration
|
569 |
iface.launch(
|
570 |
share=False,
|
571 |
server_name="0.0.0.0",
|
572 |
server_port=7860,
|
573 |
-
|
574 |
)
|
575 |
except Exception as e:
|
576 |
logger.error(f"Launch error: {str(e)}")
|
|
|
3 |
import pandas as pd
|
4 |
import torch
|
5 |
import logging
|
|
|
|
|
|
|
6 |
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
|
7 |
+
import gc
|
8 |
|
9 |
# Setup logging
|
10 |
logging.basicConfig(
|
|
|
23 |
torch.cuda.empty_cache()
|
24 |
gc.collect()
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
class FinancialAnalyzer:
    """Simplified Financial Analyzer using small models"""

    def __init__(self):
        # Initialize with two small models
        self.sentiment_model = None
        self.analysis_model = None
        self.load_models()

    def load_models(self):
        """Load the sentiment and text-generation pipelines.

        Raises:
            Exception: Whatever ``pipeline`` raises; it is logged and re-raised.
        """
        try:
            # NOTE(review): assumes DEVICE is the string "cuda" or "cpu", as
            # the dtype comparison below implies - confirm at its definition.
            dtype = torch.float16 if DEVICE == "cuda" else torch.float32

            # Load FinBERT for sentiment analysis. `device` is passed
            # explicitly so half-precision weights are placed on the GPU
            # rather than left on whatever device the pipeline defaults to.
            self.sentiment_model = pipeline(
                "text-classification",
                model="ProsusAI/finbert",
                torch_dtype=dtype,
                device=DEVICE,
            )

            # Load small model for analysis and recommendations
            self.analysis_model = pipeline(
                "text-generation",
                model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                torch_dtype=dtype,
                device=DEVICE,
            )

            logger.info("Models loaded successfully")
        except Exception as e:
            logger.error(f"Error loading models: {str(e)}")
            raise

    def process_csv(self, file_obj):
        """Process CSV file and extract KPIs

        Args:
            file_obj: A path, or a file-like object. If the object exposes a
                string ``name`` attribute (as uploaded temp files do), the
                file is re-read from that path.

        Returns:
            A ``(summary, metrics)`` tuple: ``summary`` is ``describe()`` of
            the numeric columns; ``metrics`` maps ``'total'``, ``'average'``
            and ``'growth'`` (mean row-over-row percent change) to Series.

        Raises:
            ValueError: If no file is given, the CSV has no rows, or it has
                no numeric columns.
        """
        try:
            if file_obj is None:
                raise ValueError("No file provided")

            # Prefer the on-disk path: an upload wrapper may already have
            # been consumed or closed by the time it is handed to pandas.
            path = getattr(file_obj, "name", None)
            source = path if isinstance(path, str) else file_obj
            df = pd.read_csv(source)

            if df.empty:
                raise ValueError("Empty CSV file")

            # Get numeric columns of any width, not only 64-bit ones
            numeric_cols = df.select_dtypes(include="number").columns
            if len(numeric_cols) == 0:
                raise ValueError("No numeric columns found in CSV")

            numeric = df[numeric_cols]

            # Calculate basic KPIs
            summary = numeric.describe()

            # A previous value of 0 yields +/-inf, which would swamp the
            # mean; treat those steps as missing instead.
            infinities = [float("inf"), float("-inf")]
            step_growth = numeric.pct_change().replace(infinities, float("nan"))

            # Extract key metrics
            metrics = {
                'total': numeric.sum(),
                'average': numeric.mean(),
                'growth': step_growth.mean() * 100,
            }

            return summary, metrics

        except Exception as e:
            logger.error(f"Error processing CSV: {str(e)}")
            raise

    def analyze_financials(self, income_summary, balance_summary):
        """Generate financial analysis and recommendations

        Args:
            income_summary: ``(summary, metrics)`` as returned by
                ``process_csv`` for the income statement.
            balance_summary: Same, for the balance sheet.

        Returns:
            The Markdown report from ``format_response``, or the string
            ``"Error generating analysis"`` if anything fails.
        """
        try:
            financial_context = f"""
Income Statement Metrics:
{income_summary[0].to_string()}

Key Income Indicators:
{income_summary[1]}

Balance Sheet Metrics:
{balance_summary[0].to_string()}

Key Balance Sheet Indicators:
{balance_summary[1]}
"""

            # Generate sentiment analysis. The context can exceed FinBERT's
            # 512-token input limit, so it must be truncated explicitly.
            sentiment = self.sentiment_model(
                financial_context,
                truncation=True,
                max_length=512,
            )[0]

            # Generate business analysis
            analysis_prompt = f"""[INST] Based on the following financial data, provide:
1. Current Business Status
2. Key Business Insights
3. Strategic Recommendations and Roadmap

Financial Context:
{financial_context}

Sentiment: {sentiment['label']} ({sentiment['score']:.2%})

Provide a concise but detailed analysis for each section.
[/INST]"""

            response = self.analysis_model(
                analysis_prompt,
                # Budget the completion only; max_length would also count
                # the (long) prompt and could leave no room to generate.
                max_new_tokens=1000,
                # temperature is ignored unless sampling is enabled.
                do_sample=True,
                temperature=0.7,
                num_return_sequences=1,
                # Keep the prompt out of the text handed to format_response,
                # where its headings would be mistaken for model output.
                return_full_text=False,
            )

            return self.format_response(response[0]['generated_text'], sentiment)

        except Exception as e:
            logger.error(f"Error in analysis: {str(e)}")
            return "Error generating analysis"

    def format_response(self, analysis_text, sentiment):
        """Format the analysis response into structured sections

        The text is scanned line by line. A line that begins (after list
        numbering or Markdown decoration) with one of the three section
        titles switches the current section; every other non-empty line
        becomes a bullet of the current section. Lines before the first
        recognised title are dropped.

        Args:
            analysis_text: Raw generated text.
            sentiment: Mapping with ``'label'`` and ``'score'`` keys.

        Returns:
            The Markdown report, or ``"Error formatting analysis results"``
            if formatting fails.
        """
        try:
            # Initialize output sections
            status = []
            insights = []
            recommendations = []

            # Longer titles first so the whole heading is consumed.
            headings = (
                ("current business status", status),
                ("business status", status),
                ("key business insights", insights),
                ("strategic recommendations and roadmap", recommendations),
                ("strategic recommendations & roadmap", recommendations),
                ("strategic recommendations", recommendations),
            )
            heading_decoration = "#*-•0123456789.) \t"
            bullet_prefixes = ("- ", "* ", "• ")

            current_section = None
            for raw_line in analysis_text.splitlines():
                line = raw_line.strip()
                if not line:
                    continue

                bare = line.lstrip(heading_decoration)
                lowered = bare.lower()
                matched = next(
                    ((title, bucket) for title, bucket in headings
                     if lowered.startswith(title)),
                    None,
                )
                if matched is not None:
                    title, current_section = matched
                    # Keep any content that follows the title on its line.
                    remainder = bare[len(title):].strip(" \t:*-–—")
                    if remainder:
                        current_section.append(remainder)
                    continue

                if current_section is not None:
                    if line.startswith(bullet_prefixes):
                        line = line[2:].strip()
                    if line:
                        current_section.append(line)

            # Format the final output
            output = [
                "# Financial Analysis Report\n\n",
                f"## Overall Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n",
                "## Current Business Status\n",
                "".join(f"- {item}\n" for item in status if item),
                "\n## Key Business Insights\n",
                "".join(f"- {item}\n" for item in insights if item),
                "\n## Strategic Recommendations & Roadmap\n",
                "".join(f"- {item}\n" for item in recommendations if item),
            ]

            return "".join(output)

        except Exception as e:
            logger.error(f"Error formatting response: {str(e)}")
            return "Error formatting analysis results"
|
176 |
|
177 |
+
# Shared FinancialAnalyzer instance, created on the first request.
_analyzer = None


def _get_analyzer():
    """Return the shared FinancialAnalyzer, loading its models on first use."""
    global _analyzer
    if _analyzer is None:
        _analyzer = FinancialAnalyzer()
    return _analyzer


def analyze_statements(income_statement, balance_sheet):
    """Main function to analyze financial statements

    Args:
        income_statement: Uploaded income statement CSV (or None).
        balance_sheet: Uploaded balance sheet CSV (or None).

    Returns:
        A Markdown report on success, otherwise a human-readable message
        describing what went wrong. This function does not raise.
    """
    try:
        # Check if files are uploaded
        if income_statement is None or balance_sheet is None:
            return "Please upload both Income Statement and Balance Sheet CSV files."

        # Get file names
        income_filename = income_statement.name if hasattr(income_statement, 'name') else 'Income Statement'
        balance_filename = balance_sheet.name if hasattr(balance_sheet, 'name') else 'Balance Sheet'

        logger.info(f"Processing {income_filename} and {balance_filename}")

        # Reuse one analyzer across requests: constructing FinancialAnalyzer
        # loads both models, which is far too slow to repeat per upload.
        analyzer = _get_analyzer()

        # Process statements with better error handling
        try:
            income_summary = analyzer.process_csv(income_statement)
            logger.info("Successfully processed Income Statement")
        except Exception as e:
            return f"Error processing Income Statement: {str(e)}\nPlease ensure it's a valid CSV file with numeric data."

        try:
            balance_summary = analyzer.process_csv(balance_sheet)
            logger.info("Successfully processed Balance Sheet")
        except Exception as e:
            return f"Error processing Balance Sheet: {str(e)}\nPlease ensure it's a valid CSV file with numeric data."

        # Generate analysis
        logger.info("Generating analysis...")
        result = analyzer.analyze_financials(income_summary, balance_summary)

        clear_gpu_memory()
        return result

    except Exception as e:
        logger.error(f"Analysis error: {str(e)}")
        return f"""Analysis Error:

{str(e)}

Please verify:
1. Files are in CSV format
2. Files contain numeric data columns
3. Files follow standard financial statement format"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
223 |
|
224 |
+
# Create Gradio interface with improved file handling
|
|
|
|
|
|
|
|
|
|
|
225 |
# Gradio UI: two single-file CSV uploads in, one Markdown report out.
iface = gr.Interface(
    title="Financial Statement Analyzer",
    description="""## Financial Analysis Tool

How to use:
1. Click 'Upload Income Statement' to select your income statement CSV file
2. Click 'Upload Balance Sheet' to select your balance sheet CSV file
3. Wait for the analysis to complete

The tool will provide:
- Business Status Assessment
- Key Financial Insights
- Strategic Recommendations

Requirements:
- Files must be in CSV format
- Must contain numeric data columns
- Standard financial statement format preferred""",
    fn=analyze_statements,
    # Both upload widgets are identical apart from the statement named in
    # their label, so they are built from the two statement names.
    inputs=[
        gr.File(
            label=f"Upload {statement} (CSV)",
            file_types=[".csv"],
            file_count="single",
        )
        for statement in ("Income Statement", "Balance Sheet")
    ],
    outputs=gr.Markdown(),
    examples=[
        ["example_income_statement.csv", "example_balance_sheet.csv"],
    ],
    flagging_mode="never",
)
|
262 |
|
263 |
+
# Launch the interface with better error handling
|
264 |
if __name__ == "__main__":
    # Script entry point: enable request queuing, then serve on all
    # interfaces at port 7860 with the API tab hidden. A launch failure is
    # logged rather than propagated.
    try:
        iface.queue().launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=False,
            show_api=False,
        )
    except Exception as e:
        logger.error(f"Launch error: {str(e)}")
|