# Source: Hugging Face Space (status when captured: Sleeping)
import os | |
import gradio as gr | |
import pandas as pd | |
import torch | |
import logging | |
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer | |
import gc | |
# Configure logging once for the whole app: timestamp, level, then the message.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger(__name__)

# Pick the compute device: the GPU when torch can see one, otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
logger.info("Using device: %s", DEVICE)
def clear_gpu_memory():
    """Release memory that is no longer referenced.

    The Python garbage collector runs first so that objects kept alive only by
    reference cycles are destroyed; only then, when running on CUDA, is torch
    asked to release its cached GPU blocks. Emptying the cache before
    collecting would leave the blocks of those objects cached.
    """
    gc.collect()
    if DEVICE == "cuda":
        torch.cuda.empty_cache()
class FinancialAnalyzer:
    """Analyze financial-statement CSVs with two small Hugging Face models.

    ``ProsusAI/finbert`` labels the sentiment of the computed metrics and
    ``TinyLlama/TinyLlama-1.1B-Chat-v1.0`` writes the narrative report. Both
    pipelines are loaded as soon as an instance is constructed.
    """

    # Heading markers searched for in the generated text (in priority order)
    # and the report section each one opens.
    _SECTION_MARKERS = (
        ("Business Status", "status"),
        ("Key Business Insights", "insights"),
        ("Strategic Recommendations", "recommendations"),
    )

    # FinBERT is a BERT model, so its input is limited to 512 tokens.
    _SENTIMENT_MAX_TOKENS = 512

    # Budget for the generated report itself (the prompt is not counted).
    _MAX_NEW_TOKENS = 512

    def __init__(self):
        """Create the analyzer and load both pipelines immediately."""
        self.sentiment_model = None
        self.analysis_model = None
        self.load_models()

    def load_models(self):
        """Load the sentiment and text-generation pipelines onto ``DEVICE``.

        Half precision is used only on CUDA. The device is passed explicitly
        because a pipeline created without it stays on the CPU.

        Raises:
            Exception: Any error raised while loading is logged and re-raised.
        """
        dtype = torch.float16 if DEVICE == "cuda" else torch.float32
        try:
            # FinBERT for sentiment analysis
            self.sentiment_model = pipeline(
                "text-classification",
                model="ProsusAI/finbert",
                torch_dtype=dtype,
                device=DEVICE,
            )
            # Small chat model for analysis and recommendations
            self.analysis_model = pipeline(
                "text-generation",
                model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                torch_dtype=dtype,
                device=DEVICE,
            )
            logger.info("Models loaded successfully")
        except Exception as e:
            logger.error(f"Error loading models: {str(e)}")
            raise

    def process_csv(self, file_obj):
        """Read a CSV and compute summary statistics for its numeric columns.

        Args:
            file_obj: Anything ``pandas.read_csv`` accepts (path or file-like).

        Returns:
            A ``(summary, metrics)`` tuple. ``summary`` is the ``describe()``
            table of the numeric columns. ``metrics`` is a dict with the keys
            ``'total'``, ``'average'`` and ``'growth'``, each a Series indexed
            by column name; ``'growth'`` is the mean row-to-row percentage
            change.

        Raises:
            ValueError: If no file is given, the CSV has no rows, or it has no
                numeric columns.
            Exception: Any pandas parsing error is logged and re-raised.
        """
        try:
            if file_obj is None:
                raise ValueError("No file provided")
            df = pd.read_csv(file_obj)
            if df.empty:
                raise ValueError("Empty CSV file")

            # "number" matches every numeric dtype, not just int64/float64.
            numeric = df.select_dtypes(include="number")
            if numeric.shape[1] == 0:
                raise ValueError("No numeric columns found in CSV")

            summary = numeric.describe()

            # A value that follows a zero yields an infinite change, which
            # would turn the whole column mean into inf; treat it as undefined
            # so it is skipped. fill_method=None keeps missing values missing
            # instead of forward-filling them into fake 0% changes.
            infinities = [float("inf"), float("-inf")]
            changes = numeric.pct_change(fill_method=None).replace(
                infinities, float("nan")
            )

            metrics = {
                'total': numeric.sum(),
                'average': numeric.mean(),
                'growth': changes.mean() * 100,
            }
            return summary, metrics
        except Exception as e:
            logger.error(f"Error processing CSV: {str(e)}")
            raise

    def analyze_financials(self, income_summary, balance_summary):
        """Generate the sentiment label and the narrative report.

        Args:
            income_summary: ``(summary, metrics)`` as returned by
                ``process_csv`` for the income statement.
            balance_summary: The same pair for the balance sheet.

        Returns:
            The Markdown report, or the string ``"Error generating analysis"``
            if any step fails (the error is logged, not raised).
        """
        try:
            financial_context = "\n".join([
                "",
                "Income Statement Metrics:",
                income_summary[0].to_string(),
                "Key Income Indicators:",
                str(income_summary[1]),
                "Balance Sheet Metrics:",
                balance_summary[0].to_string(),
                "Key Balance Sheet Indicators:",
                str(balance_summary[1]),
                "",
            ])

            # The metric tables can be longer than FinBERT's input limit, so
            # the text is truncated instead of letting the model fail.
            sentiment = self.sentiment_model(
                financial_context,
                truncation=True,
                max_length=self._SENTIMENT_MAX_TOKENS,
            )[0]

            analysis_prompt = "\n".join([
                "[INST] Based on the following financial data, provide:",
                "1. Current Business Status",
                "2. Key Business Insights",
                "3. Strategic Recommendations and Roadmap",
                "Financial Context:",
                financial_context,
                f"Sentiment: {sentiment['label']} ({sentiment['score']:.2%})",
                "Provide a concise but detailed analysis for each section.",
                "[/INST]",
            ])

            response = self.analysis_model(
                analysis_prompt,
                # Bound only the generated part; a total-length cap would leave
                # little or no room once the prompt itself is long.
                max_new_tokens=self._MAX_NEW_TOKENS,
                # Temperature only takes effect when sampling is enabled.
                do_sample=True,
                temperature=0.7,
                num_return_sequences=1,
                # Without this the prompt is echoed back, and its own numbered
                # list would be parsed as section headings.
                return_full_text=False,
            )
            return self.format_response(response[0]['generated_text'], sentiment)
        except Exception as e:
            logger.exception(f"Error in analysis: {str(e)}")
            return "Error generating analysis"

    @classmethod
    def _split_heading(cls, line):
        """Return ``(section_name, inline_text)`` if *line* is a heading.

        ``inline_text`` is whatever follows a colon on the heading line (empty
        when there is none). Returns ``None`` for ordinary lines.
        """
        for marker, name in cls._SECTION_MARKERS:
            _, found, tail = line.partition(marker)
            if found:
                _, colon, inline = tail.partition(":")
                return name, (inline.strip(" *_#\t") if colon else "")
        return None

    def format_response(self, analysis_text, sentiment):
        """Turn the generated text into the structured Markdown report.

        The text is scanned for the three section headings. Each paragraph
        (blank-line separated) under a heading becomes one bullet. Text that
        shares a paragraph or a line with its heading is kept. When no section
        content is recognised at all, the raw text is appended under an
        ``Analysis`` heading so the model output is never silently dropped.

        Args:
            analysis_text: Text produced by the generation model.
            sentiment: Mapping with a ``'label'`` string and a ``'score'`` float.

        Returns:
            The Markdown report, or ``"Error formatting analysis results"`` if
            formatting fails (the error is logged, not raised).
        """
        try:
            buckets = {name: [] for _, name in self._SECTION_MARKERS}
            current = None

            def flush(lines, bucket):
                # Store the collected lines as one bullet of the open section.
                text = "\n".join(lines).strip()
                if bucket is not None and text:
                    bucket.append(text)

            for paragraph in analysis_text.split('\n\n'):
                pending = []
                for line in paragraph.split('\n'):
                    heading = self._split_heading(line)
                    if heading is None:
                        pending.append(line)
                        continue
                    # Text before a heading still belongs to the old section.
                    flush(pending, current)
                    name, inline = heading
                    current = buckets[name]
                    pending = [inline] if inline else []
                flush(pending, current)

            output = [
                "# Financial Analysis Report\n\n",
                f"## Overall Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n",
                "## Current Business Status\n",
                "".join(f"- {item}\n" for item in buckets["status"]),
                "\n## Key Business Insights\n",
                "".join(f"- {item}\n" for item in buckets["insights"]),
                "\n## Strategic Recommendations & Roadmap\n",
                "".join(f"- {item}\n" for item in buckets["recommendations"]),
            ]

            raw_text = analysis_text.strip()
            if raw_text and not any(buckets.values()):
                output.append(f"\n## Analysis\n{raw_text}\n")

            return "".join(output)
        except Exception as e:
            logger.error(f"Error formatting response: {str(e)}")
            return "Error formatting analysis results"
# Shared analyzer, created on first use so the models are loaded once per
# process rather than once per request.
_ANALYZER = None


def _get_analyzer():
    """Return the shared FinancialAnalyzer, constructing it on first use."""
    global _ANALYZER
    if _ANALYZER is None:
        # If construction raises, _ANALYZER stays None and the next request
        # retries the load.
        _ANALYZER = FinancialAnalyzer()
    return _ANALYZER


def analyze_statements(income_statement, balance_sheet):
    """Analyze an income statement and a balance sheet uploaded as CSV files.

    Args:
        income_statement: Uploaded income-statement file (path or file object),
            or ``None`` when nothing was uploaded.
        balance_sheet: Uploaded balance-sheet file, or ``None``.

    Returns:
        The Markdown report on success, otherwise a human-readable message
        describing what went wrong. This function never raises.
    """
    # Check if files are uploaded before doing any work
    if income_statement is None or balance_sheet is None:
        return "Please upload both Income Statement and Balance Sheet CSV files."

    try:
        income_filename = getattr(income_statement, 'name', 'Income Statement')
        balance_filename = getattr(balance_sheet, 'name', 'Balance Sheet')
        logger.info(f"Processing {income_filename} and {balance_filename}")

        analyzer = _get_analyzer()

        # Each statement is parsed separately so the message can say which
        # file was the problem.
        try:
            income_summary = analyzer.process_csv(income_statement)
            logger.info("Successfully processed Income Statement")
        except Exception as e:
            return f"Error processing Income Statement: {e}\nPlease ensure it's a valid CSV file with numeric data."

        try:
            balance_summary = analyzer.process_csv(balance_sheet)
            logger.info("Successfully processed Balance Sheet")
        except Exception as e:
            return f"Error processing Balance Sheet: {e}\nPlease ensure it's a valid CSV file with numeric data."

        logger.info("Generating analysis...")
        return analyzer.analyze_financials(income_summary, balance_summary)
    except Exception as e:
        logger.error(f"Analysis error: {str(e)}")
        return "\n".join([
            "Analysis Error:",
            str(e),
            "Please verify:",
            "1. Files are in CSV format",
            "2. Files contain numeric data columns",
            "3. Files follow standard financial statement format",
        ])
    finally:
        # Release per-request memory on the failure paths as well.
        clear_gpu_memory()
# Build the Gradio UI: two single-file CSV uploads in, one Markdown report out.
_INTERFACE_DESCRIPTION = """## Financial Analysis Tool
How to use:
1. Click 'Upload Income Statement' to select your income statement CSV file
2. Click 'Upload Balance Sheet' to select your balance sheet CSV file
3. Wait for the analysis to complete
The tool will provide:
- Business Status Assessment
- Key Financial Insights
- Strategic Recommendations
Requirements:
- Files must be in CSV format
- Must contain numeric data columns
- Standard financial statement format preferred"""

# Both upload widgets share the same settings and differ only in their label.
_upload_inputs = [
    gr.File(label=upload_label, file_types=[".csv"], file_count="single")
    for upload_label in (
        "Upload Income Statement (CSV)",
        "Upload Balance Sheet (CSV)",
    )
]

iface = gr.Interface(
    fn=analyze_statements,
    inputs=_upload_inputs,
    outputs=gr.Markdown(),
    title="Financial Statement Analyzer",
    description=_INTERFACE_DESCRIPTION,
    flagging_mode="never",
)
# Launch the interface; on failure, log the error and exit with status 1.
if __name__ == "__main__":
    # Imported here because the file's import block does not provide ``sys``;
    # without it the error handler below would itself fail with a NameError.
    import sys

    try:
        iface.queue()  # Enable queuing for better file handling
        iface.launch(
            share=False,
            server_name="0.0.0.0",
            server_port=7860,
            show_api=False,  # Disable API tab for security
        )
    except Exception as e:
        logger.exception(f"Launch error: {str(e)}")
        sys.exit(1)