# Spaces: Sleeping
import gc
import logging
import os
import sys

import gradio as gr
import pandas as pd
import torch
from transformers import pipeline
# Setup logging: timestamped INFO-level records on the root logger
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Device configuration: use the GPU whenever PyTorch reports one, else the CPU
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
logger.info(f"Using device: {DEVICE}")
def clear_gpu_memory():
    """Free unreachable Python objects and, on CUDA, PyTorch's cached GPU memory.

    Safe to call on CPU-only machines: the CUDA step is skipped unless
    ``DEVICE`` is ``"cuda"``.
    """
    # Collect first: tensors that are only kept alive by reference cycles still
    # own their CUDA blocks, and empty_cache() can only hand back blocks that
    # no tensor owns any more.
    gc.collect()
    if DEVICE == "cuda":
        torch.cuda.empty_cache()
class FinancialDataExtractor:
    """Extract and clean financial data."""

    def __init__(self):
        self.logger = logger

    def clean_number(self, value):
        """Clean numeric values from financial statements.

        Args:
            value: A raw cell value, e.g. ``"$1,234.50"``, ``"(500)"``, ``12``,
                ``None`` or ``"-"``.

        Returns:
            The value as a ``float``. Missing values, the placeholders ``""``
            and ``"-"``, and anything that cannot be parsed become ``0.0``.
            A value wrapped in parentheses is treated as negative.
        """
        try:
            if pd.isna(value) or value == '' or value == '-':
                return 0.0
            if isinstance(value, (int, float)):
                return float(value)
            # Remove currency symbols, commas and stray quotes
            cleaned = str(value).replace('$', '').replace(',', '').replace('"', '').strip()
            # Accounting notation: "(500)" means -500
            if '(' in cleaned and ')' in cleaned:
                inner = cleaned.replace('(', '').replace(')', '')
                # Strip again so "( 500 )" does not become the unparseable "- 500 "
                cleaned = '-' + inner.strip()
            return float(cleaned)
        except (TypeError, ValueError, OverflowError):
            # Best effort by design: an unparseable cell counts as zero.
            return 0.0

    def extract_data(self, df: pd.DataFrame) -> tuple:
        """Extract and clean data from DataFrame.

        Args:
            df: Raw statement with one column per year (header made only of
                digits, e.g. ``"2021"``). The frame is not modified.

        Returns:
            A ``(cleaned_df, year_cols)`` tuple: a copy of ``df`` whose column
            labels are stripped strings and whose year columns hold floats,
            and the list of year column labels in their original order.

        Raises:
            ValueError: If no column label consists solely of digits.
        """
        # Work on a copy so the caller's frame is left untouched
        df = df.copy()
        # Clean column names; str() keeps non-string labels (e.g. int 2021) usable
        df.columns = [str(col).strip() for col in df.columns]
        # Get year columns
        year_cols = [col for col in df.columns if col.isdigit()]
        if not year_cols:
            raise ValueError("No year columns found in data")
        # Clean numeric data
        for col in year_cols:
            df[col] = df[col].apply(self.clean_number)
        return df, year_cols
class FinancialAnalyzer:
    """Financial analysis using small models.

    Cleans an income statement and a balance sheet, derives per-year
    profitability, liquidity and growth figures, and renders a Markdown report
    with the help of two Hugging Face pipelines: ``ProsusAI/finbert`` for
    sentiment and ``TinyLlama/TinyLlama-1.1B-Chat-v1.0`` for the commentary.
    """

    def __init__(self):
        """Create the data extractor and eagerly load both models."""
        self.extractor = FinancialDataExtractor()
        self.sentiment_model = None
        self.analysis_model = None
        self.load_models()

    def load_models(self):
        """Load the required models onto ``DEVICE``.

        Raises:
            Exception: Whatever ``transformers.pipeline`` raises while
                downloading or initialising a model; logged, then re-raised.
        """
        # Half precision only makes sense on the GPU
        dtype = torch.float16 if DEVICE == "cuda" else torch.float32
        try:
            # Load FinBERT for sentiment analysis
            self.sentiment_model = pipeline(
                "text-classification",
                model="ProsusAI/finbert",
                torch_dtype=dtype,
                # Place the model explicitly so the float16 choice above always
                # matches where the weights actually live.
                device=DEVICE,
                truncation=True,
            )
            # Load small model for analysis
            self.analysis_model = pipeline(
                "text-generation",
                model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                torch_dtype=dtype,
                device=DEVICE,
            )
            logger.info("Models loaded successfully")
        except Exception as e:
            logger.error(f"Error loading models: {str(e)}")
            raise

    @staticmethod
    def _row_value(df, pattern, year, last=False, exclude=None):
        """Return the ``year`` value of the first (or last) row whose 'Period' matches ``pattern``.

        Rows whose label also matches ``exclude`` are skipped, unless that
        would leave no match at all. Raises ``IndexError`` when nothing matches.
        """
        labels = df['Period']
        mask = labels.str.contains(pattern, na=False, case=False)
        if exclude is not None:
            narrowed = mask & ~labels.str.contains(exclude, na=False, case=False)
            if narrowed.any():
                mask = narrowed
        matches = df.loc[mask, year]
        if matches.empty:
            raise IndexError(
                f"No row matching '{pattern}' found in the 'Period' column for {year}"
            )
        return matches.iloc[-1 if last else 0]

    @staticmethod
    def _ratio(numerator, denominator):
        """Return ``numerator / denominator``, or NaN when the denominator is zero."""
        if denominator == 0:
            return float('nan')
        return numerator / denominator

    @staticmethod
    def _growth_pct(latest, earliest):
        """Return the percentage change from ``earliest`` to ``latest`` (NaN for a zero base)."""
        if earliest == 0:
            return float('nan')
        # abs() keeps the sign meaningful when the base value is negative
        return (latest - earliest) / abs(earliest) * 100

    @staticmethod
    def _year_key(col):
        """Sort key that orders year labels numerically rather than as text."""
        return int(str(col))

    def calculate_metrics(self, income_df: pd.DataFrame, balance_df: pd.DataFrame, year_cols: list) -> dict:
        """Calculate financial metrics.

        Args:
            income_df: Cleaned income statement with a 'Period' label column
                and one numeric column per entry of ``year_cols``.
            balance_df: Cleaned balance sheet with the same layout.
            year_cols: Year column labels to compute metrics for.

        Returns:
            ``{year: {'Profitability': {...}, 'Liquidity': {...}, 'Growth': {...}}}``.
            Margins, ROE and ROA are percentages. A ratio whose denominator
            is zero is NaN.

        Raises:
            KeyError: If a frame lacks the 'Period' column or a year column.
            IndexError: If a required line item cannot be found.
        """
        metrics = {}
        for year in year_cols:
            # Income Statement line items
            revenue = self._row_value(income_df, 'Total Net Revenue|Revenue', year)
            cogs = self._row_value(income_df, 'Cost of Goods Sold', year)
            ebit = self._row_value(income_df, 'Earnings Before Interest & Taxes', year)
            # The last match is used so a bottom-line total wins over sub-items
            net_income = self._row_value(income_df, 'Net Income|Net Earnings', year, last=True)

            # Balance Sheet line items
            total_assets = self._row_value(balance_df, 'Total Assets', year)
            current_assets = self._row_value(balance_df, 'Total current assets', year)
            current_liabilities = self._row_value(balance_df, 'Total current liabilities', year)
            # Skip a combined "Total Liabilities and Shareholder's Equity" row
            # when a pure equity row exists; it would understate ROE.
            equity = self._row_value(
                balance_df, "Shareholder's Equity", year, last=True, exclude='Liabilit'
            )

            # Calculate ratios
            metrics[year] = {
                'Profitability': {
                    'Gross_Margin': self._ratio(revenue - cogs, revenue) * 100,
                    'Operating_Margin': self._ratio(ebit, revenue) * 100,
                    'Net_Margin': self._ratio(net_income, revenue) * 100,
                    'ROE': self._ratio(net_income, equity) * 100,
                    'ROA': self._ratio(net_income, total_assets) * 100,
                },
                'Liquidity': {
                    'Current_Ratio': self._ratio(current_assets, current_liabilities),
                    'Working_Capital': current_assets - current_liabilities,
                },
                'Growth': {
                    'Revenue': revenue,
                    'Net_Income': net_income,
                    'Total_Assets': total_assets,
                },
            }
        return metrics

    def analyze_financials(self, income_df: pd.DataFrame, balance_df: pd.DataFrame) -> str:
        """Generate financial analysis.

        Args:
            income_df: Raw income statement as read from CSV.
            balance_df: Raw balance sheet as read from CSV.

        Returns:
            A Markdown report with sentiment, key ratios for the latest year,
            growth between the earliest and latest year, and generated
            commentary.

        Raises:
            ValueError: If either statement has no year columns, or the two
                statements share none.
            Exception: Any other failure is logged and re-raised unchanged.
        """
        try:
            # Extract and clean data
            income_df, income_years = self.extractor.extract_data(income_df)
            balance_df, balance_years = self.extractor.extract_data(balance_df)

            # Every metric needs both statements, so keep only shared years
            balance_year_set = set(balance_years)
            year_cols = [year for year in income_years if year in balance_year_set]
            if not year_cols:
                raise ValueError(
                    "Income statement and balance sheet have no year columns in common"
                )

            # Calculate metrics
            metrics = self.calculate_metrics(income_df, balance_df, year_cols)

            # Get latest and earliest years (numeric, not lexicographic, order)
            latest_year = max(year_cols, key=self._year_key)
            earliest_year = min(year_cols, key=self._year_key)
            latest = metrics[latest_year]
            earliest = metrics[earliest_year]

            # Calculate growth
            revenue_growth = self._growth_pct(
                latest['Growth']['Revenue'], earliest['Growth']['Revenue']
            )
            profit_growth = self._growth_pct(
                latest['Growth']['Net_Income'], earliest['Growth']['Net_Income']
            )
            if latest['Profitability']['Net_Margin'] > earliest['Profitability']['Net_Margin']:
                margin_trend = 'improving'
            else:
                margin_trend = 'declining'

            # Generate analysis context
            context = "\n".join([
                f"Financial Analysis ({earliest_year}-{latest_year}):",
                "Performance Metrics:",
                f"- Revenue Growth: {revenue_growth:.1f}%",
                f"- Profit Growth: {profit_growth:.1f}%",
                f"- Current Gross Margin: {latest['Profitability']['Gross_Margin']:.1f}%",
                f"- Current Net Margin: {latest['Profitability']['Net_Margin']:.1f}%",
                f"- ROE: {latest['Profitability']['ROE']:.1f}%",
                f"- Current Ratio: {latest['Liquidity']['Current_Ratio']:.2f}",
                "Trends:",
                f"- Revenue has grown from ${earliest['Growth']['Revenue']:,.0f} to ${latest['Growth']['Revenue']:,.0f}",
                f"- Net Income has changed from ${earliest['Growth']['Net_Income']:,.0f} to ${latest['Growth']['Net_Income']:,.0f}",
                f"- Profitability margins show {margin_trend} trend",
            ])

            # Get sentiment
            sentiment = self.sentiment_model(context[:512])[0]

            # Generate detailed analysis
            # NOTE(review): the [INST] markers are Llama-2 style; confirm they
            # match the chat template this TinyLlama checkpoint expects.
            analysis = self.analysis_model(
                f"[INST] As a financial analyst, provide a detailed analysis of this company:\n\n{context}\n\nInclude:\n1. Financial health assessment\n2. Key performance insights\n3. Strategic recommendations [/INST]",
                max_length=1500,
                num_return_sequences=1,
                do_sample=True,
                temperature=0.7,
                # Return only the continuation; otherwise the report's
                # "Analysis" section starts with a copy of the prompt.
                return_full_text=False,
            )[0]['generated_text'].strip()

            # Format output
            output = "\n".join([
                "# Financial Analysis Report",
                f"## Overall Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.1%})",
                f"## Key Performance Indicators ({latest_year})",
                f"- Gross Margin: {latest['Profitability']['Gross_Margin']:.1f}%",
                f"- Operating Margin: {latest['Profitability']['Operating_Margin']:.1f}%",
                f"- Net Margin: {latest['Profitability']['Net_Margin']:.1f}%",
                f"- ROE: {latest['Profitability']['ROE']:.1f}%",
                f"- Current Ratio: {latest['Liquidity']['Current_Ratio']:.2f}",
                f"## Performance Trends ({earliest_year}-{latest_year})",
                f"- Revenue Growth: {revenue_growth:.1f}%",
                f"- Profit Growth: {profit_growth:.1f}%",
                f"- Working Capital: ${latest['Liquidity']['Working_Capital']:,.0f}",
                "## Analysis",
                f"{analysis}",
            ])
            return output
        except Exception as e:
            logger.error(f"Analysis error: {str(e)}")
            raise
def _upload_path(upload):
    """Return the filesystem path of a Gradio upload (file-like object or plain path)."""
    # Depending on the Gradio version/config the handler receives either an
    # object exposing ``.name`` or the path string itself.
    return getattr(upload, "name", upload)


def analyze_statements(income_statement, balance_sheet):
    """Main function to analyze financial statements.

    Args:
        income_statement: Uploaded income statement CSV (object with a
            ``.name`` path attribute, or a path string).
        balance_sheet: Uploaded balance sheet CSV, same forms.

    Returns:
        The Markdown report, a prompt to upload both files when one is
        missing, or an "Analysis Error" message if anything fails. This
        function never raises for analysis failures, so the UI always gets
        text to display.
    """
    if not income_statement or not balance_sheet:
        return "Please upload both Income Statement and Balance Sheet CSV files."

    analyzer = None
    try:
        # Read files
        income_df = pd.read_csv(_upload_path(income_statement))
        balance_df = pd.read_csv(_upload_path(balance_sheet))
        # Create analyzer and process
        analyzer = FinancialAnalyzer()
        return analyzer.analyze_financials(income_df, balance_df)
    except Exception as e:
        logger.error(f"Analysis error: {str(e)}")
        return (
            f"Analysis Error: {str(e)}\n"
            "Please ensure your CSV files:\n"
            "1. Have clear year columns\n"
            "2. Contain recognizable financial metrics\n"
            "3. Use consistent number formatting"
        )
    finally:
        # Clear memory on success and failure alike. Drop the reference to the
        # models first; while ``analyzer`` is alive nothing can be released.
        del analyzer
        clear_gpu_memory()
# Create Gradio interface: two CSV uploads in, one Markdown report out
iface = gr.Interface(
    fn=analyze_statements,
    inputs=[
        gr.File(label=f"Upload {statement} (CSV)", file_types=[".csv"])
        for statement in ("Income Statement", "Balance Sheet")
    ],
    outputs=gr.Markdown(),
    title="Financial Statement Analyzer",
    description="""## Financial Analysis Tool
Upload your financial statements to get:
- Performance Analysis
- Key Metrics & Ratios
- Trend Analysis
- Strategic Recommendations""",
    examples=None,
)
# Launch the interface
if __name__ == "__main__":
    try:
        # Bind to all interfaces on port 7860
        iface.launch(server_name="0.0.0.0", server_port=7860)
    except Exception as e:
        # logger.exception keeps the traceback alongside the original message
        logger.exception(f"Launch error: {str(e)}")
        # Exit non-zero so the hosting process sees the failure
        # (relies on the top-of-file ``import sys``).
        sys.exit(1)