# walaa2022's picture
# Update app.py
# f95f954 verified
# raw
# history blame
# 11 kB
import os
import gradio as gr
import pandas as pd
import torch
import logging
import gc
from transformers import pipeline
# Configure logging once for the whole application: timestamp, level, message.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Pick the compute device: the GPU when torch reports one, otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
logger.info(f"Using device: {DEVICE}")
def clear_gpu_memory():
    """Release unreferenced Python objects and, on CUDA, cached GPU memory.

    The garbage collector runs first so that tensors kept alive only by
    reference cycles are freed *before* the CUDA caching allocator is asked
    to hand its unused blocks back; calling ``empty_cache()`` first would
    leave those blocks cached.
    """
    gc.collect()
    if DEVICE == "cuda":
        torch.cuda.empty_cache()
class FinancialDataExtractor:
    """Extract and clean financial data from statement DataFrames."""

    def __init__(self):
        # Same logger object as a module-level ``logging.getLogger(__name__)``.
        self.logger = logging.getLogger(__name__)

    def clean_number(self, value):
        """Convert one financial-statement cell to a float.

        Handles currency symbols, thousands separators, stray double quotes
        and accounting-style negatives written in parentheses.

        Args:
            value: Raw cell value (number, string, or missing).

        Returns:
            The parsed float, or ``0.0`` for missing values, ``''``, ``'-'``
            and anything that cannot be parsed as a number.
        """
        try:
            if pd.isna(value) or value == '' or value == '-':
                return 0.0
            if isinstance(value, (int, float)):
                return float(value)

            # Remove currency symbols, commas and quotes, then outer spaces.
            cleaned = str(value).replace('$', '').replace(',', '').replace('"', '').strip()

            # Parentheses mark a negative number; strip again so that
            # "( 50 )" parses instead of silently becoming 0.0.
            if '(' in cleaned and ')' in cleaned:
                cleaned = '-' + cleaned.replace('(', '').replace(')', '').strip()

            return float(cleaned)
        except (TypeError, ValueError, OverflowError):
            # Unparseable cells are deliberately treated as zero (best effort),
            # but unrelated errors such as KeyboardInterrupt are no longer hidden.
            return 0.0

    def extract_data(self, df: pd.DataFrame) -> tuple:
        """Normalise column names and clean every year column.

        Works on a copy, so the caller's DataFrame is left untouched.

        Args:
            df: Raw statement; columns whose stripped name consists only of
                digits are treated as year columns.

        Returns:
            A ``(cleaned_df, year_cols)`` tuple: the cleaned copy and the list
            of year column names in their original order.

        Raises:
            ValueError: If no year column is present.
        """
        cleaned = df.copy()

        # ``str(col)`` keeps this working when labels are not strings
        # (e.g. integer years), where the ``.str`` accessor would fail.
        cleaned.columns = [str(col).strip() for col in cleaned.columns]

        year_cols = [col for col in cleaned.columns if col.isdigit()]
        if not year_cols:
            raise ValueError("No year columns found in data")

        for col in year_cols:
            cleaned[col] = cleaned[col].apply(self.clean_number)

        return cleaned, year_cols
class FinancialAnalyzer:
    """Financial analysis using small models.

    Combines ratio calculations on an income statement and a balance sheet
    with a FinBERT sentiment label and a TinyLlama-generated narrative.
    """

    def __init__(self):
        """Create the data extractor and load both models eagerly."""
        self.extractor = FinancialDataExtractor()
        self.sentiment_model = None
        self.analysis_model = None
        self.load_models()

    def load_models(self):
        """Load the sentiment and text-generation pipelines.

        Both pipelines are placed explicitly on ``DEVICE`` so that the
        half-precision weights selected for CUDA actually run on the GPU.

        Raises:
            Exception: Any error raised while loading is logged and re-raised.
        """
        # Half precision only makes sense on the GPU.
        dtype = torch.float16 if DEVICE == "cuda" else torch.float32
        try:
            # Load FinBERT for sentiment analysis
            self.sentiment_model = pipeline(
                "text-classification",
                model="ProsusAI/finbert",
                torch_dtype=dtype,
                device=DEVICE,
                truncation=True,
            )
            # Load small model for analysis
            self.analysis_model = pipeline(
                "text-generation",
                model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                torch_dtype=dtype,
                device=DEVICE,
            )
            logger.info("Models loaded successfully")
        except Exception as e:
            logger.error(f"Error loading models: {str(e)}")
            raise

    @staticmethod
    def _lookup(df: pd.DataFrame, pattern: str, year, position: int = 0) -> float:
        """Return the ``year`` value of a row whose 'Period' label matches ``pattern``.

        Args:
            df: Statement with a 'Period' label column and one column per year.
            pattern: Case-insensitive regular expression searched in 'Period'.
            year: Name of the year column to read.
            position: Which match to use (``0`` = first, ``-1`` = last).

        Raises:
            ValueError: If a required column or a matching row is missing.
        """
        for column in ('Period', year):
            if column not in df.columns:
                raise ValueError(f"Column '{column}' not found in financial statement")
        matches = df.loc[df['Period'].str.contains(pattern, na=False, case=False), year]
        if matches.empty:
            raise ValueError(f"No row matching '{pattern}' found in financial statement")
        return float(matches.iloc[position])

    @staticmethod
    def _ratio(numerator: float, denominator: float) -> float:
        """Divide, returning NaN (undefined) instead of failing on a zero denominator."""
        if denominator == 0:
            return float('nan')
        return numerator / denominator

    def calculate_metrics(self, income_df: pd.DataFrame, balance_df: pd.DataFrame, year_cols: list) -> dict:
        """Calculate financial metrics for every year.

        Args:
            income_df: Cleaned income statement ('Period' column plus years).
            balance_df: Cleaned balance sheet with the same layout.
            year_cols: Year column names to evaluate.

        Returns:
            ``{year: {'Profitability': {...}, 'Liquidity': {...}, 'Growth': {...}}}``.
            Margins, ROE and ROA are percentages; a ratio whose denominator
            is zero is reported as NaN.

        Raises:
            ValueError: If a required row or column cannot be found.
        """
        metrics = {}
        for year in year_cols:
            # Income Statement values
            revenue = self._lookup(income_df, 'Total Net Revenue|Revenue', year)
            cogs = self._lookup(income_df, 'Cost of Goods Sold', year)
            ebit = self._lookup(income_df, 'Earnings Before Interest & Taxes', year)
            # Last match: the bottom-line figure when several rows mention it.
            net_income = self._lookup(income_df, 'Net Income|Net Earnings', year, position=-1)

            # Balance Sheet values
            total_assets = self._lookup(balance_df, 'Total Assets', year)
            current_assets = self._lookup(balance_df, 'Total current assets', year)
            current_liabilities = self._lookup(balance_df, 'Total current liabilities', year)
            equity = self._lookup(balance_df, "Shareholder's Equity", year, position=-1)

            # Calculate ratios
            metrics[year] = {
                'Profitability': {
                    'Gross_Margin': self._ratio(revenue - cogs, revenue) * 100,
                    'Operating_Margin': self._ratio(ebit, revenue) * 100,
                    'Net_Margin': self._ratio(net_income, revenue) * 100,
                    'ROE': self._ratio(net_income, equity) * 100,
                    'ROA': self._ratio(net_income, total_assets) * 100,
                },
                'Liquidity': {
                    'Current_Ratio': self._ratio(current_assets, current_liabilities),
                    'Working_Capital': current_assets - current_liabilities,
                },
                'Growth': {
                    'Revenue': revenue,
                    'Net_Income': net_income,
                    'Total_Assets': total_assets,
                },
            }
        return metrics

    def analyze_financials(self, income_df: pd.DataFrame, balance_df: pd.DataFrame) -> str:
        """Generate the Markdown financial analysis report.

        Args:
            income_df: Raw income statement as read from CSV.
            balance_df: Raw balance sheet as read from CSV.

        Returns:
            Markdown text with sentiment, key indicators, trends and the
            model-written analysis.

        Raises:
            Exception: Any failure is logged and re-raised to the caller.
        """
        try:
            # Extract and clean data
            income_df, year_cols = self.extractor.extract_data(income_df)
            balance_df, _ = self.extractor.extract_data(balance_df)

            # Calculate metrics
            metrics = self.calculate_metrics(income_df, balance_df, year_cols)

            # Compare years numerically; plain string comparison would rank
            # "999" after "1000".
            latest_year = max(year_cols, key=int)
            earliest_year = min(year_cols, key=int)
            latest = metrics[latest_year]
            earliest = metrics[earliest_year]

            # Calculate growth between the first and the last year
            revenue_growth = (self._ratio(latest['Growth']['Revenue'], earliest['Growth']['Revenue']) - 1) * 100
            profit_growth = (self._ratio(latest['Growth']['Net_Income'], earliest['Growth']['Net_Income']) - 1) * 100

            # Generate analysis context
            context = f"""Financial Analysis ({earliest_year}-{latest_year}):
Performance Metrics:
- Revenue Growth: {revenue_growth:.1f}%
- Profit Growth: {profit_growth:.1f}%
- Current Gross Margin: {latest['Profitability']['Gross_Margin']:.1f}%
- Current Net Margin: {latest['Profitability']['Net_Margin']:.1f}%
- ROE: {latest['Profitability']['ROE']:.1f}%
- Current Ratio: {latest['Liquidity']['Current_Ratio']:.2f}
Trends:
- Revenue has grown from ${earliest['Growth']['Revenue']:,.0f} to ${latest['Growth']['Revenue']:,.0f}
- Net Income has changed from ${earliest['Growth']['Net_Income']:,.0f} to ${latest['Growth']['Net_Income']:,.0f}
- Profitability margins show {('improving' if latest['Profitability']['Net_Margin'] > earliest['Profitability']['Net_Margin'] else 'declining')} trend"""

            # Get sentiment (only the first 512 characters are classified)
            sentiment = self.sentiment_model(context[:512])[0]

            # Generate detailed analysis. ``return_full_text=False`` keeps the
            # prompt itself out of the report; by default the pipeline echoes it.
            prompt = f"[INST] As a financial analyst, provide a detailed analysis of this company:\n\n{context}\n\nInclude:\n1. Financial health assessment\n2. Key performance insights\n3. Strategic recommendations [/INST]"
            analysis = self.analysis_model(
                prompt,
                max_length=1500,
                num_return_sequences=1,
                do_sample=True,
                temperature=0.7,
                return_full_text=False,
            )[0]['generated_text'].strip()

            # Format output
            output = f"""# Financial Analysis Report
## Overall Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.1%})
## Key Performance Indicators ({latest_year})
- Gross Margin: {latest['Profitability']['Gross_Margin']:.1f}%
- Operating Margin: {latest['Profitability']['Operating_Margin']:.1f}%
- Net Margin: {latest['Profitability']['Net_Margin']:.1f}%
- ROE: {latest['Profitability']['ROE']:.1f}%
- Current Ratio: {latest['Liquidity']['Current_Ratio']:.2f}
## Performance Trends ({earliest_year}-{latest_year})
- Revenue Growth: {revenue_growth:.1f}%
- Profit Growth: {profit_growth:.1f}%
- Working Capital: ${latest['Liquidity']['Working_Capital']:,.0f}
## Analysis
{analysis}"""
            return output
        except Exception as e:
            logger.error(f"Analysis error: {str(e)}")
            raise
def analyze_statements(income_statement, balance_sheet):
    """Main function to analyze financial statements.

    Args:
        income_statement: Uploaded income-statement CSV, either an object
            exposing the file path as ``.name`` or the path itself.
        balance_sheet: Uploaded balance-sheet CSV, same convention.

    Returns:
        The Markdown report, or a user-facing message when a file is missing
        or the analysis fails (errors are logged, never raised to the UI).
    """
    if not income_statement or not balance_sheet:
        return "Please upload both Income Statement and Balance Sheet CSV files."

    analyzer = None
    try:
        # Read files; fall back to the value itself when it is already a path.
        income_df = pd.read_csv(getattr(income_statement, "name", income_statement))
        balance_df = pd.read_csv(getattr(balance_sheet, "name", balance_sheet))

        # Create analyzer and process
        analyzer = FinancialAnalyzer()
        return analyzer.analyze_financials(income_df, balance_df)
    except Exception as e:
        logger.error(f"Analysis error: {str(e)}")
        return f"""Analysis Error: {str(e)}
Please ensure your CSV files:
1. Have clear year columns
2. Contain recognizable financial metrics
3. Use consistent number formatting"""
    finally:
        # Clear memory on success *and* failure. The analyzer reference is
        # dropped first; while it is alive the models cannot be reclaimed.
        analyzer = None
        clear_gpu_memory()
# Gradio UI: two CSV uploads go in, one Markdown report comes out.
iface = gr.Interface(
    title="Financial Statement Analyzer",
    description="""## Financial Analysis Tool
Upload your financial statements to get:
- Performance Analysis
- Key Metrics & Ratios
- Trend Analysis
- Strategic Recommendations""",
    fn=analyze_statements,
    # One upload widget per statement, income statement first.
    inputs=[
        gr.File(label=f"Upload {statement} (CSV)", file_types=[".csv"])
        for statement in ("Income Statement", "Balance Sheet")
    ],
    outputs=gr.Markdown(),
    examples=None,
)
# Launch the interface when run as a script: listen on all interfaces, port 7860.
if __name__ == "__main__":
    try:
        iface.launch(server_name="0.0.0.0", server_port=7860)
    except Exception as e:
        logger.error(f"Launch error: {str(e)}")
        # Exit with status 1. `sys` is not among this file's imports, so
        # `sys.exit(1)` would fail with NameError; raising SystemExit is
        # exactly what `sys.exit` does and needs no import.
        raise SystemExit(1) from e