# Captured from a Hugging Face Space file view (Space status at capture: Sleeping; file size: 11,005 bytes).
# The web view's per-line commit-hash column and its 1-268 line-number gutter were page chrome, not file content.
import gc
import logging
import os
import sys

import gradio as gr
import pandas as pd
import torch
from transformers import pipeline
# Logging: timestamped, level-tagged records at INFO and above
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Compute device: use CUDA when PyTorch reports it as available, else CPU
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
logger.info(f"Using device: {DEVICE}")
def clear_gpu_memory():
    """Release unreferenced Python objects, then cached CUDA memory.

    Garbage collection runs first so that objects kept alive only by
    reference cycles are destroyed before the CUDA cache is emptied;
    emptying the cache first would leave the memory of those objects
    still allocated. The CUDA step is skipped when ``DEVICE`` is not
    ``"cuda"``.
    """
    gc.collect()
    if DEVICE == "cuda":
        torch.cuda.empty_cache()
class FinancialDataExtractor:
    """Extract and clean financial data"""

    def __init__(self):
        self.logger = logger

    def clean_number(self, value):
        """Convert one financial-statement cell to a float.

        Args:
            value: Raw cell content (number, string, or missing value).

        Returns:
            The parsed float. Missing values, ``''``, ``'-'`` and anything
            that cannot be parsed yield ``0.0`` (best-effort cleaning).
            Text wrapped in parentheses, e.g. ``"(1,234)"``, is treated as
            a negative number.
        """
        try:
            if pd.isna(value) or value == '' or value == '-':
                return 0.0
            if isinstance(value, (int, float)):
                return float(value)
            # Remove currency symbols, thousands separators and stray quotes
            cleaned = str(value).replace('$', '').replace(',', '').replace('"', '').strip()
            # Accounting notation: parentheses mark a negative amount
            if '(' in cleaned and ')' in cleaned:
                cleaned = '-' + cleaned.replace('(', '').replace(')', '').strip()
            return float(cleaned)
        except (TypeError, ValueError, OverflowError):
            # Only conversion failures fall back to 0.0; unrelated exceptions
            # (e.g. KeyboardInterrupt) are no longer swallowed.
            return 0.0

    def extract_data(self, df: pd.DataFrame) -> tuple:
        """Normalize column labels and clean every year column.

        The input frame is left untouched; all work happens on a copy.

        Args:
            df: Raw statement data with one column per year.

        Returns:
            A ``(cleaned_df, year_cols)`` tuple, where ``year_cols`` lists the
            column labels (as strings) that consist only of digits.

        Raises:
            ValueError: If no column label is made up solely of digits.
        """
        df = df.copy()
        # str() first so that non-string labels (e.g. the int 2022) survive
        # normalization instead of being turned into NaN by the .str accessor.
        df.columns = [str(col).strip() for col in df.columns]

        year_cols = [col for col in df.columns if col.isdigit()]
        if not year_cols:
            raise ValueError("No year columns found in data")

        for col in year_cols:
            df[col] = df[col].apply(self.clean_number)
        return df, year_cols
class FinancialAnalyzer:
    """Financial analysis using small models"""

    def __init__(self):
        self.extractor = FinancialDataExtractor()
        self.sentiment_model = None
        self.analysis_model = None
        self.load_models()

    def load_models(self):
        """Load the sentiment and text-generation pipelines.

        Both pipelines are created on ``DEVICE`` with float16 weights on CUDA
        and float32 otherwise.

        Raises:
            Exception: Any error raised while building a pipeline is logged
                and re-raised unchanged.
        """
        dtype = torch.float16 if DEVICE == "cuda" else torch.float32
        try:
            # Pass the device explicitly so the dtype chosen for CUDA is paired
            # with CUDA placement instead of relying on the library default.
            self.sentiment_model = pipeline(
                "text-classification",
                model="ProsusAI/finbert",
                torch_dtype=dtype,
                device=DEVICE,
                truncation=True,
            )
            self.analysis_model = pipeline(
                "text-generation",
                model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                torch_dtype=dtype,
                device=DEVICE,
            )
            logger.info("Models loaded successfully")
        except Exception as e:
            logger.error(f"Error loading models: {str(e)}")
            raise

    @staticmethod
    def _line_item(df, pattern, year, position=0):
        """Return the *year* value of a row whose 'Period' matches *pattern*.

        *pattern* is a case-insensitive regular expression; *position* picks
        among several matching rows (0 = first, -1 = last).

        Raises:
            IndexError: If no row matches, naming the pattern that failed.
        """
        rows = df[df['Period'].str.contains(pattern, na=False, case=False)]
        if rows.empty:
            raise IndexError(
                f"No row matching {pattern!r} found in the 'Period' column"
            )
        return rows[year].iloc[position]

    @staticmethod
    def _growth_pct(start, end):
        """Return the percentage change from *start* to *end*.

        The change is measured against ``abs(start)`` so that moving from a
        loss to a profit is reported as positive growth. A zero base has no
        defined growth rate and yields NaN.
        """
        if start == 0:
            return float('nan')
        return (end - start) / abs(start) * 100

    def calculate_metrics(self, income_df: pd.DataFrame, balance_df: pd.DataFrame, year_cols: list) -> dict:
        """Calculate profitability, liquidity and growth figures per year.

        Args:
            income_df: Income statement with a 'Period' label column and one
                numeric column per entry of *year_cols*.
            balance_df: Balance sheet with the same layout.
            year_cols: Column labels to compute metrics for.

        Returns:
            ``{year: {'Profitability': {...}, 'Liquidity': {...},
            'Growth': {...}}}``. Margins and returns are percentages.

        Raises:
            IndexError: If a required line item is missing from a statement.
        """
        metrics = {}
        for year in year_cols:
            # Income Statement line items (net income: last matching row)
            income = {
                'Revenue': self._line_item(income_df, 'Total Net Revenue|Revenue', year),
                'COGS': self._line_item(income_df, 'Cost of Goods Sold', year),
                'Operating_Expenses': self._line_item(income_df, 'Total Expenses', year),
                'EBIT': self._line_item(income_df, 'Earnings Before Interest & Taxes', year),
                'Net_Income': self._line_item(income_df, 'Net Income|Net Earnings', year, position=-1),
            }
            # Balance Sheet line items (equity: last matching row)
            balance = {
                'Total_Assets': self._line_item(balance_df, 'Total Assets', year),
                'Current_Assets': self._line_item(balance_df, 'Total current assets', year),
                'Total_Liabilities': self._line_item(balance_df, 'Total Liabilities', year),
                'Current_Liabilities': self._line_item(balance_df, 'Total current liabilities', year),
                'Equity': self._line_item(balance_df, "Shareholder's Equity", year, position=-1),
            }
            # Ratios
            metrics[year] = {
                'Profitability': {
                    'Gross_Margin': ((income['Revenue'] - income['COGS']) / income['Revenue']) * 100,
                    'Operating_Margin': (income['EBIT'] / income['Revenue']) * 100,
                    'Net_Margin': (income['Net_Income'] / income['Revenue']) * 100,
                    'ROE': (income['Net_Income'] / balance['Equity']) * 100,
                    'ROA': (income['Net_Income'] / balance['Total_Assets']) * 100,
                },
                'Liquidity': {
                    'Current_Ratio': balance['Current_Assets'] / balance['Current_Liabilities'],
                    'Working_Capital': balance['Current_Assets'] - balance['Current_Liabilities'],
                },
                'Growth': {
                    'Revenue': income['Revenue'],
                    'Net_Income': income['Net_Income'],
                    'Total_Assets': balance['Total_Assets'],
                },
            }
        return metrics

    def analyze_financials(self, income_df: pd.DataFrame, balance_df: pd.DataFrame) -> str:
        """Generate the Markdown financial analysis report.

        Cleans both statements, computes metrics for the years the two
        statements share, scores a text summary with the sentiment model and
        asks the generation model for a narrative analysis.

        Args:
            income_df: Raw income statement.
            balance_df: Raw balance sheet.

        Returns:
            The report as a Markdown string.

        Raises:
            ValueError: If the statements share no year column (or none is
                found at all).
            Exception: Any other failure is logged and re-raised unchanged.
        """
        try:
            # Extract and clean data
            income_df, income_years = self.extractor.extract_data(income_df)
            balance_df, balance_years = self.extractor.extract_data(balance_df)

            # Only years present in BOTH statements can be analysed; a year
            # missing from the balance sheet would otherwise be a bare KeyError.
            shared_years = set(balance_years)
            year_cols = [year for year in income_years if year in shared_years]
            if not year_cols:
                raise ValueError(
                    "Income statement and balance sheet have no year columns in common"
                )

            # Calculate metrics
            metrics = self.calculate_metrics(income_df, balance_df, year_cols)

            # Get latest and earliest years
            latest_year = max(year_cols)
            earliest_year = min(year_cols)
            latest = metrics[latest_year]
            earliest = metrics[earliest_year]

            # Calculate growth
            revenue_growth = self._growth_pct(
                earliest['Growth']['Revenue'], latest['Growth']['Revenue']
            )
            profit_growth = self._growth_pct(
                earliest['Growth']['Net_Income'], latest['Growth']['Net_Income']
            )
            margin_trend = (
                'improving'
                if latest['Profitability']['Net_Margin'] > earliest['Profitability']['Net_Margin']
                else 'declining'
            )

            # Generate analysis context
            context = "\n".join([
                f"Financial Analysis ({earliest_year}-{latest_year}):",
                "Performance Metrics:",
                f"- Revenue Growth: {revenue_growth:.1f}%",
                f"- Profit Growth: {profit_growth:.1f}%",
                f"- Current Gross Margin: {latest['Profitability']['Gross_Margin']:.1f}%",
                f"- Current Net Margin: {latest['Profitability']['Net_Margin']:.1f}%",
                f"- ROE: {latest['Profitability']['ROE']:.1f}%",
                f"- Current Ratio: {latest['Liquidity']['Current_Ratio']:.2f}",
                "Trends:",
                f"- Revenue has grown from ${earliest['Growth']['Revenue']:,.0f} to ${latest['Growth']['Revenue']:,.0f}",
                f"- Net Income has changed from ${earliest['Growth']['Net_Income']:,.0f} to ${latest['Growth']['Net_Income']:,.0f}",
                f"- Profitability margins show {margin_trend} trend",
            ])

            # Get sentiment (only the first 512 characters of the context are scored)
            sentiment = self.sentiment_model(context[:512])[0]

            # Generate detailed analysis
            # NOTE(review): the [INST] markers are kept as written; confirm they
            # match the prompt format this chat checkpoint expects.
            prompt = f"[INST] As a financial analyst, provide a detailed analysis of this company:\n\n{context}\n\nInclude:\n1. Financial health assessment\n2. Key performance insights\n3. Strategic recommendations [/INST]"
            analysis = self.analysis_model(
                prompt,
                max_length=1500,
                num_return_sequences=1,
                do_sample=True,
                temperature=0.7,
                # Return the completion only; otherwise the whole prompt is
                # echoed at the top of the "Analysis" section of the report.
                return_full_text=False,
            )[0]['generated_text'].strip()

            # Format output
            output = "\n".join([
                "# Financial Analysis Report",
                f"## Overall Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.1%})",
                f"## Key Performance Indicators ({latest_year})",
                f"- Gross Margin: {latest['Profitability']['Gross_Margin']:.1f}%",
                f"- Operating Margin: {latest['Profitability']['Operating_Margin']:.1f}%",
                f"- Net Margin: {latest['Profitability']['Net_Margin']:.1f}%",
                f"- ROE: {latest['Profitability']['ROE']:.1f}%",
                f"- Current Ratio: {latest['Liquidity']['Current_Ratio']:.2f}",
                f"## Performance Trends ({earliest_year}-{latest_year})",
                f"- Revenue Growth: {revenue_growth:.1f}%",
                f"- Profit Growth: {profit_growth:.1f}%",
                f"- Working Capital: ${latest['Liquidity']['Working_Capital']:,.0f}",
                "## Analysis",
                f"{analysis}",
            ])
            return output
        except Exception as e:
            logger.error(f"Analysis error: {str(e)}")
            raise
def analyze_statements(income_statement, balance_sheet):
    """Main function to analyze financial statements.

    Args:
        income_statement: Uploaded income-statement CSV, either a path string
            or an object exposing the path as ``.name``.
        balance_sheet: Uploaded balance-sheet CSV, same forms.

    Returns:
        The Markdown report on success; otherwise a user-facing message
        (a prompt to upload both files, or an error description). Errors are
        logged and never propagate to the caller.
    """
    if not income_statement or not balance_sheet:
        return "Please upload both Income Statement and Balance Sheet CSV files."

    analyzer = None
    try:
        # Read files. Uploads may arrive as a plain path or as a file-like
        # wrapper carrying the path in `.name`; accept both.
        income_df = pd.read_csv(getattr(income_statement, "name", income_statement))
        balance_df = pd.read_csv(getattr(balance_sheet, "name", balance_sheet))

        # Create analyzer and process
        analyzer = FinancialAnalyzer()
        return analyzer.analyze_financials(income_df, balance_df)
    except Exception as e:
        logger.error(f"Analysis error: {str(e)}")
        return (
            f"Analysis Error: {str(e)}\n"
            "Please ensure your CSV files:\n"
            "1. Have clear year columns\n"
            "2. Contain recognizable financial metrics\n"
            "3. Use consistent number formatting"
        )
    finally:
        # Drop the reference to the analyzer (and its models) before clearing,
        # and clear on the error path too, so the cleanup can free memory.
        analyzer = None
        clear_gpu_memory()
# Gradio UI: two CSV uploads in, one Markdown report out
iface = gr.Interface(
    title="Financial Statement Analyzer",
    description="""## Financial Analysis Tool
Upload your financial statements to get:
- Performance Analysis
- Key Metrics & Ratios
- Trend Analysis
- Strategic Recommendations""",
    # Callback invoked with the two uploaded files, in the order listed below
    fn=analyze_statements,
    inputs=[
        gr.File(file_types=[".csv"], label="Upload Income Statement (CSV)"),
        gr.File(file_types=[".csv"], label="Upload Balance Sheet (CSV)"),
    ],
    outputs=gr.Markdown(),
    examples=None,
)
# Launch the interface when run as a script: listen on all network
# interfaces, port 7860. A failed launch is logged and the process exits
# with status 1.
if __name__ == "__main__":
    try:
        iface.launch(server_name="0.0.0.0", server_port=7860)
    except Exception as e:
        logger.error(f"Launch error: {str(e)}")
        sys.exit(1)