# Source: Hugging Face Space "walaa2022/fin_analysis" (status: Sleeping)
# File size: 19,555 bytes

import gradio as gr
import pandas as pd
import json
from transformers import (
    AutoTokenizer, 
    AutoModelForCausalLM,
    AutoModelForSequenceClassification
)
import torch
import numpy as np
import re

class FinancialDataset:
    """Map-style dataset that tokenizes texts on access and pairs them with labels.

    Each item is a dict holding ``input_ids``, ``attention_mask`` and
    ``labels`` tensors; every text is padded/truncated to ``max_length``.
    """

    def __init__(self, texts, labels, tokenizer, max_length=512):
        """Store the raw samples and the tokenizer used to encode them.

        Args:
            texts: Indexable collection of texts (each item is passed
                through ``str`` before tokenizing).
            labels: Indexable collection of integer labels aligned with
                ``texts``.
            tokenizer: Callable accepting ``truncation``, ``padding``,
                ``max_length`` and ``return_tensors`` keyword arguments and
                returning a mapping with ``'input_ids'`` and
                ``'attention_mask'`` tensors.
            max_length: Fixed token length every sample is padded or
                truncated to.
        """
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of samples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize sample ``idx`` and return its tensors.

        Returns:
            dict: ``input_ids`` and ``attention_mask`` with the leading batch
            axis removed, plus ``labels`` as a ``torch.long`` tensor.
        """
        encoded = self.tokenizer(
            str(self.texts[idx]),
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )
        return {
            # squeeze(0) drops only the batch axis; a bare squeeze() would
            # also collapse the sequence axis whenever max_length == 1.
            'input_ids': encoded['input_ids'].squeeze(0),
            'attention_mask': encoded['attention_mask'].squeeze(0),
            'labels': torch.tensor(self.labels[idx], dtype=torch.long)
        }

class FinancialAnalyzer:
    def __init__(self):
        print("Initializing Analyzer...")
        self.last_metrics = {}
        self.initialize_models()
        print("Initialization complete!")

    def initialize_models(self):
        """Load the TinyLlama chat tokenizer and causal LM and put the model in eval mode.

        Any loading error is reported on stdout and then re-raised.
        """
        checkpoint = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
        try:
            self.llama_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
            self.llama_model = AutoModelForCausalLM.from_pretrained(checkpoint)
            self.llama_model.eval()
            print("Models loaded successfully!")
        except Exception as load_error:
            print(f"Error initializing models: {str(load_error)}")
            raise

    def clean_number(self, value):
        """Clean and convert numerical values"""
        try:
            if isinstance(value, str):
                value = value.replace('$', '').replace(',', '').strip()
                if '(' in value and ')' in value:
                    value = '-' + value.replace('(', '').replace(')', '')
            return float(value or 0)
        except:
            return 0.0

    def is_valid_markdown(self, file_path):
        """Check if a file is a valid Markdown file"""
        try:
            with open(file_path, 'r') as f:
                content = f.read()
            return any(line.startswith('#') or '|' in line for line in content.split('\n'))
        except:
            return False

    def parse_financial_data(self, content):
        """Parse markdown content into structured data"""
        try:
            data = {}
            current_section = ""
            current_table = []
            headers = None

            for line in content.split('\n'):
                if line.startswith('#'):
                    if current_table and headers:
                        data[current_section] = self.process_table(headers, current_table)
                    current_section = line.strip('# ')
                    current_table = []
                    headers = None
                elif '|' in line:
                    if '-|-' not in line:
                        row = [cell.strip() for cell in line.split('|')[1:-1]]
                        if not headers:
                            headers = row
                        else:
                            current_table.append(row)

            if current_table and headers:
                data[current_section] = self.process_table(headers, current_table)

            return data
        except Exception as e:
            print(f"Error parsing financial data: {str(e)}")
            return {}

    def process_table(self, headers, rows):
        """Process table data into structured format"""
        try:
            processed_data = {}
            for row in rows:
                if len(row) == len(headers):
                    item_name = row[0].strip('*').strip()
                    processed_data[item_name] = {}
                    for i, value in enumerate(row[1:], 1):
                        processed_data[item_name][headers[i]] = self.clean_number(value)
            return processed_data
        except Exception as e:
            print(f"Error processing table: {str(e)}")
            return {}

    def get_nested_value(self, data, section, key, year):
        """Safely get nested dictionary value"""
        try:
            return data.get(section, {}).get(key, {}).get(str(year), 0)
        except:
            return 0

    def calculate_metrics(self, income_data, balance_data):
        """Calculate all CFI standard financial metrics"""
        try:
            metrics = {}
            
            # 1. Gross Profit Margin Ratio
            # 1. Gross Profit Margin
            revenue = self.get_nested_value(income_data, "Revenue", "Total Net Revenue", "2025")
            cogs = self.get_nested_value(income_data, "Cost and Gross Profit", "Cost of Goods Sold", "2025")
            gross_profit = revenue - cogs
            metrics['gross_profit_margin'] = (gross_profit / revenue) * 100 if revenue != 0 else 0

            # 2. Current Ratio
            current_assets = self.get_nested_value(balance_data, "Key Totals", "Total_Current_Assets", "2025")
            current_liabilities = self.get_nested_value(balance_data, "Key Totals", "Total_Current_Liabilities", "2025")
            metrics['current_ratio'] = current_assets / current_liabilities if current_liabilities != 0 else 0

            # 3. Debt Ratio
            total_liabilities = self.get_nested_value(balance_data, "Key Totals", "Total_Liabilities", "2025")
            total_assets = self.get_nested_value(balance_data, "Key Totals", "Total_Assets", "2025")
            metrics['debt_ratio'] = (total_liabilities / total_assets) * 100 if total_assets != 0 else 0

            # 4. Sustainable Growth Rate (SGR)
            net_income = self.get_nested_value(income_data, "Profit Summary", "Net Earnings", "2025")
            equity = self.get_nested_value(balance_data, "Key Totals", "Total_Shareholders_Equity", "2025")
            dividends = self.get_nested_value(income_data, "Profit Summary", "Dividends Paid", "2025")
        
            roe = (net_income / equity) * 100 if equity != 0 else 0
            retention_ratio = (net_income - dividends) / net_income if net_income != 0 else 0
            metrics['sgr'] = roe * retention_ratio / 100 if roe != 0 else 0

            # 5. Accounts Receivable Turnover
            accounts_receivable = self.get_nested_value(balance_data, "Balance Sheet Data 2021-2025", "Accounts_Receivable", "2025")
            metrics['ar_turnover'] = revenue / accounts_receivable if accounts_receivable != 0 else 0

            # 6. Return on Equity (ROE)
            metrics['roe'] = roe

            # 7. Net Profit Margin
            metrics['net_profit_margin'] = (net_income / revenue) * 100 if revenue != 0 else 0

            # 8. Retained Earnings Ratio
            retained_earnings = self.get_nested_value(balance_data, "Balance Sheet Data 2021-2025", "Retained_Earnings", "2025")
            metrics['retained_earnings_ratio'] = (retained_earnings / total_assets) * 100 if total_assets != 0 else 0

            # 9. Revenue Growth (YoY)
            revenue_2024 = self.get_nested_value(income_data, "Revenue", "Total Net Revenue", "2024")
            metrics['revenue_growth'] = ((revenue / revenue_2024) - 1) * 100 if revenue_2024 != 0 else 0

            # 10. Revenue CAGR (2021-2025)
            revenue_2021 = self.get_nested_value(income_data, "Revenue", "Total Net Revenue", "2021")
            metrics['revenue_cagr'] = ((revenue / revenue_2021) ** (1 / 4) - 1) * 100 if revenue_2021 != 0 else 0

            return metrics
        except Exception as e:
            print(f"Error calculating metrics: {e}")
            return {}

    
    def analyze_financials(self, balance_sheet_path, income_statement_path):
        """Validate both Markdown files, compute metrics and return the generated analysis text.

        NOTE(review): this class defines a second method with the same name
        further down. Python keeps only the later definition on the class, so
        this body is never reached through ``FinancialAnalyzer.analyze_financials``.
        """
        try:
            # Validate markdown files
            if not self.is_valid_markdown(balance_sheet_path):
                return "Invalid Balance Sheet file format. Please upload a valid Markdown file."
            if not self.is_valid_markdown(income_statement_path):
                return "Invalid Income Statement file format. Please upload a valid Markdown file."

            # Read and parse files
            with open(balance_sheet_path, 'r') as f:
                balance_content = f.read()
            with open(income_statement_path, 'r') as f:
                income_content = f.read()

            balance_data = self.parse_financial_data(balance_content)
            income_data = self.parse_financial_data(income_content)

            # Calculate metrics
            metrics = self.calculate_metrics(income_data, balance_data)

            # Generate analysis
            return self.generate_analysis(metrics)

        except Exception as e:
            # Any failure is reported to the caller as a plain message string.
            return f"Error analyzing financials: {e}"

    
    def generate_analysis(self, metrics):
        """Generate comprehensive analysis"""
        try:
            prompt = f"""[INST] As a financial analyst, provide a comprehensive analysis based on these metrics:

1. Profitability:
   - Gross Profit Margin: {metrics.get('gross_profit_margin', 0):.2f}%
   - Net Profit Margin: {metrics.get('net_profit_margin', 0):.2f}%
   - Return on Equity: {metrics.get('roe', 0):.2f}%

2. Liquidity & Efficiency:
   - Current Ratio: {metrics.get('current_ratio', 0):.2f}
   - Accounts Receivable Turnover: {metrics.get('ar_turnover', 0):.2f}

3. Financial Structure:
   - Debt Ratio: {metrics.get('debt_ratio', 0):.2f}%
   - Retained Earnings Ratio: {metrics.get('retained_earnings_ratio', 0):.2f}%

4. Growth:
   - Sustainable Growth Rate: {metrics.get('sgr', 0):.2f}%
   - Revenue Growth (YoY): {metrics.get('revenue_growth', 0):.2f}%

Provide:
1. Overall financial health assessment
2. Key strengths and concerns
3. Operational efficiency analysis
4. Specific recommendations for improvement
[/INST]"""

            inputs = self.llama_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
            outputs = self.llama_model.generate(
                inputs["input_ids"],
                max_new_tokens=1024,
                min_new_tokens=200,
                temperature=0.7,
                top_p=0.95,
                repetition_penalty=1.2,
                length_penalty=1.5
            )
            
            analysis = self.llama_tokenizer.decode(outputs[0], skip_special_tokens=True)
            
            if len(analysis.split()) < 100:
                return self.generate_fallback_analysis(metrics)
            
            return analysis

        except Exception as e:
            print(f"Error generating analysis: {str(e)}")
            return self.generate_fallback_analysis(metrics)

    def generate_fallback_analysis(self, metrics):
        """Generate basic analysis when model fails"""
        try:
            analysis = f"""Financial Analysis Summary:

1. Profitability Assessment:
- Gross Profit Margin: {metrics.get('gross_profit_margin', 0):.2f}% 
  ({self.interpret_metric('gross_profit_margin', metrics.get('gross_profit_margin', 0))})
- Net Profit Margin: {metrics.get('net_profit_margin', 0):.2f}%
  ({self.interpret_metric('net_profit_margin', metrics.get('net_profit_margin', 0))})
- Return on Equity: {metrics.get('roe', 0):.2f}%
  ({self.interpret_metric('roe', metrics.get('roe', 0))})

2. Liquidity & Efficiency Analysis:
- Current Ratio: {metrics.get('current_ratio', 0):.2f}
  ({self.interpret_metric('current_ratio', metrics.get('current_ratio', 0))})
- AR Turnover: {metrics.get('ar_turnover', 0):.2f}
  ({self.interpret_metric('ar_turnover', metrics.get('ar_turnover', 0))})

3. Financial Structure:
- Debt Ratio: {metrics.get('debt_ratio', 0):.2f}%
  ({self.interpret_metric('debt_ratio', metrics.get('debt_ratio', 0))})
- Retained Earnings Ratio: {metrics.get('retained_earnings_ratio', 0):.2f}%
  ({self.interpret_metric('retained_earnings_ratio', metrics.get('retained_earnings_ratio', 0))})

4. Growth & Sustainability:
- Sustainable Growth Rate: {metrics.get('sgr', 0):.2f}%
  ({self.interpret_metric('sgr', metrics.get('sgr', 0))})
- Revenue Growth: {metrics.get('revenue_growth', 0):.2f}%
  ({self.interpret_metric('revenue_growth', metrics.get('revenue_growth', 0))})

{self.generate_recommendations(metrics)}"""
            return analysis

        except Exception as e:
            return f"Error generating fallback analysis: {str(e)}"

    def interpret_metric(self, metric_name, value):
        """Interpret individual metrics based on CFI standards"""
        interpretations = {
            'gross_profit_margin': lambda x: 'Strong' if x > 40 else 'Adequate' if x > 30 else 'Needs improvement',
            'current_ratio': lambda x: 'Strong' if x > 2 else 'Adequate' if x > 1 else 'Concerning',
            'debt_ratio': lambda x: 'Conservative' if x < 40 else 'Moderate' if x < 60 else 'High risk',
            'ar_turnover': lambda x: 'Excellent' if x > 8 else 'Good' if x > 4 else 'Needs improvement',
            'roe': lambda x: 'Strong' if x > 15 else 'Adequate' if x > 10 else 'Below target',
            'net_profit_margin': lambda x: 'Strong' if x > 10 else 'Adequate' if x > 5 else 'Needs improvement',
            'retained_earnings_ratio': lambda x: 'Strong' if x > 30 else 'Adequate' if x > 15 else 'Low retention',
            'sgr': lambda x: 'Strong' if x > 10 else 'Moderate' if x > 5 else 'Limited growth potential',
            'revenue_growth': lambda x: 'Strong' if x > 10 else 'Moderate' if x > 5 else 'Below industry average'
        }
        try:
            return interpretations.get(metric_name, lambda x: 'No interpretation')(value)
        except:
            return 'Unable to interpret'

    def generate_recommendations(self, metrics):
        """Generate specific recommendations based on metrics"""
        recommendations = []
        
        if metrics.get('gross_profit_margin', 0) < 30:
            recommendations.append("- Review pricing strategy and cost structure to improve gross margins")
        if metrics.get('current_ratio', 0) < 1.5:
            recommendations.append("- Strengthen working capital management to improve liquidity")
        if metrics.get('debt_ratio', 0) > 60:
            recommendations.append("- Consider debt reduction strategies to improve financial flexibility")
        if metrics.get('ar_turnover', 0) < 4:
            recommendations.append("- Improve accounts receivable collection practices")
        if metrics.get('roe', 0) < 10:
            recommendations.append("- Focus on improving operational efficiency to enhance returns")
        if metrics.get('revenue_growth', 0) < 5:
            recommendations.append("- Develop strategies to accelerate revenue growth")
            recommendations.append("- Consider strategic acquisitions or new market entry")
        
        return "Key Recommendations:\n" + "\n".join(recommendations)

    def analyze_financials(self, balance_sheet_file, income_stmt_file):
        """Main analysis function"""
        try:
            # Validate input files
            if not (self.is_valid_markdown(balance_sheet_file) and self.is_valid_markdown(income_stmt_file)):
                return "Error: One or both files are invalid or not in Markdown format."

            # Read files
            with open(balance_sheet_file, 'r') as f:
                balance_sheet = f.read()
            with open(income_stmt_file, 'r') as f:
                income_stmt = f.read()

            # Process financial data
            income_data = self.parse_financial_data(income_stmt)
            balance_data = self.parse_financial_data(balance_sheet)
            
            # Calculate metrics
            metrics = self.calculate_metrics(income_data, balance_data)
            self.last_metrics = metrics

            # Generate analysis
            analysis = self.generate_analysis(metrics)

            # Prepare final results
            results = {
                "Financial Analysis": {
                    "Key Metrics": {
                        "Profitability": {
                            "Gross Profit Margin": f"{metrics['gross_profit_margin']:.2f}%",
                            "Net Profit Margin": f"{metrics['net_profit_margin']:.2f}%",
                            "Return on Equity": f"{metrics['roe']:.2f}%"
                        },
                        "Liquidity": {
                            "Current Ratio": f"{metrics['current_ratio']:.2f}",
                            "Accounts Receivable Turnover": f"{metrics['ar_turnover']:.2f}"
                        },
                        "Solvency": {
                            "Debt Ratio": f"{metrics['debt_ratio']:.2f}%",
                            "Retained Earnings Ratio": f"{metrics['retained_earnings_ratio']:.2f}%"
                        },
                        "Growth": {
                            "Sustainable Growth Rate": f"{metrics['sgr']:.2f}%",
                            "Revenue Growth (YoY)": f"{metrics['revenue_growth']:.2f}%"
                        }
                    },
                    "Analysis": analysis,
                    "Analysis Period": "2021-2025",
                    "Note": "Analysis based on CFI standards"
                }
            }

            return json.dumps(results, indent=2)

        except Exception as e:
            return f"Error in analysis: {str(e)}\n\nDetails: {type(e).__name__}"

    def fine_tune_models(self, train_texts, train_labels, epochs=3):
        """Fine-tune the loaded model on custom data and save the result.

        The tuned model and tokenizer are written to
        ``./financial_model_tuned``. This is best effort: any failure is
        printed and swallowed, nothing is raised to the caller.

        Args:
            train_texts: Training texts.
            train_labels: Integer labels aligned with ``train_texts``.
            epochs: Number of training epochs.
        """
        try:
            # Imported locally: the module-level transformers import does not
            # bring in these two names, so using them unqualified failed with
            # NameError before any training could start.
            from transformers import Trainer, TrainingArguments

            # Prepare dataset
            # NOTE(review): FinancialDataset yields one integer label per
            # text, while the model is a causal LM; confirm this label format
            # matches what the model's loss expects.
            train_dataset = FinancialDataset(train_texts, train_labels, self.llama_tokenizer)

            # Training arguments
            training_args = TrainingArguments(
                output_dir="./financial_model_tuned",
                num_train_epochs=epochs,
                per_device_train_batch_size=4,
                logging_dir="./logs",
                logging_steps=10,
                save_steps=50,
                eval_steps=50,
                learning_rate=2e-5,
                weight_decay=0.01,
                warmup_steps=500
            )

            # Initialize trainer
            trainer = Trainer(
                model=self.llama_model,
                args=training_args,
                train_dataset=train_dataset
            )

            # Fine-tune the model
            try:
                trainer.train()
            finally:
                # Training switches the model to train mode; restore the eval
                # mode set at load time so later generation is unaffected.
                self.llama_model.eval()

            # Save the fine-tuned model
            self.llama_model.save_pretrained("./financial_model_tuned")
            self.llama_tokenizer.save_pretrained("./financial_model_tuned")

            print("Fine-tuning completed successfully!")
        except Exception as e:
            print(f"Error in fine-tuning: {str(e)}")

def create_interface():
    """Build the Gradio interface around a freshly created FinancialAnalyzer.

    Two file inputs (balance sheet and income statement) are passed as file
    paths to ``FinancialAnalyzer.analyze_financials``; the returned text is
    shown in a text box.
    """
    analyzer = FinancialAnalyzer()

    upload_fields = [
        gr.File(label="Balance Sheet (Markdown)", type="filepath"),
        gr.File(label="Income Statement (Markdown)", type="filepath"),
    ]
    results_box = gr.Textbox(label="Analysis Results", lines=25)
    blurb = """Upload financial statements in Markdown format for AI-powered analysis.
                      Analysis is based on Corporate Finance Institute (CFI) standards."""

    return gr.Interface(
        fn=analyzer.analyze_financials,
        inputs=upload_fields,
        outputs=results_box,
        title="AI Financial Statement Analyzer",
        description=blurb,
        cache_examples=False,
    )

# Script entry point: build the UI (constructing the analyzer loads the model)
# and start the Gradio server. Nothing runs when the module is only imported.
if __name__ == "__main__":
    iface = create_interface()
    iface.launch()