Spaces:

walaa2022
/

financial-analysis-system

Sleeping

File size: 9,373 Bytes

864f28a
a1ef945
 
 
35acd3c
91033f9
ca1d38d
ceb9625
eac8dde
 
 
 
 
6e9bd28
35acd3c
eac8dde
 
 
ceb9625
eac8dde
 
 
 
 
57061b5
0ff54a0
ca1d38d
eac8dde
0ff54a0
ca1d38d
 
 
 
 
 
 
35acd3c
ca1d38d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35acd3c
ca1d38d
35acd3c
 
ca1d38d
 
35acd3c
37a163f
 
eac8dde
ca1d38d
eac8dde
 
 
ca1d38d
 
91033f9
 
 
ca1d38d
 
91033f9
 
ca1d38d
 
 
 
 
 
 
 
91033f9
35acd3c
ca1d38d
35acd3c
0ff54a0
ca1d38d
 
91033f9
ca1d38d
 
 
91033f9
ca1d38d
 
91033f9
ca1d38d
 
 
 
 
 
91033f9
ca1d38d
 
 
 
 
 
 
 
91033f9
ca1d38d
 
91033f9
ca1d38d
91033f9
ca1d38d
 
eac8dde
ca1d38d
 
 
91033f9
ca1d38d
35acd3c
eac8dde
ca1d38d
91033f9
35acd3c
ca1d38d
 
eac8dde
ca1d38d
 
91033f9
ca1d38d
 
 
 
 
 
 
91033f9
ca1d38d
324809c
91033f9
ca1d38d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91033f9
 
ca1d38d
 
a1ef945
ca1d38d
 
a1ef945
ca1d38d
 
 
eac8dde
ca1d38d
 
 
 
 
eac8dde
ca1d38d
 
a1ef945
ca1d38d
 
 
 
 
 
eac8dde
ca1d38d
 
 
 
 
 
eac8dde
ca1d38d
 
324809c
ca1d38d
91033f9
ca1d38d
a1ef945
eac8dde
 
 
 
35acd3c
eac8dde
ca1d38d
 
 
eac8dde
ca1d38d
f4bbd39
a1ef945
ca1d38d
a1ef945
324809c
ca1d38d
 
 
324809c
 
ca1d38d
 
 
324809c
a1ef945
 
ca1d38d
 
 
 
 
 
 
324809c
ca1d38d
 
 
 
324809c
 
 
 
ca1d38d
eac8dde
a1ef945
 
ca1d38d
a1ef945
35acd3c
ca1d38d
eac8dde
 
 
324809c
ca1d38d
eac8dde
35acd3c
eac8dde
35acd3c

import os 
import gradio as gr
import pandas as pd
import torch
import logging
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
import gc

# Configure root logging once: timestamp, severity, then the message text.
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)
logger = logging.getLogger(__name__)

# Pick the compute device: CUDA when PyTorch reports it as available, else CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
logger.info(f"Using device: {DEVICE}")

def clear_gpu_memory():
    """Run the garbage collector, then release cached CUDA memory when on GPU.

    The collector runs first so that objects kept alive only by reference
    cycles are destroyed before the CUDA cache is emptied; otherwise the
    memory they hold is still occupied when ``empty_cache`` is called and
    cannot be released by it.
    """
    gc.collect()
    if DEVICE == "cuda":
        torch.cuda.empty_cache()

class FinancialAnalyzer:
    """Financial statement analyzer built on two small Hugging Face pipelines.

    ``ProsusAI/finbert`` classifies the sentiment of the numeric summaries and
    ``TinyLlama/TinyLlama-1.1B-Chat-v1.0`` generates the narrative report.
    """

    # Heading markers looked for in the generated text, paired with the
    # report section that the following text belongs to.
    _SECTION_MARKERS = (
        ("Business Status", "status"),
        ("Key Business Insights", "insights"),
        ("Strategic Recommendations", "recommendations"),
    )

    def __init__(self):
        """Create the analyzer and load both models immediately."""
        self.sentiment_model = None
        self.analysis_model = None
        self.load_models()

    def load_models(self):
        """Load the sentiment and text-generation pipelines.

        Raises:
            Exception: whatever the pipeline construction raised; it is
                logged and re-raised unchanged.
        """
        # Half precision only makes sense on the GPU.
        dtype = torch.float16 if DEVICE == "cuda" else torch.float32
        try:
            # FinBERT for sentiment analysis. The device is passed explicitly
            # so the model runs where the dtype above assumes it runs.
            self.sentiment_model = pipeline(
                "text-classification",
                model="ProsusAI/finbert",
                torch_dtype=dtype,
                device=DEVICE,
            )

            # Small generative model for analysis and recommendations.
            self.analysis_model = pipeline(
                "text-generation",
                model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                torch_dtype=dtype,
                device=DEVICE,
            )

            logger.info("Models loaded successfully")
        except Exception as e:
            logger.error(f"Error loading models: {str(e)}")
            raise

    def process_csv(self, file_obj):
        """Read a CSV and compute summary statistics and KPIs.

        Args:
            file_obj: anything ``pandas.read_csv`` accepts (path or file-like).

        Returns:
            A ``(summary, metrics)`` tuple: ``summary`` is the ``describe()``
            table of the numeric columns; ``metrics`` is a dict with the keys
            ``'total'``, ``'average'`` and ``'growth'`` (mean row-over-row
            percentage change), each a per-column Series.

        Raises:
            ValueError: if no file is given, the CSV is empty, or it has no
                ``float64``/``int64`` columns.
            Exception: any error raised while reading the CSV; all errors are
                logged and re-raised.
        """
        try:
            if file_obj is None:
                raise ValueError("No file provided")

            df = pd.read_csv(file_obj)

            if df.empty:
                raise ValueError("Empty CSV file")

            # Get numeric columns
            numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
            if len(numeric_cols) == 0:
                raise ValueError("No numeric columns found in CSV")

            numeric = df[numeric_cols]

            # Calculate basic KPIs
            summary = numeric.describe()

            # A change measured from a zero value is +/-inf, and a single inf
            # would turn the whole column average into inf. Treat those steps
            # as undefined (NaN) so they are skipped by mean().
            step_change = numeric.pct_change().replace(
                [float("inf"), float("-inf")], float("nan")
            )

            # Extract key metrics
            metrics = {
                'total': numeric.sum(),
                'average': numeric.mean(),
                'growth': step_change.mean() * 100
            }

            return summary, metrics

        except Exception as e:
            logger.error(f"Error processing CSV: {str(e)}")
            raise

    def analyze_financials(self, income_summary, balance_summary):
        """Generate the sentiment-tagged narrative report.

        Args:
            income_summary: ``(summary, metrics)`` tuple as returned by
                ``process_csv`` for the income statement.
            balance_summary: the same tuple for the balance sheet.

        Returns:
            The formatted Markdown report, or the string
            ``"Error generating analysis"`` if anything fails (the error is
            logged, not raised).
        """
        try:
            financial_context = f"""
            Income Statement Metrics:
            {income_summary[0].to_string()}
            
            Key Income Indicators:
            {income_summary[1]}
            
            Balance Sheet Metrics:
            {balance_summary[0].to_string()}
            
            Key Balance Sheet Indicators:
            {balance_summary[1]}
            """

            # The context is usually longer than the classifier's 512-token
            # window, so truncate instead of letting the call fail.
            sentiment = self.sentiment_model(
                financial_context,
                truncation=True,
                max_length=512
            )[0]

            # NOTE(review): the [INST] wrapper may not match the chat template
            # this TinyLlama checkpoint was trained with; consider building
            # the prompt with tokenizer.apply_chat_template — TODO confirm.
            analysis_prompt = f"""[INST] Based on the following financial data, provide:
            1. Current Business Status
            2. Key Business Insights
            3. Strategic Recommendations and Roadmap

            Financial Context:
            {financial_context}

            Sentiment: {sentiment['label']} ({sentiment['score']:.2%})

            Provide a concise but detailed analysis for each section.
            [/INST]"""

            response = self.analysis_model(
                analysis_prompt,
                # Budget only the generated tokens; a total max_length would
                # be used up by a long prompt.
                max_new_tokens=512,
                # temperature has no effect unless sampling is enabled.
                do_sample=True,
                temperature=0.7,
                num_return_sequences=1,
                # Do not echo the prompt: it contains the section headings and
                # would otherwise be parsed as if it were the model's answer.
                return_full_text=False
            )

            return self.format_response(response[0]['generated_text'], sentiment)

        except Exception as e:
            logger.error(f"Error in analysis: {str(e)}")
            return "Error generating analysis"

    def format_response(self, analysis_text, sentiment):
        """Format generated text into the structured Markdown report.

        The text is split into blank-line separated paragraphs. A line that
        contains one of the section markers switches the current section and
        is not itself emitted; all other text becomes a bullet item of the
        current section, one bullet per paragraph. Text that appears before
        the first recognized heading is discarded.

        Args:
            analysis_text: raw text produced by the generation model.
            sentiment: mapping with ``'label'`` and ``'score'`` keys.

        Returns:
            The Markdown report, or ``"Error formatting analysis results"``
            if formatting fails (the error is logged, not raised).
        """
        try:
            buckets = {"status": [], "insights": [], "recommendations": []}
            current = None

            def flush(lines):
                # Store the collected lines as one item of the current section.
                text = "\n".join(lines).strip()
                if current is not None and text:
                    buckets[current].append(text)

            for paragraph in analysis_text.split('\n\n'):
                pending = []
                for line in paragraph.splitlines():
                    target = next(
                        (key for marker, key in self._SECTION_MARKERS if marker in line),
                        None,
                    )
                    if target is None:
                        pending.append(line)
                        continue
                    # Heading line: text collected so far belongs to the
                    # previous section; text after it to the new one.
                    flush(pending)
                    pending = []
                    current = target
                flush(pending)

            # Format the final output
            output = [
                "# Financial Analysis Report\n\n",
                f"## Overall Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n",
                "## Current Business Status\n",
                "".join(f"- {item}\n" for item in buckets["status"] if item),
                "\n## Key Business Insights\n",
                "".join(f"- {item}\n" for item in buckets["insights"] if item),
                "\n## Strategic Recommendations & Roadmap\n",
                "".join(f"- {item}\n" for item in buckets["recommendations"] if item)
            ]

            return "".join(output)

        except Exception as e:
            logger.error(f"Error formatting response: {str(e)}")
            return "Error formatting analysis results"

# Analyzer shared by all requests; created on first use so that the models
# are loaded once instead of on every call.
_shared_analyzer = None


def _get_analyzer():
    """Return the shared FinancialAnalyzer, constructing it on first use."""
    global _shared_analyzer
    if _shared_analyzer is None:
        _shared_analyzer = FinancialAnalyzer()
    return _shared_analyzer


def analyze_statements(income_statement, balance_sheet):
    """Analyze an income statement and a balance sheet and return a report.

    Args:
        income_statement: uploaded income statement CSV, or None.
        balance_sheet: uploaded balance sheet CSV, or None.

    Returns:
        A string: the generated report on success, otherwise a message that
        describes what went wrong. Errors are reported in the returned text
        rather than raised.
    """
    try:
        # Check if files are uploaded
        if income_statement is None or balance_sheet is None:
            return "Please upload both Income Statement and Balance Sheet CSV files."

        # Get file names (fall back to a generic label when there is no .name)
        income_filename = income_statement.name if hasattr(income_statement, 'name') else 'Income Statement'
        balance_filename = balance_sheet.name if hasattr(balance_sheet, 'name') else 'Balance Sheet'

        logger.info(f"Processing {income_filename} and {balance_filename}")

        # Reuse the already-loaded models when possible
        analyzer = _get_analyzer()

        # Each statement is processed separately so the message can say
        # which of the two uploads was rejected.
        try:
            income_summary = analyzer.process_csv(income_statement)
            logger.info("Successfully processed Income Statement")
        except Exception as e:
            return f"Error processing Income Statement: {str(e)}\nPlease ensure it's a valid CSV file with numeric data."

        try:
            balance_summary = analyzer.process_csv(balance_sheet)
            logger.info("Successfully processed Balance Sheet")
        except Exception as e:
            return f"Error processing Balance Sheet: {str(e)}\nPlease ensure it's a valid CSV file with numeric data."

        # Generate analysis; free transient memory whether or not it succeeds
        logger.info("Generating analysis...")
        try:
            return analyzer.analyze_financials(income_summary, balance_summary)
        finally:
            clear_gpu_memory()

    except Exception as e:
        logger.error(f"Analysis error: {str(e)}")
        return f"""Analysis Error:
        
        {str(e)}
        
        Please verify:
        1. Files are in CSV format
        2. Files contain numeric data columns
        3. Files follow standard financial statement format"""

# Build the Gradio UI: two single-file CSV uploads in, one Markdown report out.
_income_upload = gr.File(
    label="Upload Income Statement (CSV)",
    file_types=[".csv"],
    file_count="single",
)
_balance_upload = gr.File(
    label="Upload Balance Sheet (CSV)",
    file_types=[".csv"],
    file_count="single",
)
_report_view = gr.Markdown()

# Help text displayed with the interface.
_interface_description = """## Financial Analysis Tool

How to use:
1. Click 'Upload Income Statement' to select your income statement CSV file
2. Click 'Upload Balance Sheet' to select your balance sheet CSV file
3. Wait for the analysis to complete

The tool will provide:
- Business Status Assessment
- Key Financial Insights
- Strategic Recommendations

Requirements:
- Files must be in CSV format
- Must contain numeric data columns
- Standard financial statement format preferred"""

iface = gr.Interface(
    fn=analyze_statements,
    inputs=[_income_upload, _balance_upload],
    outputs=_report_view,
    title="Financial Statement Analyzer",
    description=_interface_description,
    flagging_mode="never",
)

# Launch the interface; on failure, log the error and exit with status 1.
if __name__ == "__main__":
    # Imported here because the exit call below needs it and the file's
    # top-level imports do not include it.
    import sys

    try:
        iface.queue()  # Enable queuing for better file handling
        iface.launch(
            share=False,
            server_name="0.0.0.0",
            server_port=7860,
            show_api=False  # Disable API tab for security
        )
    except Exception as e:
        logger.error(f"Launch error: {str(e)}")
        sys.exit(1)