import os
import gradio as gr
import pandas as pd
import torch
import logging
from transformers import pipeline

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
logger.info(f"Using device: {DEVICE}")

class FinancialAnalyzer:
    """Wraps a TinyLlama text-generation pipeline and a FinBERT sentiment pipeline."""

    def __init__(self):
        self.analysis_model = None
        self.sentiment_model = None
        self.load_models()

    def load_models(self):
        try:
            logger.info("Loading TinyLlama model...")
            self.analysis_model = pipeline(
                "text-generation",
                model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
                device=DEVICE  # place the model on the GPU when one is available
            )
            
            logger.info("Loading FinBERT model...")
            self.sentiment_model = pipeline(
                "text-classification",
                model="ProsusAI/finbert",
                torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
                device=DEVICE
            )
            
            logger.info("Models loaded successfully")
        except Exception as e:
            logger.error(f"Error loading models: {str(e)}")
            raise

    def extract_and_analyze(self, statement_text, statement_type):
        """Extract and summarize key figures from a single financial statement."""
        try:
            prompt = f"""[INST] As a financial analyst, analyze this {statement_type}:

{statement_text}

Extract and summarize:
1. Key financial numbers for 2025
2. Notable trends
3. Important metrics

Focus on the most recent year (2025) and key financial indicators.
[/INST]"""

            response = self.analysis_model(
                prompt,
                max_new_tokens=300,
                temperature=0.3,
                do_sample=True,          # temperature only takes effect when sampling is enabled
                return_full_text=False,  # return only the generated analysis, not the prompt
                num_return_sequences=1,
                truncation=True
            )
            
            return response[0]['generated_text']
        except Exception as e:
            logger.error(f"Error extracting data from {statement_type}: {str(e)}")
            raise

    def analyze_financials(self, income_text, balance_text):
        try:
            # First, extract key information from each statement
            logger.info("Analyzing Income Statement...")
            income_analysis = self.extract_and_analyze(income_text, "Income Statement")
            
            logger.info("Analyzing Balance Sheet...")
            balance_analysis = self.extract_and_analyze(balance_text, "Balance Sheet")
            
            # Combine the analyses
            combined_analysis = f"""Income Statement Analysis:
{income_analysis}

Balance Sheet Analysis:
{balance_analysis}"""

            # Score overall sentiment with FinBERT (input shortened to fit its 512-token limit)
            sentiment = self.sentiment_model(
                combined_analysis[:512],
                truncation=True
            )[0]
            
            # Generate final analysis
            final_prompt = f"""[INST] Based on this financial analysis:

{combined_analysis}

Market Sentiment: {sentiment['label']} ({sentiment['score']:.2%})

Provide a concise analysis with:
1. Overall Financial Health (2-3 key points)
2. Main Business Insights (2-3 insights)
3. Key Recommendations (2-3 recommendations)
[/INST]"""

            final_response = self.analysis_model(
                final_prompt,
                max_new_tokens=500,
                temperature=0.7,
                do_sample=True,          # enable sampling so the temperature setting is honoured
                return_full_text=False,  # keep the prompt out of the formatted report
                num_return_sequences=1,
                truncation=True
            )

            return self.format_response(final_response[0]['generated_text'], sentiment, combined_analysis)
            
        except Exception as e:
            logger.error(f"Analysis error: {str(e)}")
            return f"Error in analysis: {str(e)}"

    def format_response(self, analysis_text, sentiment, raw_analysis):
        try:
            sections = [
                "# Financial Analysis Report\n\n",
                f"## Market Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n",
                "## Extracted Financial Data\n```\n",
                raw_analysis,
                "\n```\n\n",
                "## Analysis\n\n"
            ]
            
            for line in analysis_text.split('\n'):
                line = line.strip()
                if not line:
                    continue
                
                if any(header in line for header in ["Financial Health", "Business Insights", "Recommendations"]):
                    sections.append(f"\n### {line}\n")
                else:
                    if not line.startswith('-'):
                        line = f"- {line}"
                    sections.append(f"{line}\n")
            
            return "".join(sections)
        except Exception as e:
            logger.error(f"Error formatting response: {str(e)}")
            return "Error formatting analysis"

# Shared analyzer instance so the models are loaded once, not on every request
_analyzer = None

def analyze_statements(income_statement, balance_sheet):
    global _analyzer
    try:
        if not income_statement or not balance_sheet:
            return "Please upload both financial statements."

        logger.info("Reading financial statements...")
        # Read the uploaded CSVs into DataFrames
        income_df = pd.read_csv(income_statement)
        balance_df = pd.read_csv(balance_sheet)
        
        # Convert to plain text while preserving the tabular layout
        income_text = income_df.to_string(index=False)
        balance_text = balance_df.to_string(index=False)
        
        logger.info("Initializing analysis...")
        if _analyzer is None:
            _analyzer = FinancialAnalyzer()
        result = _analyzer.analyze_financials(income_text, balance_text)
        
        if DEVICE == "cuda":
            torch.cuda.empty_cache()
            
        return result
        
    except Exception as e:
        logger.error(f"Error: {str(e)}")
        return f"""Analysis Error: {str(e)}

Please check:
1. Files are readable CSV files
2. Files contain financial data
3. Files are not corrupted"""
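
# Expected input shape (illustrative assumption, not enforced by the code): simple
# wide CSVs with a metric column and one column per year, e.g.
#
#     Metric,2023,2024,2025
#     Revenue,1200,1500,1800
#     Net Income,200,260,310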

# Create Gradio interface
iface = gr.Interface(
    fn=analyze_statements,
    inputs=[
        gr.File(label="Income Statement (CSV)", file_types=[".csv"]),
        gr.File(label="Balance Sheet (CSV)", file_types=[".csv"])
    ],
    outputs=gr.Markdown(),
    title="AI Financial Statement Analyzer",
    description="""Upload your financial statements for AI analysis.
The model will extract and analyze key financial information automatically.""",
    theme="default",
    flagging_mode="never"
)

# Launch
if __name__ == "__main__":
    iface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )
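
# To run locally (assumption: this script is saved as app.py):
#   pip install gradio transformers torch pandas
#   python app.py
# Then open http://localhost:7860 in a browser.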