Spaces:

walaa2022
/

financial-analysis-system

Sleeping

App Files Files Community

walaa2022 committed on Nov 25, 2024

Commit

eac8dde

verified ·

1 Parent(s): 2ac4fcc

Update app.py

Browse files

Files changed (1) hide show

app.py +229 -137

app.py CHANGED Viewed

@@ -1,193 +1,284 @@
 import os
 import gradio as gr
 import pandas as pd
-from transformers import pipeline
 import torch
-import sys
 import logging
-import io
-from huggingface_hub import login
-from dotenv import load_dotenv
-# Load environment variables
-load_dotenv()
-import logging
-# Set up logging
-logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-# Get token securely from environment variable
-hf_token = os.getenv('HUGGINGFACE_TOKEN')
-# Check if the token is available
-if hf_token:
-    # Log in to Hugging Face Hub
-    login(token=hf_token)
-    print("Successfully logged in to Hugging Face Hub.")
-else:
-    print("HF_TOKEN environment variable not found. Please set it in the Space settings.")
 class FinancialAnalyzer:
     def __init__(self):
-        """Initialize models with error handling"""
         try:
-            # 1. Llama 2 for strategic analysis
-            self.strategic_analyzer = pipeline(
-                "text-generation",
-                model="meta-llama/Llama-3.2-1B",
-                device_map="auto"
-            )
-            logger.info("Llama 3 initialized successfully")
-            # 2. FinBERT for financial sentiment
-            self.financial_analyzer = pipeline(
-                "text-classification",
-                model="ProsusAI/finbert",
-                top_k= None
-            )
-            logger.info("FinBERT initialized successfully")
-            # 3. Falcon for recommendations
-            self.recommendation_generator = pipeline(
-                "text-generation",
-                model="tiiuae/falcon-7b-instruct",
-                device_map="auto"
-            )
-            logger.info("Falcon initialized successfully")
         except Exception as e:
-            logger.error(f"Error initializing models: {str(e)}")
             raise
-    def read_csv_file(self, file_obj):
-        """Safely read CSV file"""
         try:
             if file_obj is None:
                 raise ValueError("No file provided")
-            return pd.read_csv(file_obj)
         except Exception as e:
-            logger.error(f"Error reading CSV file: {str(e)}")
             raise
-    def generate_strategic_analysis(self, financial_data):
-        """Generate strategic analysis using Llama 3"""
         try:
-            prompt = f"""[INST] As a senior financial analyst, analyze these financial statements:
-            Financial Data:
             {financial_data}
             Provide:
             1. Business Health Assessment
             2. Key Strategic Insights
-            3. Market Position Analysis
             4. Growth Opportunities
             5. Risk Factors [/INST]"""
-            response = self.strategic_analyzer(
                 prompt,
-                max_length=1500,
-                temperature=0.7
             )
             return response[0]['generated_text']
         except Exception as e:
-            logger.error(f"Error in strategic analysis: {str(e)}")
-            return "Error generating strategic analysis"
-    def analyze_sentiment(self, text):
-        """Analyze financial sentiment using FinBERT"""
-        try:
-            return self.financial_analyzer(text)
-        except Exception as e:
-            logger.error(f"Error in sentiment analysis: {str(e)}")
-            return [{"label": "error", "score": 1.0}]
     def generate_recommendations(self, analysis):
-        """Generate recommendations using Falcon"""
         try:
-            prompt = f"""Based on this financial analysis:
             {analysis}
-            Provide specific, actionable recommendations covering:
             1. Strategic Initiatives
             2. Operational Improvements
             3. Financial Management
             4. Risk Mitigation
             5. Growth Strategy"""
-            response = self.recommendation_generator(
                 prompt,
-                max_length=1500,
-                temperature=0.6
             )
             return response[0]['generated_text']
         except Exception as e:
-            logger.error(f"Error generating recommendations: {str(e)}")
             return "Error generating recommendations"
 def analyze_financial_statements(income_statement, balance_sheet):
-    """Main analysis function with error handling"""
     try:
-        # Initialize analyzer
         analyzer = FinancialAnalyzer()
-        # Read CSV files safely
-        logger.info("Reading input files...")
-        income_df = analyzer.read_csv_file(income_statement)
-        balance_df = analyzer.read_csv_file(balance_sheet)
-        # Prepare financial data
         financial_data = f"""
         Income Statement Summary:
-        {income_df.to_string()}
         Balance Sheet Summary:
-        {balance_df.to_string()}
         """
-        # Generate analyses
         logger.info("Generating analysis...")
-        strategic_analysis = analyzer.generate_strategic_analysis(financial_data)
-        sentiment = analyzer.analyze_sentiment(strategic_analysis)
-        recommendations = analyzer.generate_recommendations(strategic_analysis)
-        # Format output
-        logger.info("Formatting results...")
-        return format_results(strategic_analysis, sentiment, recommendations)
     except Exception as e:
-        logger.error(f"Error in analysis: {str(e)}")
-        return f"""Error analyzing files: {str(e)}
-Please check:
-Files are in correct CSV format & contain the expected data
-If the problem persists, try uploading the files again."""
 def format_results(analysis, sentiment, recommendations):
     """Format analysis results"""
     try:
-        output = "# Financial Analysis Report\n\n"
-        # Strategic Analysis
-        output += "## Strategic Analysis\n\n"
-        output += analysis + "\n\n"
-        # Sentiment Analysis
-        output += "## Market Sentiment\n\n"
-        for score in sentiment[0]:
-            output += f"- {score['label']}: {score['score']:.2%}\n"
-        output += "\n"
-        # Recommendations
-        output += "## Strategic Recommendations\n\n"
-        output += recommendations
-        return output
     except Exception as e:
-        logger.error(f"Error formatting results: {str(e)}")
-        return "Error formatting analysis results"
 # Create Gradio interface
 iface = gr.Interface(
@@ -197,23 +288,24 @@ iface = gr.Interface(
         gr.File(label="Balance Sheet (CSV)")
     ],
     outputs=gr.Markdown(),
-    title="AI-Powered Financial Statement Analysis",
-    description="""Upload your financial statements for comprehensive analysis using:
-    - Llama 3: Strategic Analysis
-    - FinBERT: Financial Sentiment Analysis
-    - Falcon: Strategic Recommendations""",
-    examples=[
-        [
-            "OFINTECH-Income Statement-template.csv",
-            "OFINTECH Balance Sheet template.csv"
-        ]
-    ]
 )
-# Launch the interface
 if __name__ == "__main__":
     try:
-        iface.launch()
     except Exception as e:
-        logger.error(f"Error launching application: {str(e)}")
         sys.exit(1)

 import os
 import gradio as gr
 import pandas as pd
 import torch
 import logging
+from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
+import gc
+# Configure logging once at import time: INFO level, with a timestamp and
+# the level name in front of every message.
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
 logger = logging.getLogger(__name__)
+# Pick the torch device once at import: "cuda" when torch reports CUDA as
+# available, otherwise "cpu". The choice is logged for later diagnosis.
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+logger.info(f"Using device: {DEVICE}")
+def clear_gpu_memory():
+    """Release memory that is no longer referenced by live Python objects.
+
+    Runs the garbage collector and then, when the module-level ``DEVICE``
+    is ``"cuda"``, asks torch to empty its CUDA cache.
+    """
+    # Collect first: torch.cuda.empty_cache() can only hand back cached
+    # blocks whose tensors have already been freed, so objects kept alive
+    # by reference cycles must be collected before the cache is emptied.
+    gc.collect()
+    if DEVICE == "cuda":
+        torch.cuda.empty_cache()
+class ModelManager:
+    """Registry that loads, caches and releases the models used by the app.
+
+    Loaded models live in ``self.models`` and tokenizers in
+    ``self.tokenizers``; both dicts are keyed by model name.
+    """
+
+    def __init__(self):
+        """Start with empty caches, targeting the module-level ``DEVICE``."""
+        self.device = DEVICE
+        self.models = {}
+        self.tokenizers = {}
+
+    def load_model(self, model_name, model_type="sentiment"):
+        """Load ``model_name`` into the cache unless it is already there.
+
+        With ``model_type == "sentiment"`` a tokenizer and a
+        sequence-classification model are loaded and the model is moved to
+        ``self.device``; any other value builds a ``"text-generation"``
+        pipeline. float16 is requested on CUDA and float32 otherwise.
+        Failures are logged and re-raised.
+        """
+        try:
+            if model_name in self.models:
+                return
+            on_gpu = self.device == "cuda"
+            dtype = torch.float16 if on_gpu else torch.float32
+            if model_type == "sentiment":
+                self.tokenizers[model_name] = AutoTokenizer.from_pretrained(model_name)
+                classifier = AutoModelForSequenceClassification.from_pretrained(
+                    model_name,
+                    torch_dtype=dtype
+                )
+                self.models[model_name] = classifier.to(self.device)
+            else:
+                self.models[model_name] = pipeline(
+                    "text-generation",
+                    model=model_name,
+                    device_map="auto" if on_gpu else None,
+                    torch_dtype=dtype
+                )
+            logger.info(f"Loaded model: {model_name}")
+        except Exception as e:
+            logger.error(f"Error loading model {model_name}: {str(e)}")
+            raise
+
+    def unload_model(self, model_name):
+        """Drop ``model_name`` from both caches and reclaim memory.
+
+        Errors are logged and not propagated.
+        """
+        try:
+            # pop() with a default is a no-op for names that were never loaded.
+            self.models.pop(model_name, None)
+            self.tokenizers.pop(model_name, None)
+            clear_gpu_memory()
+            logger.info(f"Unloaded model: {model_name}")
+        except Exception as e:
+            logger.error(f"Error unloading model {model_name}: {str(e)}")
+
+    def get_model(self, model_name):
+        """Return the cached model for ``model_name``, or ``None``."""
+        return self.models.get(model_name)
+
+    def get_tokenizer(self, model_name):
+        """Return the cached tokenizer for ``model_name``, or ``None``."""
+        return self.tokenizers.get(model_name)
 class FinancialAnalyzer:
+    """Analyze financial statements with three Hugging Face models.
+
+    ``self.models`` maps a role ("sentiment", "analysis", "recommendation")
+    to a model name. The sentiment model is loaded in ``__init__``; the two
+    text-generation models are loaded on demand and unloaded again after
+    every call.
+    """
+
     def __init__(self):
+        """Create the model manager and load the sentiment model.
+
+        Raises:
+            Exception: whatever ``ModelManager.load_model`` raises; the
+                error is logged and re-raised.
+        """
+        self.model_manager = ModelManager()
+        self.models = {
+            "sentiment": "ProsusAI/finbert",
+            "analysis": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+            "recommendation": "tiiuae/falcon-rw-1b"
+        }
+        # Load sentiment model at initialization
         try:
+            self.model_manager.load_model(self.models["sentiment"], "sentiment")
         except Exception as e:
+            logger.error(f"Failed to initialize sentiment model: {str(e)}")
             raise
+
+    def read_csv(self, file_obj):
+        """Read a CSV file and return its ``DataFrame.describe()`` summary.
+
+        Raises:
+            ValueError: if ``file_obj`` is ``None`` or the parsed frame is
+                empty.
+            Exception: any error from ``pd.read_csv``; logged and re-raised.
+        """
         try:
             if file_obj is None:
                 raise ValueError("No file provided")
+            df = pd.read_csv(file_obj)
+            if df.empty:
+                raise ValueError("Empty CSV file")
+            return df.describe()
         except Exception as e:
+            logger.error(f"Error reading CSV: {str(e)}")
             raise
+
+    def analyze_sentiment(self, text):
+        """Score ``text`` with the sentiment model.
+
+        Returns:
+            A one-element list holding a list of ``{'label', 'score'}``
+            dicts, one per class, in the model's own class order. On
+            failure the same nested shape is returned with a single
+            ``"error"`` entry.
+        """
+        try:
+            model_name = self.models["sentiment"]
+            model = self.model_manager.get_model(model_name)
+            tokenizer = self.model_manager.get_tokenizer(model_name)
+            inputs = tokenizer(
+                text,
+                return_tensors="pt",
+                truncation=True,
+                max_length=512,
+                padding=True
+            ).to(self.model_manager.device)
+            with torch.no_grad():
+                outputs = model(**inputs)
+                probabilities = torch.nn.functional.softmax(outputs.logits, dim=1)
+            # Take label names from the model's own config instead of a
+            # hard-coded list: class order is model-specific, and a
+            # mismatched list silently attaches scores to the wrong labels.
+            id2label = getattr(model.config, "id2label", None) or {}
+            scores = probabilities[0].cpu().tolist()
+            results = [
+                {'label': id2label.get(index, f"LABEL_{index}"), 'score': score}
+                for index, score in enumerate(scores)
+            ]
+            return [results]
+        except Exception as e:
+            logger.error(f"Sentiment analysis error: {str(e)}")
+            # Same nesting as the success path, so callers that read
+            # ``result[0]`` as a list of dicts also work on failure.
+            return [[{"label": "error", "score": 1.0}]]
+
+    def _generate_text(self, model_name, prompt, temperature):
+        """Load ``model_name``, generate one completion, then unload it.
+
+        The model is unloaded in ``finally`` so memory is released even
+        when loading or generation fails. Exceptions propagate to the
+        caller.
+        """
+        try:
+            self.model_manager.load_model(model_name, "generation")
+            generator = self.model_manager.get_model(model_name)
+            response = generator(
+                prompt,
+                max_length=1000,
+                temperature=temperature,
+                do_sample=True,
+                num_return_sequences=1,
+                truncation=True
+            )
+            return response[0]['generated_text']
+        finally:
+            self.model_manager.unload_model(model_name)
+
+    def generate_analysis(self, financial_data):
+        """Generate a strategic analysis for ``financial_data``.
+
+        Returns the generated text, or the string
+        ``"Error in analysis generation"`` when anything fails.
+        """
+        try:
+            prompt = f"""[INST] Analyze these financial statements:
             {financial_data}
             Provide:
             1. Business Health Assessment
             2. Key Strategic Insights
+            3. Market Position
             4. Growth Opportunities
             5. Risk Factors [/INST]"""
+            return self._generate_text(self.models["analysis"], prompt, 0.7)
         except Exception as e:
+            logger.error(f"Analysis generation error: {str(e)}")
+            return "Error in analysis generation"
+
     def generate_recommendations(self, analysis):
+        """Generate recommendations based on ``analysis``.
+
+        Returns the generated text, or the string
+        ``"Error generating recommendations"`` when anything fails.
+        """
         try:
+            prompt = f"""Based on this analysis:
             {analysis}
+            Provide actionable recommendations for:
             1. Strategic Initiatives
             2. Operational Improvements
             3. Financial Management
             4. Risk Mitigation
             5. Growth Strategy"""
+            return self._generate_text(self.models["recommendation"], prompt, 0.6)
         except Exception as e:
+            logger.error(f"Recommendations generation error: {str(e)}")
             return "Error generating recommendations"
 def analyze_financial_statements(income_statement, balance_sheet):
+    """Run the full analysis on two uploaded CSV files and return Markdown.
+
+    Args:
+        income_statement: the uploaded income statement file.
+        balance_sheet: the uploaded balance sheet file.
+
+    Returns:
+        The formatted report, or an error message string when an input is
+        missing or any step raises.
+    """
     try:
+        # Validate inputs before building the analyzer: constructing
+        # FinancialAnalyzer loads the sentiment model, which is wasted work
+        # when a file is missing.
+        if not income_statement or not balance_sheet:
+            return "Error: Please provide both income statement and balance sheet files"
         analyzer = FinancialAnalyzer()
+        # Process financial statements
+        logger.info("Processing financial statements...")
+        income_summary = analyzer.read_csv(income_statement)
+        balance_summary = analyzer.read_csv(balance_sheet)
         financial_data = f"""
         Income Statement Summary:
+        {income_summary.to_string()}
         Balance Sheet Summary:
+        {balance_summary.to_string()}
         """
+        # Generate analysis
         logger.info("Generating analysis...")
+        analysis = analyzer.generate_analysis(financial_data)
+        # Analyze sentiment
+        logger.info("Analyzing sentiment...")
+        sentiment = analyzer.analyze_sentiment(analysis)
+        # Generate recommendations
+        logger.info("Generating recommendations...")
+        recommendations = analyzer.generate_recommendations(analysis)
+        # Format results
+        return format_results(analysis, sentiment, recommendations)
     except Exception as e:
+        logger.error(f"Analysis error: {str(e)}")
+        return f"""Analysis Error:
+        {str(e)}
+        Please verify:
+        1. Files are valid CSV format
+        2. Files contain required financial data
+        3. File size is within limits"""
 def format_results(analysis, sentiment, recommendations):
     """Format analysis results"""
     try:
+        if not isinstance(analysis, str) or not isinstance(recommendations, str):
+            raise ValueError("Invalid input types")
+        output = [
+            "# Financial Analysis Report\n\n",
+            "## Strategic Analysis\n\n",
+            f"{analysis.strip()}\n\n",
+            "## Market Sentiment\n\n"
+        ]
+        if isinstance(sentiment, list) and sentiment:
+            for score in sentiment[0]:
+                if isinstance(score, dict) and 'label' in score and 'score' in score:
+                    output.append(f"- {score['label']}: {score['score']:.2%}\n")
+        output.append("\n")
+        output.append("## Strategic Recommendations\n\n")
+        output.append(f"{recommendations.strip()}")
+        return "".join(output)
     except Exception as e:
+        logger.error(f"Formatting error: {str(e)}")
+        return "Error formatting results"
 # Create Gradio interface
 iface = gr.Interface(
         gr.File(label="Balance Sheet (CSV)")
     ],
     outputs=gr.Markdown(),
+    title="Financial Statement Analyzer",
+    description="""Upload financial statements for AI-powered analysis:
+    - Strategic Analysis (TinyLlama)
+    - Sentiment Analysis (FinBERT)
+    - Strategic Recommendations (Falcon)
+    Note: Please ensure files are in CSV format.""",
+    flagging_mode="never"
 )
 if __name__ == "__main__":
+    # `sys` is imported here because the module's import block does not
+    # provide it; without this the error path below raises NameError
+    # instead of exiting with status 1.
+    import sys
+
+    # Enable request queuing, then serve on all interfaces at port 7860
+    # without creating a public share link.
     try:
+        iface.queue()
+        iface.launch(
+            share=False,
+            server_name="0.0.0.0",
+            server_port=7860
+        )
     except Exception as e:
+        logger.error(f"Launch error: {str(e)}")
         sys.exit(1)