Spaces:

walaa2022
/

financial-analysis-system

Sleeping

App Files Files Community

walaa2022 committed on Nov 26, 2024

Commit

91033f9

verified ·

1 Parent(s): eac8dde

Update app.py

Browse files

Files changed (1) hide show

app.py +227 -46

app.py CHANGED Viewed

@@ -3,8 +3,11 @@ import gradio as gr
 import pandas as pd
 import torch
 import logging
-from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
 import gc
 # Setup logging
 logging.basicConfig(
@@ -17,12 +20,34 @@ logger = logging.getLogger(__name__)
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 logger.info(f"Using device: {DEVICE}")
 def clear_gpu_memory():
     """Utility function to clear GPU memory"""
     if DEVICE == "cuda":
         torch.cuda.empty_cache()
     gc.collect()
 class ModelManager:
     """Handles model loading and inference"""
@@ -30,29 +55,52 @@ class ModelManager:
         self.device = DEVICE
         self.models = {}
         self.tokenizers = {}
-    def load_model(self, model_name, model_type="sentiment"):
-        """Load model and tokenizer"""
         try:
-            if model_name not in self.models:
-                if model_type == "sentiment":
-                    self.tokenizers[model_name] = AutoTokenizer.from_pretrained(model_name)
-                    self.models[model_name] = AutoModelForSequenceClassification.from_pretrained(
-                        model_name,
-                        torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
-                    ).to(self.device)
-                else:
-                    self.models[model_name] = pipeline(
-                        "text-generation",
-                        model=model_name,
-                        device_map="auto" if self.device == "cuda" else None,
-                        torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
-                    )
-                logger.info(f"Loaded model: {model_name}")
         except Exception as e:
             logger.error(f"Error loading model {model_name}: {str(e)}")
             raise
     def unload_model(self, model_name):
         """Unload model and tokenizer"""
         try:
@@ -92,29 +140,53 @@ class FinancialAnalyzer:
             raise
     def read_csv(self, file_obj):
-        """Read and validate CSV file"""
         try:
             if file_obj is None:
                 raise ValueError("No file provided")
-            df = pd.read_csv(file_obj)
             if df.empty:
                 raise ValueError("Empty CSV file")
-            return df.describe()
         except Exception as e:
             logger.error(f"Error reading CSV: {str(e)}")
             raise
     def analyze_sentiment(self, text):
-        """Analyze sentiment using FinBERT"""
         try:
             model_name = self.models["sentiment"]
             model = self.model_manager.get_model(model_name)
             tokenizer = self.model_manager.get_tokenizer(model_name)
             inputs = tokenizer(
                 text,
                 return_tensors="pt",
@@ -123,10 +195,12 @@ class FinancialAnalyzer:
                 padding=True
             ).to(DEVICE)
             with torch.no_grad():
                 outputs = model(**inputs)
                 probabilities = torch.nn.functional.softmax(outputs.logits, dim=1)
             labels = ['negative', 'neutral', 'positive']
             scores = probabilities[0].cpu().tolist()
@@ -135,79 +209,181 @@ class FinancialAnalyzer:
                 for label, score in zip(labels, scores)
             ]
             return [results]
         except Exception as e:
             logger.error(f"Sentiment analysis error: {str(e)}")
             return [{"label": "error", "score": 1.0}]
     def generate_analysis(self, financial_data):
-        """Generate strategic analysis"""
         try:
             model_name = self.models["analysis"]
             self.model_manager.load_model(model_name, "generation")
-            prompt = f"""[INST] Analyze these financial statements:
             {financial_data}
-            Provide:
             1. Business Health Assessment
             2. Key Strategic Insights
             3. Market Position
             4. Growth Opportunities
-            5. Risk Factors [/INST]"""
             response = self.model_manager.get_model(model_name)(
                 prompt,
-                max_length=1000,
                 temperature=0.7,
                 do_sample=True,
                 num_return_sequences=1,
-                truncation=True
             )
-            return response[0]['generated_text']
         except Exception as e:
             logger.error(f"Analysis generation error: {str(e)}")
             return "Error in analysis generation"
         finally:
             self.model_manager.unload_model(model_name)
     def generate_recommendations(self, analysis):
-        """Generate recommendations"""
         try:
             model_name = self.models["recommendation"]
             self.model_manager.load_model(model_name, "generation")
-            prompt = f"""Based on this analysis:
             {analysis}
-            Provide actionable recommendations for:
             1. Strategic Initiatives
             2. Operational Improvements
             3. Financial Management
             4. Risk Mitigation
-            5. Growth Strategy"""
             response = self.model_manager.get_model(model_name)(
                 prompt,
-                max_length=1000,
-                temperature=0.6,
                 do_sample=True,
                 num_return_sequences=1,
-                truncation=True
             )
-            return response[0]['generated_text']
         except Exception as e:
             logger.error(f"Recommendations generation error: {str(e)}")
             return "Error generating recommendations"
         finally:
             self.model_manager.unload_model(model_name)
 def analyze_financial_statements(income_statement, balance_sheet):
-    """Main analysis function"""
     try:
         analyzer = FinancialAnalyzer()
         # Validate inputs
@@ -240,7 +416,9 @@ def analyze_financial_statements(income_statement, balance_sheet):
         recommendations = analyzer.generate_recommendations(analysis)
         # Format results
-        return format_results(analysis, sentiment, recommendations)
     except Exception as e:
         logger.error(f"Analysis error: {str(e)}")
@@ -254,7 +432,7 @@ def analyze_financial_statements(income_statement, balance_sheet):
         3. File size is within limits"""
 def format_results(analysis, sentiment, recommendations):
-    """Format analysis results"""
     try:
         if not isinstance(analysis, str) or not isinstance(recommendations, str):
             raise ValueError("Invalid input types")
@@ -280,7 +458,7 @@ def format_results(analysis, sentiment, recommendations):
         logger.error(f"Formatting error: {str(e)}")
         return "Error formatting results"
-# Create Gradio interface
 iface = gr.Interface(
     fn=analyze_financial_statements,
     inputs=[
@@ -294,7 +472,10 @@ iface = gr.Interface(
     - Sentiment Analysis (FinBERT)
     - Strategic Recommendations (Falcon)
-    Note: Please ensure files are in CSV format.""",
     flagging_mode="never"
 )

 import pandas as pd
 import torch
 import logging
 import gc
+import signal
+from contextlib import contextmanager
+import psutil
+from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
 # Setup logging
 logging.basicConfig(
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 logger.info(f"Using device: {DEVICE}")
def monitor_memory():
    """Log this process's resident memory and, on CUDA, peak GPU memory.

    This is best-effort diagnostics: any failure is logged and swallowed so
    that monitoring can never abort the work it is observing.
    """
    try:
        # psutil.Process() with no argument targets the current process, so
        # this function does not depend on ``os`` being imported by the module.
        process = psutil.Process()
        rss_mb = process.memory_info().rss / 1024 / 1024
        logger.info(f"Memory usage: {rss_mb:.2f} MB")
        if DEVICE == "cuda":
            # max_memory_allocated() reports the peak allocation, not the
            # amount currently in use.
            peak_mb = torch.cuda.max_memory_allocated() / 1024 / 1024
            logger.info(f"GPU Memory: {peak_mb:.2f} MB")
    except Exception as e:
        logger.error(f"Error monitoring memory: {str(e)}")
 def clear_gpu_memory():
     """Utility function to clear GPU memory"""
     if DEVICE == "cuda":
         torch.cuda.empty_cache()
     gc.collect()
@contextmanager
def timeout_context(seconds):
    """Raise ``TimeoutError`` if the wrapped block runs longer than *seconds*.

    The limit is enforced with ``SIGALRM``. That signal exists only on Unix
    and a handler can only be installed from the main thread, so when either
    condition is not met (for example inside a worker thread), or when
    *seconds* is ``None`` or not positive, the block runs without a time
    limit instead of failing before it starts.

    Args:
        seconds: Whole number of seconds before the block is interrupted.

    Raises:
        TimeoutError: Inside the ``with`` body, once the alarm fires.
    """
    import logging
    import threading

    def signal_handler(signum, frame):
        raise TimeoutError(f"Operation timed out after {seconds} seconds")

    can_arm_alarm = (
        seconds is not None
        and seconds > 0
        and hasattr(signal, "SIGALRM")
        and threading.current_thread() is threading.main_thread()
    )
    if not can_arm_alarm:
        logging.getLogger(__name__).warning(
            "timeout_context: SIGALRM timeout unavailable here; running without a time limit"
        )
        yield
        return

    previous_handler = signal.signal(signal.SIGALRM, signal_handler)
    signal.alarm(seconds)
    try:
        yield
    finally:
        # Cancel the pending alarm before restoring, so it cannot fire into
        # whatever handler was installed before this context was entered.
        signal.alarm(0)
        if previous_handler is not None:
            signal.signal(signal.SIGALRM, previous_handler)
 class ModelManager:
     """Handles model loading and inference"""
         self.device = DEVICE
         self.models = {}
         self.tokenizers = {}
+        self.model_cache = {}
+        self.max_cache_size = 2
+    def load_model(self, model_name, model_type="sentiment", timeout=300):
+        """Load model and tokenizer with timeout"""
         try:
+            if model_name in self.model_cache:
+                self.models[model_name] = self.model_cache[model_name]
+                logger.info(f"Loaded {model_name} from cache")
+                return
+            with timeout_context(timeout):
+                if model_name not in self.models:
+                    if model_type == "sentiment":
+                        self.tokenizers[model_name] = AutoTokenizer.from_pretrained(
+                            model_name,
+                            use_fast=True
+                        )
+                        self.models[model_name] = AutoModelForSequenceClassification.from_pretrained(
+                            model_name,
+                            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
+                        ).to(self.device)
+                    else:
+                        self.models[model_name] = pipeline(
+                            "text-generation",
+                            model=model_name,
+                            device_map="auto" if self.device == "cuda" else None,
+                            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
+                        )
+                    # Cache the model
+                    self.cache_model(model_name, self.models[model_name])
+                    logger.info(f"Successfully loaded model: {model_name}")
+                    monitor_memory()
         except Exception as e:
             logger.error(f"Error loading model {model_name}: {str(e)}")
             raise
+    def cache_model(self, model_name, model):
+        """Cache model for faster reloading"""
+        if len(self.model_cache) >= self.max_cache_size:
+            oldest_model = next(iter(self.model_cache))
+            del self.model_cache[oldest_model]
+        self.model_cache[model_name] = model
     def unload_model(self, model_name):
         """Unload model and tokenizer"""
         try:
             raise
     def read_csv(self, file_obj):
+        """Read and validate CSV file with better error handling"""
         try:
             if file_obj is None:
                 raise ValueError("No file provided")
+            # Read CSV with explicit encoding and error handling
+            df = pd.read_csv(file_obj, encoding='utf-8', on_bad_lines='skip')
             if df.empty:
                 raise ValueError("Empty CSV file")
+            # Log CSV information
+            logger.info(f"CSV Preview:\n{df.head()}")
+            logger.info(f"CSV Columns: {df.columns.tolist()}")
+            # Validate numeric columns
+            numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
+            if len(numeric_cols) == 0:
+                raise ValueError("No numeric columns found in CSV")
+            # Generate statistical summary
+            summary = df[numeric_cols].describe()
+            logger.info(f"Statistical Summary:\n{summary}")
+            return summary
         except Exception as e:
             logger.error(f"Error reading CSV: {str(e)}")
             raise
     def analyze_sentiment(self, text):
+        """Analyze sentiment using FinBERT with improved error handling"""
         try:
             model_name = self.models["sentiment"]
             model = self.model_manager.get_model(model_name)
             tokenizer = self.model_manager.get_tokenizer(model_name)
+            # Validate input
+            if not text or not isinstance(text, str):
+                raise ValueError("Invalid input text")
+            # Preprocess text
+            text = text.strip()
+            if len(text) == 0:
+                raise ValueError("Empty text input")
+            # Tokenize with proper padding and truncation
             inputs = tokenizer(
                 text,
                 return_tensors="pt",
                 padding=True
             ).to(DEVICE)
+            # Get prediction
             with torch.no_grad():
                 outputs = model(**inputs)
                 probabilities = torch.nn.functional.softmax(outputs.logits, dim=1)
+            # Process results
             labels = ['negative', 'neutral', 'positive']
             scores = probabilities[0].cpu().tolist()
                 for label, score in zip(labels, scores)
             ]
+            logger.info(f"Sentiment analysis results: {results}")
             return [results]
         except Exception as e:
             logger.error(f"Sentiment analysis error: {str(e)}")
             return [{"label": "error", "score": 1.0}]
     def generate_analysis(self, financial_data):
+        """Generate strategic analysis with improved prompting"""
         try:
             model_name = self.models["analysis"]
             self.model_manager.load_model(model_name, "generation")
+            prompt = f"""[INST] As a senior financial analyst, provide a detailed analysis of these financial statements:
+            Financial Data:
             {financial_data}
+            Please provide a comprehensive analysis covering:
             1. Business Health Assessment
+            - Current financial position
+            - Key performance indicators
+            - Trend analysis
             2. Key Strategic Insights
+            - Major financial trends
+            - Performance drivers
+            - Areas of concern
             3. Market Position
+            - Competitive advantages
+            - Market share indicators
+            - Industry comparison
             4. Growth Opportunities
+            - Expansion potential
+            - Investment opportunities
+            - Revenue growth areas
+            5. Risk Factors
+            - Financial risks
+            - Operational risks
+            - Market risks
+            Provide specific metrics and detailed explanations for each section. [/INST]"""
             response = self.model_manager.get_model(model_name)(
                 prompt,
+                max_length=2000,
+                min_length=800,
                 temperature=0.7,
                 do_sample=True,
                 num_return_sequences=1,
+                truncation=True,
+                repetition_penalty=1.2,
+                no_repeat_ngram_size=3
             )
+            analysis_text = response[0]['generated_text']
+            return self.format_analysis_text(analysis_text)
         except Exception as e:
             logger.error(f"Analysis generation error: {str(e)}")
             return "Error in analysis generation"
         finally:
             self.model_manager.unload_model(model_name)
+    def format_analysis_text(self, text):
+        """Format the analysis text for better readability"""
+        try:
+            sections = text.split('\n\n')
+            formatted_sections = []
+            for section in sections:
+                if section.strip():
+                    if any(section.startswith(str(i)) for i in range(1, 6)):
+                        formatted_sections.append(f"### {section}")
+                    else:
+                        formatted_sections.append(section)
+            return '\n\n'.join(formatted_sections)
+        except Exception as e:
+            logger.error(f"Error formatting analysis text: {str(e)}")
+            return text
     def generate_recommendations(self, analysis):
+        """Generate recommendations with comprehensive prompting"""
         try:
             model_name = self.models["recommendation"]
             self.model_manager.load_model(model_name, "generation")
+            prompt = f"""Based on this financial analysis, provide detailed strategic recommendations:
+            Analysis Context:
             {analysis}
+            Please provide specific, actionable recommendations for each area:
             1. Strategic Initiatives
+            - Detail specific actions for business growth
+            - Identify market expansion opportunities
+            - Outline product/service development strategies
             2. Operational Improvements
+            - Specify efficiency enhancement measures
+            - Recommend process optimization steps
+            - Suggest cost reduction strategies
             3. Financial Management
+            - Provide cash flow optimization tactics
+            - Prioritize investment opportunities
+            - Detail risk management approaches
             4. Risk Mitigation
+            - Address identified risks
+            - Outline specific mitigation strategies
+            - Suggest monitoring mechanisms
+            5. Growth Strategy
+            - Identify market opportunities
+            - Detail expansion plans
+            - Specify resource requirements
+            For each recommendation:
+            - Include implementation timeline
+            - Specify resource requirements
+            - Define success metrics
+            - List potential challenges
+            Format each section with clear, actionable bullet points."""
             response = self.model_manager.get_model(model_name)(
                 prompt,
+                max_length=2000,
+                min_length=800,
+                temperature=0.7,
                 do_sample=True,
                 num_return_sequences=1,
+                truncation=True,
+                repetition_penalty=1.2,
+                no_repeat_ngram_size=3
             )
+            recommendations_text = response[0]['generated_text']
+            return self.format_recommendation_text(recommendations_text)
         except Exception as e:
             logger.error(f"Recommendations generation error: {str(e)}")
             return "Error generating recommendations"
         finally:
             self.model_manager.unload_model(model_name)
+    def format_recommendation_text(self, text):
+        """Format the recommendation text for better readability"""
+        try:
+            sections = text.split('\n\n')
+            formatted_sections = []
+            for section in sections:
+                if section.strip():
+                    if any(section.startswith(str(i)) for i in range(1, 6)):
+                        formatted_sections.append(f"### {section}")
+                    else:
+                        formatted_sections.append(section)
+            return '\n\n'.join(formatted_sections)
+        except Exception as e:
+            logger.error(f"Error formatting recommendation text: {str(e)}")
+            return text
 def analyze_financial_statements(income_statement, balance_sheet):
+    """Main analysis function with improved error handling and logging"""
     try:
+        monitor_memory()
         analyzer = FinancialAnalyzer()
         # Validate inputs
         recommendations = analyzer.generate_recommendations(analysis)
         # Format results
+        result = format_results(analysis, sentiment, recommendations)
+        monitor_memory()
+        return result
     except Exception as e:
         logger.error(f"Analysis error: {str(e)}")
         3. File size is within limits"""
 def format_results(analysis, sentiment, recommendations):
+    """Format analysis results with improved validation and formatting"""
     try:
         if not isinstance(analysis, str) or not isinstance(recommendations, str):
             raise ValueError("Invalid input types")
         logger.error(f"Formatting error: {str(e)}")
         return "Error formatting results"
+# Create Gradio interface with improved error handling
 iface = gr.Interface(
     fn=analyze_financial_statements,
     inputs=[
     - Sentiment Analysis (FinBERT)
     - Strategic Recommendations (Falcon)
+    Note:
+    - Files must be in CSV format
+    - Each file should contain financial data in columns
+    - Maximum file size: 10MB""",
     flagging_mode="never"
 )