Spaces:

walaa2022
/

financial-analysis-system

Sleeping

App Files Files Community

walaa2022 committed on Nov 26, 2024

Commit

195e9d5

verified ·

1 Parent(s): f4bbd39

Update app.py

Browse files

Files changed (1) hide show

app.py +134 -73

app.py CHANGED Viewed

@@ -23,11 +23,32 @@ def clear_gpu_memory():
         torch.cuda.empty_cache()
     gc.collect()
 class FinancialAnalyzer:
     """Simplified Financial Analyzer using small models"""
     def __init__(self):
-        # Initialize with two small models
         self.sentiment_model = None
         self.analysis_model = None
         self.load_models()
@@ -39,7 +60,8 @@ class FinancialAnalyzer:
             self.sentiment_model = pipeline(
                 "text-classification",
                 model="ProsusAI/finbert",
-                torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
             )
             # Load small model for analysis and recommendations
@@ -60,27 +82,41 @@ class FinancialAnalyzer:
             if file_obj is None:
                 raise ValueError("No file provided")
-            df = pd.read_csv(file_obj)
             if df.empty:
                 raise ValueError("Empty CSV file")
             # Get numeric columns
             numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
             if len(numeric_cols) == 0:
                 raise ValueError("No numeric columns found in CSV")
-            # Calculate basic KPIs
-            summary = df[numeric_cols].describe()
-            # Extract key metrics
-            metrics = {
-                'total': df[numeric_cols].sum(),
-                'average': df[numeric_cols].mean(),
-                'growth': df[numeric_cols].pct_change().mean() * 100
             }
-            return summary, metrics
         except Exception as e:
             logger.error(f"Error processing CSV: {str(e)}")
@@ -89,49 +125,87 @@ class FinancialAnalyzer:
     def analyze_financials(self, income_summary, balance_summary):
         """Generate financial analysis and recommendations"""
         try:
             financial_context = f"""
-            Income Statement Metrics:
             {income_summary[0].to_string()}
-            Key Income Indicators:
-            {income_summary[1]}
-            Balance Sheet Metrics:
             {balance_summary[0].to_string()}
-            Key Balance Sheet Indicators:
-            {balance_summary[1]}
             """
             # Generate sentiment analysis
-            sentiment = self.sentiment_model(financial_context)[0]
             # Generate business analysis
-            analysis_prompt = f"""[INST] Based on the following financial data, provide:
-            1. Current Business Status
-            2. Key Business Insights
-            3. Strategic Recommendations and Roadmap
-            Financial Context:
             {financial_context}
             Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
-            Provide a concise but detailed analysis for each section.
             [/INST]"""
             response = self.analysis_model(
                 analysis_prompt,
-                max_length=1000,
-                temperature=0.7,
-                num_return_sequences=1
             )
             return self.format_response(response[0]['generated_text'], sentiment)
         except Exception as e:
             logger.error(f"Error in analysis: {str(e)}")
-            return "Error generating analysis"
     def format_response(self, analysis_text, sentiment):
         """Format the analysis response into structured sections"""
@@ -149,7 +223,7 @@ class FinancialAnalyzer:
             for section in sections:
                 if "Business Status" in section:
                     current_section = status
-                elif "Key Business Insights" in section:
                     current_section = insights
                 elif "Strategic Recommendations" in section:
                     current_section = recommendations
@@ -160,7 +234,7 @@ class FinancialAnalyzer:
             output = [
                 "# Financial Analysis Report\n\n",
                 f"## Overall Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n",
-                "## Current Business Status\n",
                 "".join(f"- {item}\n" for item in status if item),
                 "\n## Key Business Insights\n",
                 "".join(f"- {item}\n" for item in insights if item),
@@ -177,52 +251,38 @@ class FinancialAnalyzer:
 def analyze_statements(income_statement, balance_sheet):
     """Main function to analyze financial statements"""
     try:
-        # Check if files are uploaded
-        if income_statement is None or balance_sheet is None:
             return "Please upload both Income Statement and Balance Sheet CSV files."
-        # Get file names
-        income_filename = income_statement.name if hasattr(income_statement, 'name') else 'Income Statement'
-        balance_filename = balance_sheet.name if hasattr(balance_sheet, 'name') else 'Balance Sheet'
-        logger.info(f"Processing {income_filename} and {balance_filename}")
-        # Initialize analyzer
         analyzer = FinancialAnalyzer()
-        # Process statements with better error handling
-        try:
-            income_summary = analyzer.process_csv(income_statement)
-            logger.info("Successfully processed Income Statement")
-        except Exception as e:
-            return f"Error processing Income Statement: {str(e)}\nPlease ensure it's a valid CSV file with numeric data."
-        try:
-            balance_summary = analyzer.process_csv(balance_sheet)
-            logger.info("Successfully processed Balance Sheet")
-        except Exception as e:
-            return f"Error processing Balance Sheet: {str(e)}\nPlease ensure it's a valid CSV file with numeric data."
-        # Generate analysis
-        logger.info("Generating analysis...")
         result = analyzer.analyze_financials(income_summary, balance_summary)
         clear_gpu_memory()
         return result
     except Exception as e:
         logger.error(f"Analysis error: {str(e)}")
-        return f"""Analysis Error:
-        {str(e)}
-        Please verify:
-        1. Files are in CSV format
-        2. Files contain numeric data columns
-        3. Files follow standard financial statement format"""
-# Create Gradio interface with improved file handling
 iface = gr.Interface(
     fn=analyze_statements,
     inputs=[
@@ -242,8 +302,10 @@ iface = gr.Interface(
     description="""## Financial Analysis Tool
 How to use:
-1. Click 'Upload Income Statement' to select your income statement CSV file
-2. Click 'Upload Balance Sheet' to select your balance sheet CSV file
 3. Wait for the analysis to complete
 The tool will provide:
@@ -253,20 +315,19 @@ The tool will provide:
 Requirements:
 - Files must be in CSV format
-- Must contain numeric data columns
-- Standard financial statement format preferred""",
     flagging_mode="never"
 )
-# Launch the interface with better error handling
 if __name__ == "__main__":
     try:
-        iface.queue()  # Enable queuing for better file handling
         iface.launch(
             share=False,
             server_name="0.0.0.0",
-            server_port=7860,
-            show_api=False  # Disable API tab for security
         )
     except Exception as e:
         logger.error(f"Launch error: {str(e)}")

         torch.cuda.empty_cache()
     gc.collect()
def validate_financial_csv(file_obj, file_type):
    """Check that an uploaded CSV parses and has at least one expected header.

    Args:
        file_obj: Anything ``pd.read_csv`` accepts (path or file-like object).
        file_type: ``'income_statement'`` or ``'balance_sheet'``.

    Returns:
        A ``(is_valid, message)`` tuple. This function never raises; parse
        failures are reported through the message.
    """
    # Expected columns based on file type
    expected_columns = {
        'income_statement': ['Revenue', 'Expenses', 'Profit'],
        'balance_sheet': ['Assets', 'Liabilities', 'Equity'],
    }
    required_columns = expected_columns.get(file_type)
    if required_columns is None:
        # An unknown type used to fall through to a confusing
        # "Expected at least one of: set()" message.
        return False, f"Unknown file type: {file_type!r}"

    try:
        df = pd.read_csv(file_obj, skipinitialspace=True)
    except Exception as e:
        # Boundary handler: pandas raises several unrelated exception types
        # for bad input, and the caller only needs a user-facing message.
        return False, f"Invalid CSV file: {str(e)}"
    finally:
        # Reading a file-like object leaves it at EOF. Rewind it so the same
        # object can be parsed again after validation.
        seek = getattr(file_obj, "seek", None)
        if callable(seek):
            try:
                seek(0)
            except (OSError, ValueError):
                # Unseekable or closed stream: rewinding is best-effort.
                pass

    # Compare on stripped header names so padded headers such as " Revenue"
    # are accepted.
    found_columns = {str(col).strip() for col in df.columns}
    if not any(col in found_columns for col in required_columns):
        # A list (not a set) keeps the message order deterministic.
        return False, f"Missing required columns. Expected at least one of: {required_columns}"
    return True, "Valid CSV file"
 class FinancialAnalyzer:
     """Simplified Financial Analyzer using small models"""
     def __init__(self):
         self.sentiment_model = None
         self.analysis_model = None
         self.load_models()
             self.sentiment_model = pipeline(
                 "text-classification",
                 model="ProsusAI/finbert",
+                torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
+                truncation=True
             )
             # Load small model for analysis and recommendations
             if file_obj is None:
                 raise ValueError("No file provided")
+            # Read CSV with better column handling
+            df = pd.read_csv(file_obj, skipinitialspace=True)
             if df.empty:
                 raise ValueError("Empty CSV file")
+            # Clean column names
+            df.columns = df.columns.str.strip()
+            # Log the columns found
+            logger.info(f"Found columns: {df.columns.tolist()}")
+            # Remove any unnamed columns
+            df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
+            # Convert columns to numeric where possible
+            for col in df.columns:
+                df[col] = pd.to_numeric(df[col].str.replace('[$,()]', '', regex=True), errors='ignore')
             # Get numeric columns
             numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
             if len(numeric_cols) == 0:
                 raise ValueError("No numeric columns found in CSV")
+            logger.info(f"Numeric columns: {numeric_cols.tolist()}")
+            # Calculate meaningful KPIs
+            kpis = {
+                'total_revenue': df[numeric_cols].sum().sum(),
+                'average_values': df[numeric_cols].mean(),
+                'year_over_year_growth': df[numeric_cols].pct_change().mean() * 100,
+                'key_metrics': df[numeric_cols].describe()
             }
+            return df[numeric_cols].describe(), kpis
         except Exception as e:
             logger.error(f"Error processing CSV: {str(e)}")
     def analyze_financials(self, income_summary, balance_summary):
         """Generate financial analysis and recommendations"""
         try:
+            # Extract meaningful metrics
+            income_metrics = {
+                'Total Revenue': income_summary[1]['total_revenue'],
+                'Average Values': income_summary[1]['average_values'].mean(),
+                'Growth Rate': income_summary[1]['year_over_year_growth'].mean()
+            }
+            balance_metrics = {
+                'Total Assets': balance_summary[1]['total_revenue'],
+                'Average Values': balance_summary[1]['average_values'].mean(),
+                'Growth Rate': balance_summary[1]['year_over_year_growth'].mean()
+            }
             financial_context = f"""
+            Income Statement Analysis:
+            - Total Revenue: ${income_metrics['Total Revenue']:,.2f}
+            - Average Revenue: ${income_metrics['Average Values']:,.2f}
+            - Growth Rate: {income_metrics['Growth Rate']:.2f}%
+            Detailed Income Metrics:
             {income_summary[0].to_string()}
+            Balance Sheet Analysis:
+            - Total Assets: ${balance_metrics['Total Assets']:,.2f}
+            - Average Assets: ${balance_metrics['Average Values']:,.2f}
+            - Growth Rate: {balance_metrics['Growth Rate']:.2f}%
+            Detailed Balance Metrics:
             {balance_summary[0].to_string()}
             """
             # Generate sentiment analysis
+            sentiment = self.sentiment_model(
+                financial_context,
+                truncation=True,
+                max_length=512
+            )[0]
             # Generate business analysis
+            analysis_prompt = f"""[INST] As a financial analyst, provide a detailed analysis based on these financial metrics:
             {financial_context}
             Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
+            Please provide:
+            1. Business Status:
+            - Financial health assessment
+            - Growth trajectory
+            - Key performance indicators analysis
+            2. Key Insights:
+            - Revenue trends
+            - Asset utilization
+            - Financial efficiency metrics
+            - Areas of concern or opportunity
+            3. Strategic Recommendations:
+            - Specific action items based on the metrics
+            - Growth opportunities
+            - Risk mitigation strategies
+            - Timeline-based roadmap
+            Be specific and data-driven in your analysis.
             [/INST]"""
             response = self.analysis_model(
                 analysis_prompt,
+                max_length=1500,
+                do_sample=False,
+                num_return_sequences=1,
+                truncation=True,
+                pad_token_id=self.analysis_model.tokenizer.eos_token_id
             )
             return self.format_response(response[0]['generated_text'], sentiment)
         except Exception as e:
             logger.error(f"Error in analysis: {str(e)}")
+            return f"Error generating analysis: {str(e)}"
     def format_response(self, analysis_text, sentiment):
         """Format the analysis response into structured sections"""
             for section in sections:
                 if "Business Status" in section:
                     current_section = status
+                elif "Key Insights" in section:
                     current_section = insights
                 elif "Strategic Recommendations" in section:
                     current_section = recommendations
             output = [
                 "# Financial Analysis Report\n\n",
                 f"## Overall Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n",
+                "## Business Status\n",
                 "".join(f"- {item}\n" for item in status if item),
                 "\n## Key Business Insights\n",
                 "".join(f"- {item}\n" for item in insights if item),
 def analyze_statements(income_statement, balance_sheet):
     """Main function to analyze financial statements"""
     try:
+        if not income_statement or not balance_sheet:
             return "Please upload both Income Statement and Balance Sheet CSV files."
+        # Validate files
+        income_valid, income_msg = validate_financial_csv(income_statement, 'income_statement')
+        if not income_valid:
+            return f"Invalid Income Statement: {income_msg}"
+        balance_valid, balance_msg = validate_financial_csv(balance_sheet, 'balance_sheet')
+        if not balance_valid:
+            return f"Invalid Balance Sheet: {balance_msg}"
+        # Process if valid
         analyzer = FinancialAnalyzer()
+        income_summary = analyzer.process_csv(income_statement)
+        balance_summary = analyzer.process_csv(balance_sheet)
         result = analyzer.analyze_financials(income_summary, balance_summary)
         clear_gpu_memory()
         return result
     except Exception as e:
         logger.error(f"Analysis error: {str(e)}")
+        return f"""Analysis Error: {str(e)}
+        Please ensure your CSV files:
+        1. Have proper headers (Revenue, Expenses, Profit for Income Statement)
+        2. Contain numeric data
+        3. Follow standard financial statement format
+        4. Are not corrupted"""
+# Create Gradio interface
 iface = gr.Interface(
     fn=analyze_statements,
     inputs=[
     description="""## Financial Analysis Tool
 How to use:
+1. Prepare your CSV files with proper headers:
+   - Income Statement: Revenue, Expenses, Profit
+   - Balance Sheet: Assets, Liabilities, Equity
+2. Upload both files using the buttons below
 3. Wait for the analysis to complete
 The tool will provide:
 Requirements:
 - Files must be in CSV format
+- Must contain numeric data
+- Standard financial statement format required""",
     flagging_mode="never"
 )
+# Launch the interface
 if __name__ == "__main__":
     try:
+        iface.queue()
         iface.launch(
             share=False,
             server_name="0.0.0.0",
+            server_port=7860
         )
     except Exception as e:
         logger.error(f"Launch error: {str(e)}")