Spaces:

walaa2022
/

financial-analysis-system

Sleeping

App Files Files Community

walaa2022 committed on Nov 26, 2024

Commit

30c4182

verified ·

1 Parent(s): f95f954

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -171

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import os
 import gradio as gr
 import pandas as pd
 import torch
@@ -17,200 +17,103 @@ logger = logging.getLogger(__name__)
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 logger.info(f"Using device: {DEVICE}")
 def clear_gpu_memory():
     """Utility function to clear GPU memory"""
     if DEVICE == "cuda":
         torch.cuda.empty_cache()
     gc.collect()
-class FinancialDataExtractor:
-    """Extract and clean financial data"""
-    def __init__(self):
-        self.logger = logger
-    def clean_number(self, value):
-        """Clean numeric values from financial statements"""
-        try:
-            if pd.isna(value) or value == '' or value == '-':
-                return 0.0
-            if isinstance(value, (int, float)):
-                return float(value)
-            # Remove currency symbols, spaces, commas
-            cleaned = str(value).replace('$', '').replace(',', '').replace('"', '').strip()
-            # Handle parentheses for negative numbers
-            if '(' in cleaned and ')' in cleaned:
-                cleaned = '-' + cleaned.replace('(', '').replace(')', '')
-            return float(cleaned)
-        except:
-            return 0.0
-    def extract_data(self, df: pd.DataFrame) -> pd.DataFrame:
-        """Extract and clean data from DataFrame"""
-        # Clean column names
-        df.columns = df.columns.str.strip()
-        # Get year columns
-        year_cols = [col for col in df.columns if str(col).isdigit()]
-        if not year_cols:
-            raise ValueError("No year columns found in data")
-        # Clean numeric data
-        for col in year_cols:
-            df[col] = df[col].apply(self.clean_number)
-        return df, year_cols
 class FinancialAnalyzer:
-    """Financial analysis using small models"""
     def __init__(self):
-        self.extractor = FinancialDataExtractor()
-        self.sentiment_model = None
         self.analysis_model = None
         self.load_models()
     def load_models(self):
-        """Load the required models"""
         try:
             # Load FinBERT for sentiment analysis
             self.sentiment_model = pipeline(
                 "text-classification",
-                model="ProsusAI/finbert",
-                torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
-                truncation=True
             )
-            # Load small model for analysis
-            self.analysis_model = pipeline(
                 "text-generation",
-                model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                 torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
             )
-            logger.info("Models loaded successfully")
         except Exception as e:
             logger.error(f"Error loading models: {str(e)}")
             raise
-    def calculate_metrics(self, income_df: pd.DataFrame, balance_df: pd.DataFrame, year_cols: list) -> dict:
-        """Calculate financial metrics"""
-        metrics = {}
-        for year in year_cols:
-            # Income Statement metrics
-            income = {
-                'Revenue': income_df[income_df['Period'].str.contains('Total Net Revenue|Revenue', na=False, case=False)][year].iloc[0],
-                'COGS': income_df[income_df['Period'].str.contains('Cost of Goods Sold', na=False, case=False)][year].iloc[0],
-                'Operating_Expenses': income_df[income_df['Period'].str.contains('Total Expenses', na=False, case=False)][year].iloc[0],
-                'EBIT': income_df[income_df['Period'].str.contains('Earnings Before Interest & Taxes', na=False, case=False)][year].iloc[0],
-                'Net_Income': income_df[income_df['Period'].str.contains('Net Income|Net Earnings', na=False, case=False)][year].iloc[-1]
-            }
-            # Balance Sheet metrics
-            balance = {
-                'Total_Assets': balance_df[balance_df['Period'].str.contains('Total Assets', na=False, case=False)][year].iloc[0],
-                'Current_Assets': balance_df[balance_df['Period'].str.contains('Total current assets', na=False, case=False)][year].iloc[0],
-                'Total_Liabilities': balance_df[balance_df['Period'].str.contains('Total Liabilities', na=False, case=False)][year].iloc[0],
-                'Current_Liabilities': balance_df[balance_df['Period'].str.contains('Total current liabilities', na=False, case=False)][year].iloc[0],
-                'Equity': balance_df[balance_df['Period'].str.contains("Shareholder's Equity", na=False, case=False)][year].iloc[-1]
-            }
-            # Calculate ratios
-            metrics[year] = {
-                'Profitability': {
-                    'Gross_Margin': ((income['Revenue'] - income['COGS']) / income['Revenue']) * 100,
-                    'Operating_Margin': (income['EBIT'] / income['Revenue']) * 100,
-                    'Net_Margin': (income['Net_Income'] / income['Revenue']) * 100,
-                    'ROE': (income['Net_Income'] / balance['Equity']) * 100,
-                    'ROA': (income['Net_Income'] / balance['Total_Assets']) * 100
-                },
-                'Liquidity': {
-                    'Current_Ratio': balance['Current_Assets'] / balance['Current_Liabilities'],
-                    'Working_Capital': balance['Current_Assets'] - balance['Current_Liabilities']
-                },
-                'Growth': {
-                    'Revenue': income['Revenue'],
-                    'Net_Income': income['Net_Income'],
-                    'Total_Assets': balance['Total_Assets']
-                }
-            }
-        return metrics
-    def analyze_financials(self, income_df: pd.DataFrame, balance_df: pd.DataFrame) -> str:
-        """Generate financial analysis"""
         try:
-            # Extract and clean data
-            income_df, year_cols = self.extractor.extract_data(income_df)
-            balance_df, _ = self.extractor.extract_data(balance_df)
-            # Calculate metrics
-            metrics = self.calculate_metrics(income_df, balance_df, year_cols)
-            # Get latest and earliest years
-            latest_year = max(year_cols)
-            earliest_year = min(year_cols)
-            # Calculate growth
-            revenue_growth = ((metrics[latest_year]['Growth']['Revenue'] / metrics[earliest_year]['Growth']['Revenue']) - 1) * 100
-            profit_growth = ((metrics[latest_year]['Growth']['Net_Income'] / metrics[earliest_year]['Growth']['Net_Income']) - 1) * 100
-            # Generate analysis context
-            context = f"""Financial Analysis ({earliest_year}-{latest_year}):
-Performance Metrics:
-- Revenue Growth: {revenue_growth:.1f}%
-- Profit Growth: {profit_growth:.1f}%
-- Current Gross Margin: {metrics[latest_year]['Profitability']['Gross_Margin']:.1f}%
-- Current Net Margin: {metrics[latest_year]['Profitability']['Net_Margin']:.1f}%
-- ROE: {metrics[latest_year]['Profitability']['ROE']:.1f}%
-- Current Ratio: {metrics[latest_year]['Liquidity']['Current_Ratio']:.2f}
-Trends:
-- Revenue has grown from ${metrics[earliest_year]['Growth']['Revenue']:,.0f} to ${metrics[latest_year]['Growth']['Revenue']:,.0f}
-- Net Income has changed from ${metrics[earliest_year]['Growth']['Net_Income']:,.0f} to ${metrics[latest_year]['Growth']['Net_Income']:,.0f}
-- Profitability margins show {('improving' if metrics[latest_year]['Profitability']['Net_Margin'] > metrics[earliest_year]['Profitability']['Net_Margin'] else 'declining')} trend"""
-            # Get sentiment
-            sentiment = self.sentiment_model(context[:512])[0]
-            # Generate detailed analysis
-            analysis = self.analysis_model(
-                f"[INST] As a financial analyst, provide a detailed analysis of this company:\n\n{context}\n\nInclude:\n1. Financial health assessment\n2. Key performance insights\n3. Strategic recommendations [/INST]",
                 max_length=1500,
                 num_return_sequences=1,
                 do_sample=True,
                 temperature=0.7
-            )[0]['generated_text']
-            # Format output
-            output = f"""# Financial Analysis Report
-## Overall Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.1%})
-## Key Performance Indicators ({latest_year})
-- Gross Margin: {metrics[latest_year]['Profitability']['Gross_Margin']:.1f}%
-- Operating Margin: {metrics[latest_year]['Profitability']['Operating_Margin']:.1f}%
-- Net Margin: {metrics[latest_year]['Profitability']['Net_Margin']:.1f}%
-- ROE: {metrics[latest_year]['Profitability']['ROE']:.1f}%
-- Current Ratio: {metrics[latest_year]['Liquidity']['Current_Ratio']:.2f}
-## Performance Trends ({earliest_year}-{latest_year})
-- Revenue Growth: {revenue_growth:.1f}%
-- Profit Growth: {profit_growth:.1f}%
-- Working Capital: ${metrics[latest_year]['Liquidity']['Working_Capital']:,.0f}
-## Analysis
-{analysis}"""
-            return output
         except Exception as e:
             logger.error(f"Analysis error: {str(e)}")
-            raise
 def analyze_statements(income_statement, balance_sheet):
     """Main function to analyze financial statements"""
@@ -218,13 +121,16 @@ def analyze_statements(income_statement, balance_sheet):
         if not income_statement or not balance_sheet:
             return "Please upload both Income Statement and Balance Sheet CSV files."
-        # Read files
-        income_df = pd.read_csv(income_statement.name)
-        balance_df = pd.read_csv(balance_sheet.name)
-        # Create analyzer and process
         analyzer = FinancialAnalyzer()
-        result = analyzer.analyze_financials(income_df, balance_df)
         # Clear memory
         clear_gpu_memory()
@@ -243,20 +149,16 @@ def analyze_statements(income_statement, balance_sheet):
 # Create Gradio interface
 iface = gr.Interface(
     fn=analyze_statements,
-    inputs=[
-        gr.File(label="Upload Income Statement (CSV)", file_types=[".csv"]),
-        gr.File(label="Upload Balance Sheet (CSV)", file_types=[".csv"])
-    ],
     outputs=gr.Markdown(),
-    title="Financial Statement Analyzer",
-    description="""## Financial Analysis Tool
 Upload your financial statements to get:
-- Performance Analysis
 - Key Metrics & Ratios
 - Trend Analysis
-- Strategic Recommendations""",
-    examples=None
 )
 # Launch the interface
@@ -265,4 +167,4 @@ if __name__ == "__main__":
         iface.launch(server_name="0.0.0.0", server_port=7860)
     except Exception as e:
         logger.error(f"Launch error: {str(e)}")
-        sys.exit(1)

+import os
 import gradio as gr
 import pandas as pd
 import torch
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 logger.info(f"Using device: {DEVICE}")
+# Clear GPU memory utility
 def clear_gpu_memory():
     """Empty the CUDA cache (when running on GPU) and run garbage collection.

     ``torch.cuda.empty_cache()`` is called only when ``DEVICE`` is ``"cuda"``;
     ``gc.collect()`` runs on every call regardless of device.
     """
     if DEVICE == "cuda":
         torch.cuda.empty_cache()
     gc.collect()
 class FinancialAnalyzer:
+    """Financial analysis built on three Hugging Face pipelines.
+
+    Attributes are populated by ``load_models()``:
+      analysis_model  -- TinyLlama text-generation pipeline (status / insights)
+      sentiment_model -- FinBERT-tone text-classification pipeline (sentiment)
+      falcon_model    -- Falcon-7B text-generation pipeline (roadmap / recommendations)
+    """
     def __init__(self):
+        # Start every pipeline handle as None, then load all of them eagerly.
         self.analysis_model = None
+        self.sentiment_model = None
+        self.falcon_model = None
         self.load_models()
     def load_models(self):
+        """Create the TinyLlama, FinBERT and Falcon pipelines on this instance.
+
+        Each pipeline is built with ``torch_dtype`` float16 when ``DEVICE`` is
+        ``"cuda"`` and float32 otherwise.
+
+        Raises:
+            Exception: any error raised while building a pipeline is logged
+                and then re-raised unchanged.
+        """
+        # NOTE(review): no `device` argument is passed to pipeline(), so only the
+        # dtype depends on DEVICE here -- confirm where the models actually run.
         try:
+            # Load Tiny Llama for generating financial analysis and insights
+            self.analysis_model = pipeline(
+                "text-generation",
+                model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # Tiny Llama model for analysis
+                torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
+            )
             # Load FinBERT for sentiment analysis
             self.sentiment_model = pipeline(
                 "text-classification",
+                model="yiyanghkust/finbert-tone",  # FinBERT model for sentiment analysis
+                torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
             )
+            # Load Falcon model for generating roadmap and recommendations
+            self.falcon_model = pipeline(
                 "text-generation",
+                model="tiiuae/falcon-7b",  # Falcon model for recommendations and roadmap
                 torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
             )
+            logger.info("Tiny Llama, FinBERT, and Falcon models loaded successfully")
         except Exception as e:
             logger.error(f"Error loading models: {str(e)}")
             raise
+    def analyze_financials(self, csv_data: str) -> str:
+        """Build a Markdown report from financial-statement text.
+
+        Runs three steps: TinyLlama generates status/insights from ``csv_data``,
+        FinBERT classifies the first 512 characters of that generated text, and
+        Falcon generates a roadmap from the insights.
+
+        Args:
+            csv_data: Statement data already rendered as text; it is embedded
+                verbatim in the first prompt.
+
+        Returns:
+            The Markdown report, or the string ``"Analysis Error: <message>"``
+            if any step raises (the error is logged, not re-raised).
+        """
         try:
+            # Generate status and insights using Tiny Llama
+            # NOTE(review): max_length presumably counts the prompt tokens too, and
+            # generated_text presumably echoes the prompt by default -- confirm
+            # against the transformers text-generation pipeline documentation.
+            status_prompt = f"Please analyze the following financial data and provide status, insights, and metrics:\n\n{csv_data}"
+            response = self.analysis_model(
+                status_prompt,
+                max_length=1500,
+                num_return_sequences=1,
+                do_sample=True,
+                temperature=0.7
+            )
+            insights_result = response[0]['generated_text'].strip()
+            # Get sentiment analysis from FinBERT
+            sentiment = self.sentiment_model(insights_result[:512])[0]  # Slice keeps the first 512 characters (not tokens)
+            sentiment_label = sentiment['label']
+            sentiment_score = sentiment['score']
+            # Generate recommendations and roadmap using Falcon
+            roadmap_prompt = f"Based on the following financial insights, create a strategic roadmap and recommendations for the company:\n\n{insights_result}"
+            roadmap_response = self.falcon_model(
+                roadmap_prompt,
                 max_length=1500,
                 num_return_sequences=1,
                 do_sample=True,
                 temperature=0.7
+            )
+            roadmap_result = roadmap_response[0]['generated_text'].strip()
+            # Return a comprehensive report
+            result = f"""# Financial Analysis Report
+### Sentiment Analysis: {sentiment_label} ({sentiment_score:.1%})
+### Financial Status and Insights:
+{insights_result}
+### Recommendations and Roadmap:
+{roadmap_result}
+"""
+            return result
         except Exception as e:
             logger.error(f"Analysis error: {str(e)}")
+            # Errors are reported to the caller as text rather than raised.
+            return f"Analysis Error: {str(e)}"
+# Function to read CSV and convert to text format
+def csv_to_text(file_path: str) -> str:
+    """Read a CSV file and return its contents as a plain-text table.
+
+    Args:
+        file_path: Path of the CSV file, passed to ``pd.read_csv``.
+
+    Returns:
+        The result of ``DataFrame.to_string(index=False)`` for the loaded data.
+    """
+    df = pd.read_csv(file_path)
+    return df.to_string(index=False)  # Convert DataFrame to string without index
 def analyze_statements(income_statement, balance_sheet):
     """Main function to analyze financial statements"""
         if not income_statement or not balance_sheet:
             return "Please upload both Income Statement and Balance Sheet CSV files."
+        # Read files as raw text (no need to clean manually)
+        income_data = csv_to_text(income_statement.name)
+        balance_data = csv_to_text(balance_sheet.name)
+        # Combine the data for AI to process (can adjust prompt as needed)
+        combined_data = f"Income Statement Data:\n{income_data}\n\nBalance Sheet Data:\n{balance_data}"
+        # Create analyzer and process data
         analyzer = FinancialAnalyzer()
+        result = analyzer.analyze_financials(combined_data)
         # Clear memory
         clear_gpu_memory()
 # Create Gradio interface
+# Two CSV uploads (income statement first, balance sheet second) are handed to
+# analyze_statements; whatever it returns is rendered as Markdown.
 iface = gr.Interface(
     fn=analyze_statements,
+    inputs=[gr.File(label="Upload Income Statement (CSV)", file_types=[".csv"]),
+            gr.File(label="Upload Balance Sheet (CSV)", file_types=[".csv"])],
     outputs=gr.Markdown(),
+    title="Generative Financial Statement Analyzer with Tiny Llama, FinBERT, and Falcon",
+    description="""## Financial Analysis Tool (AI-powered)
 Upload your financial statements to get:
+- Status & Insights
 - Key Metrics & Ratios
 - Trend Analysis
+- Strategic Recommendations & Roadmap""",
 )
 # Launch the interface
         iface.launch(server_name="0.0.0.0", server_port=7860)
     except Exception as e:
         logger.error(f"Launch error: {str(e)}")
+        sys.exit(1)