Spaces:

walaa2022
/

financial-analysis-system

Sleeping

App Files Files Community

walaa2022 committed on Nov 26, 2024

Commit

a772146

verified ·

1 Parent(s): 98d6352

Update app.py

Browse files

Files changed (1) hide show

app.py +104 -87

app.py CHANGED Viewed

@@ -6,119 +6,137 @@ import logging
 import gc
 from transformers import pipeline
-# Setup logging
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s'
 )
 logger = logging.getLogger(__name__)
-# Device configuration
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 logger.info(f"Using device: {DEVICE}")
 def clear_gpu_memory():
-    """Utility function to clear GPU memory"""
     if DEVICE == "cuda":
         torch.cuda.empty_cache()
     gc.collect()
 class FinancialAnalyzer:
-    """Financial analysis using Tiny Llama and FinBERT models"""
     def __init__(self):
         self.analysis_model = None
         self.sentiment_model = None
         self.load_models()
     def load_models(self):
-        """Load models for analysis and sentiment"""
         try:
-            # Load Tiny Llama for analysis
-            logger.info("Loading TinyLlama model...")
             self.analysis_model = pipeline(
                 "text-generation",
                 model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                 torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
             )
-            # Load FinBERT for sentiment
-            logger.info("Loading FinBERT model...")
             self.sentiment_model = pipeline(
                 "text-classification",
                 model="ProsusAI/finbert",
                 torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
             )
-            logger.info("Models loaded successfully")
         except Exception as e:
             logger.error(f"Error loading models: {str(e)}")
             raise
-    def process_csv(self, file_obj):
-        """Process CSV file and extract financial data"""
         try:
-            if file_obj is None:
-                raise ValueError("No file provided")
-            # Read CSV with better error handling
-            df = pd.read_csv(file_obj, skipinitialspace=True)
-            if df.empty:
-                raise ValueError("Empty CSV file")
-            # Clean column names
-            df.columns = df.columns.str.strip()
-            # Remove unnamed columns
-            df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
-            # Convert to numeric where possible
-            for col in df.columns:
-                df[col] = pd.to_numeric(df[col].str.replace('[$,()]', '', regex=True), errors='ignore')
-            # Get numeric columns
-            numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
-            if len(numeric_cols) == 0:
-                raise ValueError("No numeric columns found in CSV")
-            return df[numeric_cols].describe()
-        except Exception as e:
-            logger.error(f"Error processing CSV: {str(e)}")
-            raise
     def analyze_financials(self, income_data, balance_data):
-        """Generate financial analysis and recommendations"""
         try:
             financial_context = f"""
             Income Statement Analysis:
-            {income_data.to_string()}
             Balance Sheet Analysis:
-            {balance_data.to_string()}
             """
-            # Generate sentiment analysis
             sentiment = self.sentiment_model(
                 financial_context,
                 truncation=True,
                 max_length=512
             )[0]
-            # Generate analysis
-            analysis_prompt = f"""[INST] As a financial analyst, analyze these financial statements:
             {financial_context}
-            Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
-            Provide:
-            1. Business Status and Health Assessment
-            2. Key Financial Insights and Metrics
-            3. Strategic Recommendations and Action Plan
-            Be specific and data-driven in your analysis.
             [/INST]"""
             response = self.analysis_model(
@@ -129,40 +147,42 @@ class FinancialAnalyzer:
                 truncation=True
             )
-            return self.format_response(response[0]['generated_text'], sentiment)
         except Exception as e:
             logger.error(f"Error in analysis: {str(e)}")
             return f"Error generating analysis: {str(e)}"
-    def format_response(self, analysis_text, sentiment):
         """Format the analysis response"""
         try:
-            sections = analysis_text.split('\n\n')
             output = [
                 "# Financial Analysis Report\n\n",
-                f"## Overall Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n"
             ]
             current_section = None
             for section in sections:
                 section = section.strip()
                 if not section:
                     continue
                 if "Business Status" in section:
                     output.append("## Business Status\n")
-                    current_section = "status"
-                elif "Key Financial Insights" in section:
-                    output.append("\n## Key Insights\n")
-                    current_section = "insights"
-                elif "Strategic Recommendations" in section:
-                    output.append("\n## Recommendations\n")
-                    current_section = "recommendations"
-                elif current_section:
-                    output.append(f"- {section}\n")
             return "".join(output)
         except Exception as e:
@@ -175,18 +195,17 @@ def analyze_statements(income_statement, balance_sheet):
         if not income_statement or not balance_sheet:
             return "Please upload both Income Statement and Balance Sheet CSV files."
-        # Initialize analyzer
         analyzer = FinancialAnalyzer()
-        # Process statements
-        logger.info("Processing income statement...")
-        income_data = analyzer.process_csv(income_statement)
-        logger.info("Processing balance sheet...")
-        balance_data = analyzer.process_csv(balance_sheet)
         # Generate analysis
-        logger.info("Generating analysis...")
         result = analyzer.analyze_financials(income_data, balance_data)
         clear_gpu_memory()
@@ -196,10 +215,10 @@ def analyze_statements(income_statement, balance_sheet):
         logger.error(f"Analysis error: {str(e)}")
         return f"""Analysis Error: {str(e)}
-        Please ensure your CSV files:
-        1. Contain numeric financial data
-        2. Have proper column headers
-        3. Are not corrupted"""
 # Create Gradio interface
 iface = gr.Interface(
@@ -218,14 +237,12 @@ iface = gr.Interface(
     title="AI Financial Statement Analyzer",
     description="""## Financial Analysis Tool
-Upload your financial statements to get:
-- Business Status Assessment
-- Key Financial Insights
-- Strategic Recommendations
-Requirements:
-- CSV files with numeric data
-- Standard financial statement format""",
     flagging_mode="never"
 )

 import gc
 from transformers import pipeline
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s'
 )
 logger = logging.getLogger(__name__)
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 logger.info(f"Using device: {DEVICE}")
 def clear_gpu_memory():
     if DEVICE == "cuda":
         torch.cuda.empty_cache()
     gc.collect()
 class FinancialAnalyzer:
     def __init__(self):
+        self.data_model = None
         self.analysis_model = None
         self.sentiment_model = None
         self.load_models()
     def load_models(self):
+        """Load models for data extraction and analysis"""
         try:
+            # Model for understanding and extracting data from CSV
+            logger.info("Loading data extraction model...")
+            self.data_model = pipeline(
+                "text-generation",
+                model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+                torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
+            )
+            # Model for financial analysis
+            logger.info("Loading analysis model...")
             self.analysis_model = pipeline(
                 "text-generation",
                 model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                 torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
             )
+            # Model for sentiment analysis
+            logger.info("Loading sentiment model...")
             self.sentiment_model = pipeline(
                 "text-classification",
                 model="ProsusAI/finbert",
                 torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
             )
+            logger.info("All models loaded successfully")
         except Exception as e:
             logger.error(f"Error loading models: {str(e)}")
             raise
+    def extract_financial_data(self, file_obj, statement_type):
+        """Use generative AI to understand and extract data from CSV"""
         try:
+            # Read raw CSV content
+            df = pd.read_csv(file_obj)
+            raw_data = df.to_string()
+            # Create prompt for data extraction
+            extraction_prompt = f"""[INST] As a financial expert, analyze this raw {statement_type} data:
+            {raw_data}
+            Extract and summarize the following:
+            1. Key financial numbers (revenue, profit, assets, liabilities etc.)
+            2. Time periods covered
+            3. Important trends and patterns
+            4. Any significant financial metrics
+            Present the extracted data in a clear, structured format.
+            Focus on the most important financial information regardless of how the data is formatted.
+            [/INST]"""
+            # Generate structured extraction
+            response = self.data_model(
+                extraction_prompt,
+                max_length=1000,
+                do_sample=False,
+                num_return_sequences=1,
+                truncation=True
+            )
+            logger.info(f"Data extracted from {statement_type}")
+            return response[0]['generated_text']
     def analyze_financials(self, income_data, balance_data):
+        """Generate financial analysis based on extracted data"""
         try:
+            # Combine extracted data
             financial_context = f"""
             Income Statement Analysis:
+            {income_data}
             Balance Sheet Analysis:
+            {balance_data}
             """
+            # Get sentiment
             sentiment = self.sentiment_model(
                 financial_context,
                 truncation=True,
                 max_length=512
             )[0]
+            # Generate comprehensive analysis
+            analysis_prompt = f"""[INST] As a senior financial analyst, review this financial data:
             {financial_context}
+            Market Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
+            Provide a detailed analysis including:
+            1. Business Status
+            - Overall financial health
+            - Performance assessment
+            - Key metrics analysis
+            2. Strategic Insights
+            - Market position
+            - Competitive advantages
+            - Areas of concern
+            3. Recommendations & Roadmap
+            - Strategic initiatives
+            - Improvement opportunities
+            - Action timeline
+            Base your analysis on the extracted financial data and provide specific insights.
             [/INST]"""
             response = self.analysis_model(
                 truncation=True
             )
+            return self.format_response(response[0]['generated_text'], sentiment, financial_context)
         except Exception as e:
             logger.error(f"Error in analysis: {str(e)}")
             return f"Error generating analysis: {str(e)}"
+    def format_response(self, analysis_text, sentiment, context):
         """Format the analysis response"""
         try:
             output = [
                 "# Financial Analysis Report\n\n",
+                f"## Market Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n",
+                "## Extracted Financial Data\n```\n",
+                context,
+                "\n```\n\n"
             ]
+            sections = analysis_text.split('\n\n')
             current_section = None
             for section in sections:
                 section = section.strip()
                 if not section:
                     continue
                 if "Business Status" in section:
                     output.append("## Business Status\n")
+                elif "Strategic Insights" in section:
+                    output.append("\n## Strategic Insights\n")
+                elif "Recommendations" in section:
+                    output.append("\n## Recommendations & Roadmap\n")
+                else:
+                    if not section.startswith('-'):
+                        section = f"- {section}"
+                    output.append(f"{section}\n")
             return "".join(output)
         except Exception as e:
         if not income_statement or not balance_sheet:
             return "Please upload both Income Statement and Balance Sheet CSV files."
         analyzer = FinancialAnalyzer()
+        # Extract data from CSVs using generative AI
+        logger.info("Extracting data from Income Statement...")
+        income_data = analyzer.extract_financial_data(income_statement, "Income Statement")
+        logger.info("Extracting data from Balance Sheet...")
+        balance_data = analyzer.extract_financial_data(balance_sheet, "Balance Sheet")
         # Generate analysis
+        logger.info("Generating comprehensive analysis...")
         result = analyzer.analyze_financials(income_data, balance_data)
         clear_gpu_memory()
         logger.error(f"Analysis error: {str(e)}")
         return f"""Analysis Error: {str(e)}
+        Please verify:
+        1. Files are in CSV format
+        2. Files contain financial data
+        3. Files are not corrupted"""
 # Create Gradio interface
 iface = gr.Interface(
     title="AI Financial Statement Analyzer",
     description="""## Financial Analysis Tool
+Upload your financial statements (any CSV format) and let AI:
+- Extract and understand the financial data
+- Provide comprehensive analysis
+- Generate strategic recommendations
+No specific format required - AI will interpret your data!""",
     flagging_mode="never"
 )