Spaces:

walaa2022
/

financial-analysis-system

Sleeping

App Files Files Community

walaa2022 committed on Nov 26, 2024

Commit

a88d6ed

verified ·

1 Parent(s): a772146

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -118

app.py CHANGED Viewed

@@ -5,13 +5,17 @@ import torch
 import logging
 import gc
 from transformers import pipeline
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s'
 )
 logger = logging.getLogger(__name__)
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 logger.info(f"Using device: {DEVICE}")
@@ -20,123 +24,71 @@ def clear_gpu_memory():
         torch.cuda.empty_cache()
     gc.collect()
 class FinancialAnalyzer:
     def __init__(self):
-        self.data_model = None
         self.analysis_model = None
         self.sentiment_model = None
         self.load_models()
     def load_models(self):
-        """Load models for data extraction and analysis"""
         try:
-            # Model for understanding and extracting data from CSV
-            logger.info("Loading data extraction model...")
-            self.data_model = pipeline(
-                "text-generation",
-                model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-                torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
-            )
-            # Model for financial analysis
-            logger.info("Loading analysis model...")
             self.analysis_model = pipeline(
                 "text-generation",
                 model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                 torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
             )
-            # Model for sentiment analysis
-            logger.info("Loading sentiment model...")
             self.sentiment_model = pipeline(
                 "text-classification",
                 model="ProsusAI/finbert",
                 torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
             )
-            logger.info("All models loaded successfully")
         except Exception as e:
             logger.error(f"Error loading models: {str(e)}")
             raise
-    def extract_financial_data(self, file_obj, statement_type):
-        """Use generative AI to understand and extract data from CSV"""
-        try:
-            # Read raw CSV content
-            df = pd.read_csv(file_obj)
-            raw_data = df.to_string()
-            # Create prompt for data extraction
-            extraction_prompt = f"""[INST] As a financial expert, analyze this raw {statement_type} data:
-            {raw_data}
-            Extract and summarize the following:
-            1. Key financial numbers (revenue, profit, assets, liabilities etc.)
-            2. Time periods covered
-            3. Important trends and patterns
-            4. Any significant financial metrics
-            Present the extracted data in a clear, structured format.
-            Focus on the most important financial information regardless of how the data is formatted.
-            [/INST]"""
-            # Generate structured extraction
-            response = self.data_model(
-                extraction_prompt,
-                max_length=1000,
-                do_sample=False,
-                num_return_sequences=1,
-                truncation=True
-            )
-            logger.info(f"Data extracted from {statement_type}")
-            return response[0]['generated_text']
-    def analyze_financials(self, income_data, balance_data):
-        """Generate financial analysis based on extracted data"""
         try:
-            # Combine extracted data
-            financial_context = f"""
-            Income Statement Analysis:
-            {income_data}
-            Balance Sheet Analysis:
-            {balance_data}
-            """
-            # Get sentiment
             sentiment = self.sentiment_model(
-                financial_context,
                 truncation=True,
                 max_length=512
             )[0]
-            # Generate comprehensive analysis
-            analysis_prompt = f"""[INST] As a senior financial analyst, review this financial data:
-            {financial_context}
-            Market Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
-            Provide a detailed analysis including:
-            1. Business Status
-            - Overall financial health
-            - Performance assessment
-            - Key metrics analysis
-            2. Strategic Insights
-            - Market position
-            - Competitive advantages
-            - Areas of concern
-            3. Recommendations & Roadmap
-            - Strategic initiatives
-            - Improvement opportunities
-            - Action timeline
-            Base your analysis on the extracted financial data and provide specific insights.
             [/INST]"""
             response = self.analysis_model(
@@ -147,23 +99,21 @@ class FinancialAnalyzer:
                 truncation=True
             )
-            return self.format_response(response[0]['generated_text'], sentiment, financial_context)
         except Exception as e:
             logger.error(f"Error in analysis: {str(e)}")
             return f"Error generating analysis: {str(e)}"
     def format_response(self, analysis_text, sentiment, context):
-        """Format the analysis response"""
         try:
             output = [
                 "# Financial Analysis Report\n\n",
-                f"## Market Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n",
-                "## Extracted Financial Data\n```\n",
                 context,
                 "\n```\n\n"
             ]
             sections = analysis_text.split('\n\n')
             current_section = None
@@ -174,43 +124,69 @@ class FinancialAnalyzer:
                 if "Business Status" in section:
                     output.append("## Business Status\n")
-                elif "Strategic Insights" in section:
-                    output.append("\n## Strategic Insights\n")
-                elif "Recommendations" in section:
-                    output.append("\n## Recommendations & Roadmap\n")
                 else:
                     if not section.startswith('-'):
                         section = f"- {section}"
                     output.append(f"{section}\n")
-            return "".join(output)
         except Exception as e:
             logger.error(f"Error formatting response: {str(e)}")
             return "Error formatting analysis results"
 def analyze_statements(income_statement, balance_sheet):
-    """Main function to analyze financial statements"""
     try:
         if not income_statement or not balance_sheet:
             return "Please upload both Income Statement and Balance Sheet CSV files."
-        analyzer = FinancialAnalyzer()
-        # Extract data from CSVs using generative AI
-        logger.info("Extracting data from Income Statement...")
-        income_data = analyzer.extract_financial_data(income_statement, "Income Statement")
-        logger.info("Extracting data from Balance Sheet...")
-        balance_data = analyzer.extract_financial_data(balance_sheet, "Balance Sheet")
-        # Generate analysis
-        logger.info("Generating comprehensive analysis...")
-        result = analyzer.analyze_financials(income_data, balance_data)
-        clear_gpu_memory()
-        return result
     except Exception as e:
         logger.error(f"Analysis error: {str(e)}")
         return f"""Analysis Error: {str(e)}
@@ -237,12 +213,14 @@ iface = gr.Interface(
     title="AI Financial Statement Analyzer",
     description="""## Financial Analysis Tool
-Upload your financial statements (any CSV format) and let AI:
-- Extract and understand the financial data
-- Provide comprehensive analysis
-- Generate strategic recommendations
-No specific format required - AI will interpret your data!""",
     flagging_mode="never"
 )

 import logging
 import gc
 from transformers import pipeline
+import json
+import csv
+# Setup logging
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s'
 )
 logger = logging.getLogger(__name__)
+# Device configuration
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 logger.info(f"Using device: {DEVICE}")
         torch.cuda.empty_cache()
     gc.collect()
def clean_financial_value(value):
    """Convert a raw financial cell value to a float.

    Strings are normalized before parsing: surrounding whitespace, double
    quotes, inner spaces and thousands separators (",") are removed, and a
    value containing both "(" and ")" is treated as an accounting-style
    negative, e.g. "(1,234.50)" -> -1234.5.

    Args:
        value: A string or a real number. Any real number is accepted
            (``int``, ``float`` and other ``numbers.Real`` types), not only
            the built-in ``int``/``float``.

    Returns:
        The parsed float, or 0.0 when the value cannot be interpreted as a
        number (unparseable string, ``None``, or any other type).
    """
    # Function-scope import so this block does not depend on a file-level
    # import being added elsewhere.
    import numbers

    try:
        if isinstance(value, str):
            text = value.strip().replace('"', '').replace(' ', '')
            # Accounting notation: parentheses mark a negative amount.
            if '(' in text and ')' in text:
                text = '-' + text.replace('(', '').replace(')', '')
            text = text.replace(',', '')
            try:
                return float(text)
            except ValueError:
                return 0.0
        # numbers.Real also covers real-number types that do not subclass
        # int/float; the previous (int, float) check turned those into 0.0.
        if isinstance(value, numbers.Real):
            return float(value)
        return 0.0
    except Exception as e:
        # Best-effort boundary (e.g. OverflowError on huge ints): log and
        # fall back to 0.0 rather than abort the caller.
        logger.error(f"Error cleaning value: {str(e)}")
        return 0.0
 class FinancialAnalyzer:
     def __init__(self):
         self.analysis_model = None
         self.sentiment_model = None
         self.load_models()
     def load_models(self):
         try:
+            # Load analysis model
             self.analysis_model = pipeline(
                 "text-generation",
                 model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                 torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
             )
+            # Load sentiment model
             self.sentiment_model = pipeline(
                 "text-classification",
                 model="ProsusAI/finbert",
                 torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
             )
+            logger.info("Models loaded successfully")
         except Exception as e:
             logger.error(f"Error loading models: {str(e)}")
             raise
+    def analyze_financials(self, context):
         try:
+            # Generate sentiment analysis
             sentiment = self.sentiment_model(
+                context,
                 truncation=True,
                 max_length=512
             )[0]
+            # Generate analysis
+            analysis_prompt = f"""[INST] As a financial analyst, analyze this data:
+            {context}
+            Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
+            Provide:
+            1. Business Status and Health Assessment
+            2. Key Financial Insights and Metrics
+            3. Strategic Recommendations and Action Plan
+            Be specific and data-driven in your analysis.
             [/INST]"""
             response = self.analysis_model(
                 truncation=True
             )
+            return self.format_response(response[0]['generated_text'], sentiment, context)
         except Exception as e:
             logger.error(f"Error in analysis: {str(e)}")
             return f"Error generating analysis: {str(e)}"
     def format_response(self, analysis_text, sentiment, context):
         try:
             output = [
                 "# Financial Analysis Report\n\n",
+                f"## Overall Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n",
+                "## Financial Data\n```\n",
                 context,
                 "\n```\n\n"
             ]
             sections = analysis_text.split('\n\n')
             current_section = None
                 if "Business Status" in section:
                     output.append("## Business Status\n")
+                elif "Financial Insights" in section:
+                    output.append("\n## Key Insights\n")
+                elif "Strategic Recommendations" in section:
+                    output.append("\n## Recommendations\n")
                 else:
                     if not section.startswith('-'):
                         section = f"- {section}"
                     output.append(f"{section}\n")
+            return "".join(output)
         except Exception as e:
             logger.error(f"Error formatting response: {str(e)}")
             return "Error formatting analysis results"
def save_organized_data(structured_data, filename):
    """Write ``structured_data`` to ``filename`` as indented JSON.

    The data is serialized to a string before the file is opened, so a
    serialization failure (for example a value that is not JSON
    serializable) does not truncate or partially overwrite an existing file.

    Args:
        structured_data: A JSON-serializable object.
        filename: Path of the file to write.

    Returns:
        True when the file was written, False when serialization or writing
        failed (the error is logged).
    """
    try:
        # Serialize first: opening the file in 'w' mode truncates it, so it
        # must only happen once we know there is valid output to write.
        payload = json.dumps(structured_data, indent=4)
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(payload)
        return True
    except Exception as e:
        logger.error(f"Error saving data: {str(e)}")
        return False
 def analyze_statements(income_statement, balance_sheet):
     try:
         if not income_statement or not balance_sheet:
             return "Please upload both Income Statement and Balance Sheet CSV files."
+        # Read and organize data
+        try:
+            income_df = pd.read_csv(income_statement)
+            balance_df = pd.read_csv(balance_sheet)
+            # Clean and structure data
+            financial_data = {
+                "income_statement": income_df.to_dict(orient='records'),
+                "balance_sheet": balance_df.to_dict(orient='records')
+            }
+            # Save structured data
+            save_organized_data(financial_data, "organized_financial_data.json")
+            # Create analysis context
+            context = f"""Financial Data Summary:
+            Income Statement:
+            {income_df.to_string()}
+            Balance Sheet:
+            {balance_df.to_string()}
+            """
+            # Initialize analyzer and generate analysis
+            analyzer = FinancialAnalyzer()
+            result = analyzer.analyze_financials(context)
+            clear_gpu_memory()
+            return result
+        except Exception as e:
+            logger.error(f"Error processing files: {str(e)}")
+            raise
     except Exception as e:
         logger.error(f"Analysis error: {str(e)}")
         return f"""Analysis Error: {str(e)}
     title="AI Financial Statement Analyzer",
     description="""## Financial Analysis Tool
+Upload your financial statements to get:
+- Business Status Assessment
+- Key Financial Insights
+- Strategic Recommendations
+Requirements:
+- CSV files with financial data
+- Standard financial statement format""",
     flagging_mode="never"
 )