Spaces:

walaa2022
/

financial-analysis-system

Sleeping

App Files Files Community

walaa2022 committed on Nov 26, 2024

Commit

98d6352

verified ·

1 Parent(s): bc8be1d

Update app.py

Browse files

Files changed (1) hide show

app.py +153 -76

app.py CHANGED Viewed

@@ -17,7 +17,6 @@ logger = logging.getLogger(__name__)
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 logger.info(f"Using device: {DEVICE}")
-# Clear GPU memory utility
 def clear_gpu_memory():
     """Utility function to clear GPU memory"""
     if DEVICE == "cuda":
@@ -25,112 +24,172 @@ def clear_gpu_memory():
     gc.collect()
 class FinancialAnalyzer:
-    """Financial analysis using Tiny Llama and Falcon models"""
     def __init__(self):
         self.analysis_model = None
         self.sentiment_model = None
-        self.falcon_model = None
         self.load_models()
     def load_models(self):
         """Load models for analysis and sentiment"""
         try:
-            # Load Tiny Llama for generating financial analysis and insights
             self.analysis_model = pipeline(
                 "text-generation",
-                model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # Tiny Llama model for analysis
                 torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
             )
-            # Load FinBERT for sentiment analysis
             self.sentiment_model = pipeline(
                 "text-classification",
-                model="yiyanghkust/finbert-tone",  # FinBERT model for sentiment analysis
                 torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
             )
-            # Load Falcon model for generating roadmap and recommendations
-            self.falcon_model = pipeline(
-                "text-generation",
-                model="tiiuae/falcon-7b",  # Falcon model for recommendations and roadmap
-                torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
-            )
-            logger.info("Tiny Llama, FinBERT, and Falcon models loaded successfully")
         except Exception as e:
             logger.error(f"Error loading models: {str(e)}")
             raise
-    def analyze_financials(self, income_data: pd.DataFrame, balance_data: pd.DataFrame) -> str:
-        """Generate financial analysis using Tiny Llama and analyze sentiment using FinBERT"""
         try:
-            # Combine the data for AI to process (can adjust prompt as needed)
-            combined_data = f"Income Statement Data:\n{income_data.to_string()}\n\nBalance Sheet Data:\n{balance_data.to_string()}"
-            # Generate status and insights using Tiny Llama
-            status_prompt = f"Please analyze the following financial data and provide status, insights, and metrics:\n\n{combined_data}"
             response = self.analysis_model(
-                status_prompt,
                 max_length=1500,
                 num_return_sequences=1,
-                do_sample=True,
-                temperature=0.7
             )
-            insights_result = response[0]['generated_text'].strip()
-            # Get sentiment analysis from FinBERT
-            sentiment = self.sentiment_model(insights_result[:512])[0]  # Limit input to first 512 tokens
-            sentiment_label = sentiment['label']
-            sentiment_score = sentiment['score']
-            # Generate recommendations and roadmap using Falcon
-            roadmap_prompt = f"Based on the following financial insights, create a strategic roadmap and recommendations for the company:\n\n{insights_result}"
-            roadmap_response = self.falcon_model(
-                roadmap_prompt,
-                max_length=1500,
-                num_return_sequences=1,
-                do_sample=True,
-                temperature=0.7
-            )
-            roadmap_result = roadmap_response[0]['generated_text'].strip()
-            # Return a comprehensive report
-            result = f"""# Financial Analysis Report
-### Sentiment Analysis: {sentiment_label} ({sentiment_score:.1%})
-### Financial Status and Insights:
-{insights_result}
-### Recommendations and Roadmap:
-{roadmap_result}
-"""
-            return result
         except Exception as e:
-            logger.error(f"Analysis error: {str(e)}")
-            return f"Analysis Error: {str(e)}"
-# Function to read CSV and convert to DataFrame
-def read_csv(file_path: str) -> pd.DataFrame:
-    """Read CSV and return a DataFrame"""
-    return pd.read_csv(file_path)
 def analyze_statements(income_statement, balance_sheet):
     """Main function to analyze financial statements"""
     try:
         if not income_statement or not balance_sheet:
             return "Please upload both Income Statement and Balance Sheet CSV files."
-        # Read files as DataFrames (no need to clean manually)
-        income_data = read_csv(income_statement.name)
-        balance_data = read_csv(balance_sheet.name)
-        # Create analyzer and process data
         analyzer = FinancialAnalyzer()
         result = analyzer.analyze_financials(income_data, balance_data)
-        # Clear memory
         clear_gpu_memory()
         return result
     except Exception as e:
@@ -138,29 +197,47 @@ def analyze_statements(income_statement, balance_sheet):
         return f"""Analysis Error: {str(e)}
         Please ensure your CSV files:
-        1. Have clear year columns
-        2. Contain recognizable financial metrics
-        3. Use consistent number formatting"""
 # Create Gradio interface
 iface = gr.Interface(
     fn=analyze_statements,
-    inputs=[gr.File(label="Upload Income Statement (CSV)", file_types=[".csv"]),
-            gr.File(label="Upload Balance Sheet (CSV)", file_types=[".csv"])],
     outputs=gr.Markdown(),
-    title="Generative Financial Statement Analyzer with Tiny Llama, FinBERT, and Falcon",
-    description="""## Financial Analysis Tool (AI-powered)
 Upload your financial statements to get:
-- Status & Insights
-- Key Metrics & Ratios
-- Trend Analysis
-- Strategic Recommendations & Roadmap""",
 )
 # Launch the interface
 if __name__ == "__main__":
     try:
-        iface.launch(server_name="0.0.0.0", server_port=7860)
     except Exception as e:
         logger.error(f"Launch error: {str(e)}")
         sys.exit(1)

 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 logger.info(f"Using device: {DEVICE}")
 def clear_gpu_memory():
     """Utility function to clear GPU memory"""
     if DEVICE == "cuda":
     gc.collect()
 class FinancialAnalyzer:
+    """Financial analysis using Tiny Llama and FinBERT models"""
     def __init__(self):
         self.analysis_model = None
         self.sentiment_model = None
         self.load_models()
     def load_models(self):
         """Load models for analysis and sentiment"""
         try:
+            # Load Tiny Llama for analysis
+            logger.info("Loading TinyLlama model...")
             self.analysis_model = pipeline(
                 "text-generation",
+                model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                 torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
             )
+            # Load FinBERT for sentiment
+            logger.info("Loading FinBERT model...")
             self.sentiment_model = pipeline(
                 "text-classification",
+                model="ProsusAI/finbert",
                 torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
             )
+            logger.info("Models loaded successfully")
         except Exception as e:
             logger.error(f"Error loading models: {str(e)}")
             raise
+    def process_csv(self, file_obj):
+        """Process CSV file and extract financial data"""
         try:
+            if file_obj is None:
+                raise ValueError("No file provided")
+            # Read CSV with better error handling
+            df = pd.read_csv(file_obj, skipinitialspace=True)
+            if df.empty:
+                raise ValueError("Empty CSV file")
+            # Clean column names
+            df.columns = df.columns.str.strip()
+            # Remove unnamed columns
+            df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
+            # Convert to numeric where possible
+            for col in df.columns:
+                df[col] = pd.to_numeric(df[col].str.replace('[$,()]', '', regex=True), errors='ignore')
+            # Get numeric columns
+            numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
+            if len(numeric_cols) == 0:
+                raise ValueError("No numeric columns found in CSV")
+            return df[numeric_cols].describe()
+        except Exception as e:
+            logger.error(f"Error processing CSV: {str(e)}")
+            raise
+    def analyze_financials(self, income_data, balance_data):
+        """Generate financial analysis and recommendations"""
+        try:
+            financial_context = f"""
+            Income Statement Analysis:
+            {income_data.to_string()}
+            Balance Sheet Analysis:
+            {balance_data.to_string()}
+            """
+            # Generate sentiment analysis
+            sentiment = self.sentiment_model(
+                financial_context,
+                truncation=True,
+                max_length=512
+            )[0]
+            # Generate analysis
+            analysis_prompt = f"""[INST] As a financial analyst, analyze these financial statements:
+            {financial_context}
+            Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
+            Provide:
+            1. Business Status and Health Assessment
+            2. Key Financial Insights and Metrics
+            3. Strategic Recommendations and Action Plan
+            Be specific and data-driven in your analysis.
+            [/INST]"""
             response = self.analysis_model(
+                analysis_prompt,
                 max_length=1500,
+                do_sample=False,
                 num_return_sequences=1,
+                truncation=True
             )
+            return self.format_response(response[0]['generated_text'], sentiment)
         except Exception as e:
+            logger.error(f"Error in analysis: {str(e)}")
+            return f"Error generating analysis: {str(e)}"
+    def format_response(self, analysis_text, sentiment):
+        """Format the analysis response"""
+        try:
+            sections = analysis_text.split('\n\n')
+            output = [
+                "# Financial Analysis Report\n\n",
+                f"## Overall Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n"
+            ]
+            current_section = None
+            for section in sections:
+                section = section.strip()
+                if not section:
+                    continue
+                if "Business Status" in section:
+                    output.append("## Business Status\n")
+                    current_section = "status"
+                elif "Key Financial Insights" in section:
+                    output.append("\n## Key Insights\n")
+                    current_section = "insights"
+                elif "Strategic Recommendations" in section:
+                    output.append("\n## Recommendations\n")
+                    current_section = "recommendations"
+                elif current_section:
+                    output.append(f"- {section}\n")
+            return "".join(output)
+        except Exception as e:
+            logger.error(f"Error formatting response: {str(e)}")
+            return "Error formatting analysis results"
 def analyze_statements(income_statement, balance_sheet):
     """Main function to analyze financial statements"""
     try:
         if not income_statement or not balance_sheet:
             return "Please upload both Income Statement and Balance Sheet CSV files."
+        # Initialize analyzer
         analyzer = FinancialAnalyzer()
+        # Process statements
+        logger.info("Processing income statement...")
+        income_data = analyzer.process_csv(income_statement)
+        logger.info("Processing balance sheet...")
+        balance_data = analyzer.process_csv(balance_sheet)
+        # Generate analysis
+        logger.info("Generating analysis...")
         result = analyzer.analyze_financials(income_data, balance_data)
         clear_gpu_memory()
         return result
     except Exception as e:
         return f"""Analysis Error: {str(e)}
         Please ensure your CSV files:
+        1. Contain numeric financial data
+        2. Have proper column headers
+        3. Are not corrupted"""
 # Create Gradio interface
 iface = gr.Interface(
     fn=analyze_statements,
+    inputs=[
+        gr.File(
+            label="Upload Income Statement (CSV)",
+            file_types=[".csv"]
+        ),
+        gr.File(
+            label="Upload Balance Sheet (CSV)",
+            file_types=[".csv"]
+        )
+    ],
     outputs=gr.Markdown(),
+    title="AI Financial Statement Analyzer",
+    description="""## Financial Analysis Tool
 Upload your financial statements to get:
+- Business Status Assessment
+- Key Financial Insights
+- Strategic Recommendations
+Requirements:
+- CSV files with numeric data
+- Standard financial statement format""",
+    flagging_mode="never"
 )
 # Launch the interface
 if __name__ == "__main__":
     try:
+        iface.queue()
+        iface.launch(
+            share=False,
+            server_name="0.0.0.0",
+            server_port=7860
+        )
     except Exception as e:
         logger.error(f"Launch error: {str(e)}")
         sys.exit(1)