Spaces:

walaa2022
/

financial-analysis-system

Sleeping

App Files Community

walaa2022 committed on Nov 26, 2024

Commit

17c0709

verified ·

1 Parent(s): 83dace5

Update app.py

Browse files

Files changed (1) hide show

app.py +80 -91

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ import gradio as gr
 import pandas as pd
 import torch
 import logging
-import gc
 from transformers import pipeline
 # Setup logging
@@ -13,15 +12,9 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
-# Device configuration
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 logger.info(f"Using device: {DEVICE}")
-def clear_gpu_memory():
-    if DEVICE == "cuda":
-        torch.cuda.empty_cache()
-    gc.collect()
 class FinancialAnalyzer:
     def __init__(self):
         self.analysis_model = None
@@ -30,12 +23,14 @@ class FinancialAnalyzer:
     def load_models(self):
         try:
             self.analysis_model = pipeline(
                 "text-generation",
                 model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                 torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
             )
             self.sentiment_model = pipeline(
                 "text-classification",
                 model="ProsusAI/finbert",
@@ -47,115 +42,105 @@ class FinancialAnalyzer:
             logger.error(f"Error loading models: {str(e)}")
             raise
-    def format_number(self, number):
         try:
-            if isinstance(number, str):
-                number = float(number.replace(',', '').replace('$', '').strip())
-            return f"${number:,.0f}"
-        except:
-            return str(number)
-    def process_dataframe(self, df, statement_type):
-        try:
-            df_cleaned = df.copy()
-            # Clean column names
-            df_cleaned.columns = df_cleaned.columns.str.strip()
-            # Clean numeric values
-            numeric_cols = df_cleaned.select_dtypes(include=['float64', 'int64']).columns
-            for col in numeric_cols:
-                df_cleaned[col] = pd.to_numeric(df_cleaned[col].astype(str).str.replace('[$,()]', '', regex=True), errors='coerce')
-            return df_cleaned
         except Exception as e:
-            logger.error(f"Error processing {statement_type}: {str(e)}")
             raise
-    def analyze_financials(self, income_df, balance_df):
         try:
-            # Process dataframes
-            income_clean = self.process_dataframe(income_df, "income_statement")
-            balance_clean = self.process_dataframe(balance_df, "balance_sheet")
-            # Create analysis context
-            context = self.create_analysis_context(income_clean, balance_clean)
-            # Generate sentiment
             sentiment = self.sentiment_model(
-                context[:512],
                 truncation=True
             )[0]
-            # Generate analysis
-            analysis_prompt = f"""[INST] Analyze these financial metrics:
-{context}
 Market Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
-Provide concise analysis of:
-1. Financial Health
-2. Key Insights
-3. Strategic Recommendations
 [/INST]"""
-            response = self.analysis_model(
-                analysis_prompt,
                 max_new_tokens=500,
                 temperature=0.7,
                 num_return_sequences=1,
                 truncation=True
             )
-            return self.format_response(response[0]['generated_text'], sentiment)
         except Exception as e:
             logger.error(f"Analysis error: {str(e)}")
             return f"Error in analysis: {str(e)}"
-    def create_analysis_context(self, income_df, balance_df):
-        try:
-            # Extract latest year metrics
-            latest_metrics = {
-                'Revenue': income_df.loc[income_df['year'] == 'Total Net Revenue', '2025'].iloc[0],
-                'Net_Income': income_df.loc[income_df['year'] == 'Net Income', '2025'].iloc[0],
-                'Assets': balance_df.loc[balance_df['year'] == 'Total Assets', '2025'].iloc[0],
-                'Liabilities': balance_df.loc[balance_df['year'] == 'Total Liabilities', '2025'].iloc[0],
-                'Equity': balance_df.loc[balance_df['year'] == "Shareholder's Equity", '2025'].iloc[0]
-            }
-            return f"""Financial Metrics (2025):
-Revenue: {self.format_number(latest_metrics['Revenue'])}
-Net Income: {self.format_number(latest_metrics['Net_Income'])}
-Total Assets: {self.format_number(latest_metrics['Assets'])}
-Total Liabilities: {self.format_number(latest_metrics['Liabilities'])}
-Shareholder's Equity: {self.format_number(latest_metrics['Equity'])}"""
-        except Exception as e:
-            logger.error(f"Error creating context: {str(e)}")
-            raise
-    def format_response(self, analysis_text, sentiment):
         try:
             sections = [
                 "# Financial Analysis Report\n\n",
-                f"## Market Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n"
             ]
-            current_section = None
             for line in analysis_text.split('\n'):
                 line = line.strip()
                 if not line:
                     continue
-                if "Financial Health" in line:
-                    sections.append("## Financial Health\n")
-                elif "Key Insights" in line:
-                    sections.append("\n## Key Insights\n")
-                elif "Strategic Recommendations" in line:
-                    sections.append("\n## Strategic Recommendations\n")
                 elif line:
-                    sections.append(f"- {line}\n")
             return "".join(sections)
         except Exception as e:
@@ -167,13 +152,22 @@ def analyze_statements(income_statement, balance_sheet):
         if not income_statement or not balance_sheet:
             return "Please upload both financial statements."
         income_df = pd.read_csv(income_statement)
         balance_df = pd.read_csv(balance_sheet)
         analyzer = FinancialAnalyzer()
-        result = analyzer.analyze_financials(income_df, balance_df)
-        clear_gpu_memory()
         return result
     except Exception as e:
@@ -181,31 +175,26 @@ def analyze_statements(income_statement, balance_sheet):
         return f"""Analysis Error: {str(e)}
 Please check:
-1. CSV format is correct
-2. Required financial data is present
 3. Files are not corrupted"""
 # Create Gradio interface
 iface = gr.Interface(
     fn=analyze_statements,
     inputs=[
-        gr.File(
-            label="Income Statement",
-            file_types=[".csv"]
-        ),
-        gr.File(
-            label="Balance Sheet",
-            file_types=[".csv"]
-        )
     ],
     outputs=gr.Markdown(),
-    title="Financial Statement Analyzer",
-    description="Upload financial statements for AI analysis",
     theme="default",
-    allow_flagging="never"
 )
-# Launch with basic configuration
 if __name__ == "__main__":
     iface.launch(
         server_name="0.0.0.0",

 import pandas as pd
 import torch
 import logging
 from transformers import pipeline
 # Setup logging
 )
 logger = logging.getLogger(__name__)
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 logger.info(f"Using device: {DEVICE}")
 class FinancialAnalyzer:
     def __init__(self):
         self.analysis_model = None
     def load_models(self):
         try:
+            logger.info("Loading TinyLlama model...")
             self.analysis_model = pipeline(
                 "text-generation",
                 model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                 torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
             )
+            logger.info("Loading FinBERT model...")
             self.sentiment_model = pipeline(
                 "text-classification",
                 model="ProsusAI/finbert",
             logger.error(f"Error loading models: {str(e)}")
             raise
+    def extract_and_analyze(self, statement_text, statement_type):
+        """Extract information from financial statement text"""
         try:
+            prompt = f"""[INST] As a financial analyst, analyze this {statement_type}:
+{statement_text}
+Extract and summarize:
+1. Key financial numbers for 2025
+2. Notable trends
+3. Important metrics
+Focus on the most recent year (2025) and key financial indicators.
+[/INST]"""
+            response = self.analysis_model(
+                prompt,
+                max_new_tokens=300,
+                temperature=0.3,
+                num_return_sequences=1,
+                truncation=True
+            )
+            return response[0]['generated_text']
         except Exception as e:
+            logger.error(f"Error extracting data from {statement_type}: {str(e)}")
             raise
+    def analyze_financials(self, income_text, balance_text):
         try:
+            # First, extract key information from each statement
+            logger.info("Analyzing Income Statement...")
+            income_analysis = self.extract_and_analyze(income_text, "Income Statement")
+            logger.info("Analyzing Balance Sheet...")
+            balance_analysis = self.extract_and_analyze(balance_text, "Balance Sheet")
+            # Combine the analyses
+            combined_analysis = f"""Income Statement Analysis:
+{income_analysis}
+Balance Sheet Analysis:
+{balance_analysis}"""
+            # Get sentiment
             sentiment = self.sentiment_model(
+                combined_analysis[:512],
                 truncation=True
             )[0]
+            # Generate final analysis
+            final_prompt = f"""[INST] Based on this financial analysis:
+{combined_analysis}
 Market Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
+Provide a concise analysis with:
+1. Overall Financial Health (2-3 key points)
+2. Main Business Insights (2-3 insights)
+3. Key Recommendations (2-3 recommendations)
 [/INST]"""
+            final_response = self.analysis_model(
+                final_prompt,
                 max_new_tokens=500,
                 temperature=0.7,
                 num_return_sequences=1,
                 truncation=True
             )
+            return self.format_response(final_response[0]['generated_text'], sentiment, combined_analysis)
         except Exception as e:
             logger.error(f"Analysis error: {str(e)}")
             return f"Error in analysis: {str(e)}"
+    def format_response(self, analysis_text, sentiment, raw_analysis):
         try:
             sections = [
                 "# Financial Analysis Report\n\n",
+                f"## Market Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n",
+                "## Extracted Financial Data\n```\n",
+                raw_analysis,
+                "\n```\n\n",
+                "## Analysis\n\n"
             ]
             for line in analysis_text.split('\n'):
                 line = line.strip()
                 if not line:
                     continue
+                if any(header in line for header in ["Financial Health", "Business Insights", "Recommendations"]):
+                    sections.append(f"\n### {line}\n")
                 elif line:
+                    if not line.startswith('-'):
+                        line = f"- {line}"
+                    sections.append(f"{line}\n")
             return "".join(sections)
         except Exception as e:
         if not income_statement or not balance_sheet:
             return "Please upload both financial statements."
+        logger.info("Reading financial statements...")
+        # Read files as text
         income_df = pd.read_csv(income_statement)
         balance_df = pd.read_csv(balance_sheet)
+        # Convert to string while preserving format
+        income_text = income_df.to_string(index=False)
+        balance_text = balance_df.to_string(index=False)
+        logger.info("Initializing analysis...")
         analyzer = FinancialAnalyzer()
+        result = analyzer.analyze_financials(income_text, balance_text)
+        if DEVICE == "cuda":
+            torch.cuda.empty_cache()
         return result
     except Exception as e:
         return f"""Analysis Error: {str(e)}
 Please check:
+1. Files are readable CSV files
+2. Files contain financial data
 3. Files are not corrupted"""
 # Create Gradio interface
 iface = gr.Interface(
     fn=analyze_statements,
     inputs=[
+        gr.File(label="Income Statement (CSV)", file_types=[".csv"]),
+        gr.File(label="Balance Sheet (CSV)", file_types=[".csv"])
     ],
     outputs=gr.Markdown(),
+    title="AI Financial Statement Analyzer",
+    description="""Upload your financial statements for AI analysis.
+The model will extract and analyze key financial information automatically.""",
     theme="default",
+    flagging_mode="never"
 )
+# Launch
 if __name__ == "__main__":
     iface.launch(
         server_name="0.0.0.0",