Spaces:

walaa2022
/

financial-analysis-system

Sleeping

App Files Files Community

walaa2022 committed on Nov 26, 2024

Commit

9e42b37

verified ·

1 Parent(s): c714b22

Update app.py

Browse files

Files changed (1) hide show

app.py +114 -138

app.py CHANGED Viewed

@@ -5,8 +5,6 @@ import torch
 import logging
 import gc
 from transformers import pipeline
-import json
-import csv
 # Setup logging
 logging.basicConfig(
@@ -24,22 +22,6 @@ def clear_gpu_memory():
         torch.cuda.empty_cache()
     gc.collect()
-def clean_financial_value(value):
-    try:
-        if isinstance(value, str):
-            value = value.strip().replace('"', '').replace(' ', '')
-            if '(' in value and ')' in value:
-                value = '-' + value.replace('(', '').replace(')', '')
-            value = value.replace(',', '')
-            try:
-                return float(value)
-            except ValueError:
-                return 0.0
-        return float(value) if isinstance(value, (int, float)) else 0.0
-    except Exception as e:
-        logger.error(f"Error cleaning value: {str(e)}")
-        return 0.0
 class FinancialAnalyzer:
     def __init__(self):
         self.analysis_model = None
@@ -48,14 +30,12 @@ class FinancialAnalyzer:
     def load_models(self):
         try:
-            # Load analysis model
             self.analysis_model = pipeline(
                 "text-generation",
                 model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                 torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
             )
-            # Load sentiment model
             self.sentiment_model = pipeline(
                 "text-classification",
                 model="ProsusAI/finbert",
@@ -67,172 +47,168 @@ class FinancialAnalyzer:
             logger.error(f"Error loading models: {str(e)}")
             raise
-    def analyze_financials(self, context):
         try:
-            # Generate sentiment analysis
             sentiment = self.sentiment_model(
-                context,
-                truncation=True,
-                max_length=512
             )[0]
             # Generate analysis
-            analysis_prompt = f"""[INST] As a financial analyst, analyze this data:
-            {context}
-            Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
-            Provide:
-            1. Business Status and Health Assessment
-            2. Key Financial Insights and Metrics
-            3. Strategic Recommendations and Action Plan
-            Be specific and data-driven in your analysis.
-            [/INST]"""
             response = self.analysis_model(
                 analysis_prompt,
-                max_new_tokens=1500,
-                do_sample=False,
                 num_return_sequences=1,
                 truncation=True
             )
-            return self.format_response(response[0]['generated_text'], sentiment, context)
         except Exception as e:
-            logger.error(f"Error in analysis: {str(e)}")
-            return f"Error generating analysis: {str(e)}"
-    def format_response(self, analysis_text, sentiment, context):
         try:
-            output = [
                 "# Financial Analysis Report\n\n",
-                f"## Overall Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n",
-                "## Financial Data\n```\n",
-                context,
-                "\n```\n\n"
             ]
-            sections = analysis_text.split('\n\n')
             current_section = None
-            for section in sections:
-                section = section.strip()
-                if not section:
                     continue
-                if "Business Status" in section:
-                    output.append("## Business Status\n")
-                elif "Financial Insights" in section:
-                    output.append("\n## Key Insights\n")
-                elif "Strategic Recommendations" in section:
-                    output.append("\n## Recommendations\n")
-                else:
-                    if not section.startswith('-'):
-                        section = f"- {section}"
-                    output.append(f"{section}\n")
-            return "".join(output)
         except Exception as e:
             logger.error(f"Error formatting response: {str(e)}")
-            return "Error formatting analysis results"
-def save_organized_data(structured_data, filename):
-    try:
-        with open(filename, 'w') as f:
-            json.dump(structured_data, f, indent=4)
-        return True
-    except Exception as e:
-        logger.error(f"Error saving data: {str(e)}")
-        return False
 def analyze_statements(income_statement, balance_sheet):
     try:
         if not income_statement or not balance_sheet:
-            return "Please upload both Income Statement and Balance Sheet CSV files."
-        # Read and organize data
-        try:
-            income_df = pd.read_csv(income_statement)
-            balance_df = pd.read_csv(balance_sheet)
-            # Clean and structure data
-            financial_data = {
-                "income_statement": income_df.to_dict(orient='records'),
-                "balance_sheet": balance_df.to_dict(orient='records')
-            }
-            # Save structured data
-            save_organized_data(financial_data, "organized_financial_data.json")
-            # Create analysis context
-            context = f"""Financial Data Summary:
-            Income Statement:
-            {income_df.to_string()}
-            Balance Sheet:
-            {balance_df.to_string()}
-            """
-            # Initialize analyzer and generate analysis
-            analyzer = FinancialAnalyzer()
-            result = analyzer.analyze_financials(context)
-            clear_gpu_memory()
-            return result
-        except Exception as e:
-            logger.error(f"Error processing files: {str(e)}")
-            raise
     except Exception as e:
-        logger.error(f"Analysis error: {str(e)}")
         return f"""Analysis Error: {str(e)}
-        Please verify:
-        1. Files are in CSV format
-        2. Files contain financial data
-        3. Files are not corrupted"""
 # Create Gradio interface
 iface = gr.Interface(
     fn=analyze_statements,
     inputs=[
         gr.File(
-            label="Upload Income Statement (CSV)",
             file_types=[".csv"]
         ),
         gr.File(
-            label="Upload Balance Sheet (CSV)",
             file_types=[".csv"]
         )
     ],
     outputs=gr.Markdown(),
-    title="AI Financial Statement Analyzer",
-    description="""## Financial Analysis Tool
-Upload your financial statements to get:
-- Business Status Assessment
-- Key Financial Insights
-- Strategic Recommendations
-Requirements:
-- CSV files with financial data
-- Standard financial statement format""",
-    flagging_mode="never"
 )
-# Launch the interface
 if __name__ == "__main__":
-    try:
-        iface.queue()
-        iface.launch(
-            share=False,
-            server_name="0.0.0.0",
-            server_port=7860
-        )
-    except Exception as e:
-        logger.error(f"Launch error: {str(e)}")
-        sys.exit(1)

 import logging
 import gc
 from transformers import pipeline
 # Setup logging
 logging.basicConfig(
         torch.cuda.empty_cache()
     gc.collect()
 class FinancialAnalyzer:
     def __init__(self):
         self.analysis_model = None
     def load_models(self):
         try:
             self.analysis_model = pipeline(
                 "text-generation",
                 model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                 torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
             )
             self.sentiment_model = pipeline(
                 "text-classification",
                 model="ProsusAI/finbert",
             logger.error(f"Error loading models: {str(e)}")
             raise
+    def format_number(self, number):
+        try:
+            if isinstance(number, str):
+                number = float(number.replace(',', '').replace('$', '').strip())
+            return f"${number:,.0f}"
+        except:
+            return str(number)
+    def process_dataframe(self, df, statement_type):
+        try:
+            df_cleaned = df.copy()
+            # Clean column names
+            df_cleaned.columns = df_cleaned.columns.str.strip()
+            # Clean numeric values
+            numeric_cols = df_cleaned.select_dtypes(include=['float64', 'int64']).columns
+            for col in numeric_cols:
+                df_cleaned[col] = pd.to_numeric(df_cleaned[col].astype(str).str.replace('[$,()]', '', regex=True), errors='coerce')
+            return df_cleaned
+        except Exception as e:
+            logger.error(f"Error processing {statement_type}: {str(e)}")
+            raise
+    def analyze_financials(self, income_df, balance_df):
         try:
+            # Process dataframes
+            income_clean = self.process_dataframe(income_df, "income_statement")
+            balance_clean = self.process_dataframe(balance_df, "balance_sheet")
+            # Create analysis context
+            context = self.create_analysis_context(income_clean, balance_clean)
+            # Generate sentiment
             sentiment = self.sentiment_model(
+                context[:512],
+                truncation=True
             )[0]
             # Generate analysis
+            analysis_prompt = f"""[INST] Analyze these financial metrics:
+{context}
+Market Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
+Provide concise analysis of:
+1. Financial Health
+2. Key Insights
+3. Strategic Recommendations
+[/INST]"""
             response = self.analysis_model(
                 analysis_prompt,
+                max_new_tokens=500,
+                temperature=0.7,
                 num_return_sequences=1,
                 truncation=True
             )
+            return self.format_response(response[0]['generated_text'], sentiment)
+        except Exception as e:
+            logger.error(f"Analysis error: {str(e)}")
+            return f"Error in analysis: {str(e)}"
+    def create_analysis_context(self, income_df, balance_df):
+        try:
+            # Extract latest year metrics
+            latest_metrics = {
+                'Revenue': income_df.loc[income_df['year'] == 'Total Net Revenue', '2025'].iloc[0],
+                'Net_Income': income_df.loc[income_df['year'] == 'Net Income', '2025'].iloc[0],
+                'Assets': balance_df.loc[balance_df['year'] == 'Total Assets', '2025'].iloc[0],
+                'Liabilities': balance_df.loc[balance_df['year'] == 'Total Liabilities', '2025'].iloc[0],
+                'Equity': balance_df.loc[balance_df['year'] == "Shareholder's Equity", '2025'].iloc[0]
+            }
+            return f"""Financial Metrics (2025):
+Revenue: {self.format_number(latest_metrics['Revenue'])}
+Net Income: {self.format_number(latest_metrics['Net_Income'])}
+Total Assets: {self.format_number(latest_metrics['Assets'])}
+Total Liabilities: {self.format_number(latest_metrics['Liabilities'])}
+Shareholder's Equity: {self.format_number(latest_metrics['Equity'])}"""
         except Exception as e:
+            logger.error(f"Error creating context: {str(e)}")
+            raise
+    def format_response(self, analysis_text, sentiment):
         try:
+            sections = [
                 "# Financial Analysis Report\n\n",
+                f"## Market Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n"
             ]
             current_section = None
+            for line in analysis_text.split('\n'):
+                line = line.strip()
+                if not line:
                     continue
+                if "Financial Health" in line:
+                    sections.append("## Financial Health\n")
+                elif "Key Insights" in line:
+                    sections.append("\n## Key Insights\n")
+                elif "Strategic Recommendations" in line:
+                    sections.append("\n## Strategic Recommendations\n")
+                elif line:
+                    sections.append(f"- {line}\n")
+            return "".join(sections)
         except Exception as e:
             logger.error(f"Error formatting response: {str(e)}")
+            return "Error formatting analysis"
 def analyze_statements(income_statement, balance_sheet):
     """Gradio callback: analyse two uploaded CSV statements.

     Args:
         income_statement: Uploaded income statement, passed to
             ``pd.read_csv``; falsy when nothing was uploaded.
         balance_sheet: Uploaded balance sheet, passed to ``pd.read_csv``;
             falsy when nothing was uploaded.

     Returns:
         The Markdown report from ``FinancialAnalyzer.analyze_financials``, a
         prompt to upload both files when either is missing, or an
         ``Analysis Error: ...`` message with a checklist if reading or
         analysing the files raises.
     """
     if not income_statement or not balance_sheet:
         return "Please upload both financial statements."
     try:
         income_df = pd.read_csv(income_statement)
         balance_df = pd.read_csv(balance_sheet)
         analyzer = FinancialAnalyzer()
         return analyzer.analyze_financials(income_df, balance_df)
     except Exception as e:
         logger.error(f"Error: {str(e)}")
         return "\n".join([
             f"Analysis Error: {str(e)}",
             "Please check:",
             "1. CSV format is correct",
             "2. Required financial data is present",
             "3. Files are not corrupted",
         ])
     finally:
         # Release cached GPU memory on the failure path too, not only on success
         clear_gpu_memory()
 # Create Gradio interface: two CSV uploads in, one Markdown report out
 iface = gr.Interface(
     fn=analyze_statements,
     inputs=[
         gr.File(
             label="Income Statement",
             file_types=[".csv"]
         ),
         gr.File(
             label="Balance Sheet",
             file_types=[".csv"]
         )
     ],
     outputs=gr.Markdown(),
     title="Financial Statement Analyzer",
     description="Upload financial statements for AI analysis",
     theme="default",
     # Flagging is configured with a string mode ("never"/"auto"/"manual");
     # passing a bare boolean is deprecated.
     allow_flagging="never"
 )
 # Launch with basic configuration: listen on all interfaces at port 7860
 # and do not create a public share link.
 if __name__ == "__main__":
     iface.launch(
         server_name="0.0.0.0",
         server_port=7860,
         share=False
     )