Spaces:

walaa2022
/

financial_analysis

Sleeping

App Files Files Community

walaa2022 committed on Dec 1, 2024

Commit

ab0bea5

verified ·

1 Parent(s): f712719

Update app.py

Browse files

Files changed (1) hide show

app.py +195 -160

app.py CHANGED Viewed

@@ -2,198 +2,233 @@ import gradio as gr
 import pandas as pd
 import numpy as np
 import json
-import chromadb
-from chromadb.config import Settings
-from datetime import datetime
-class FastFinancialAnalyzer:
     def __init__(self):
-        # Initialize ChromaDB
-        self.client = chromadb.Client(Settings(anonymized_telemetry=False))
-        # Create financial metrics collection
-        self.collection = self.client.create_collection(
-            name="financial_metrics_" + datetime.now().strftime("%Y%m%d_%H%M%S")
-        )
-        # Initialize ratio benchmarks
-        self.initialize_ratio_benchmarks()
-    def initialize_ratio_benchmarks(self):
-        """Initialize benchmark ratios for comparison"""
-        self.benchmarks = {
-            "liquidity_ratios": {
-                "current_ratio": {"good": 2.0, "warning": 1.0},
-                "quick_ratio": {"good": 1.0, "warning": 0.5}
             },
-            "profitability_ratios": {
-                "gross_margin": {"good": 0.4, "warning": 0.2},
-                "net_margin": {"good": 0.1, "warning": 0.05}
             },
-            "efficiency_ratios": {
-                "inventory_turnover": {"good": 4, "warning": 2},
-                "asset_turnover": {"good": 0.5, "warning": 0.25}
             }
         }
-    def calculate_ratios(self, balance_sheet_df, income_stmt_df):
-        """Calculate key financial ratios"""
         try:
-            ratios = {}
-            # Clean numeric data
-            for df in [balance_sheet_df, income_stmt_df]:
-                for col in df.select_dtypes(include=['object']).columns:
-                    df[col] = pd.to_numeric(df[col].astype(str).str.replace(r'[^\d.-]', ''), errors='coerce')
-            # Calculate ratios for each year
-            years = [col for col in balance_sheet_df.columns if str(col).isdigit()]
-            for year in years:
-                ratios[year] = {
-                    "liquidity": {
-                        "current_ratio": balance_sheet_df.loc[balance_sheet_df['Account'] == 'Total_Current_Assets', year].values[0] /
-                                       balance_sheet_df.loc[balance_sheet_df['Account'] == 'Total_Current_Liabilities', year].values[0],
-                        "quick_ratio": (balance_sheet_df.loc[balance_sheet_df['Account'] == 'Total_Current_Assets', year].values[0] -
-                                      balance_sheet_df.loc[balance_sheet_df['Account'] == 'Inventory', year].values[0]) /
-                                     balance_sheet_df.loc[balance_sheet_df['Account'] == 'Total_Current_Liabilities', year].values[0]
                     },
-                    "profitability": {
-                        "gross_margin": income_stmt_df.loc[income_stmt_df['Revenue Items'] == 'Gross Profit', year].values[0] /
-                                      income_stmt_df.loc[income_stmt_df['Revenue Items'] == 'Total Net Revenue', year].values[0],
-                        "net_margin": income_stmt_df.loc[income_stmt_df['Revenue Items'] == 'Net Earnings', year].values[0] /
-                                    income_stmt_df.loc[income_stmt_df['Revenue Items'] == 'Total Net Revenue', year].values[0]
                     },
-                    "growth": {
-                        "revenue_growth": None if year == years[0] else
-                            (income_stmt_df.loc[income_stmt_df['Revenue Items'] == 'Total Net Revenue', year].values[0] -
-                             income_stmt_df.loc[income_stmt_df['Revenue Items'] == 'Total Net Revenue', str(int(year)-1)].values[0]) /
-                            income_stmt_df.loc[income_stmt_df['Revenue Items'] == 'Total Net Revenue', str(int(year)-1)].values[0] * 100
                     }
                 }
-            return ratios
-        except Exception as e:
-            return f"Error calculating ratios: {str(e)}"
-    def analyze_trends(self, ratios):
-        """Analyze financial trends"""
-        trends = {
-            "liquidity": self.analyze_ratio_trend("current_ratio", ratios),
-            "profitability": self.analyze_ratio_trend("net_margin", ratios),
-            "growth": self.analyze_revenue_growth(ratios)
-        }
-        return trends
-    def analyze_ratio_trend(self, ratio_name, ratios):
-        """Analyze trend for a specific ratio"""
-        values = []
-        years = sorted(ratios.keys())
-        for year in years:
-            if ratio_name in ratios[year].get("liquidity", {}):
-                values.append(ratios[year]["liquidity"][ratio_name])
-            elif ratio_name in ratios[year].get("profitability", {}):
-                values.append(ratios[year]["profitability"][ratio_name])
-        if not values:
-            return "No data available"
-        trend = np.polyfit(range(len(values)), values, 1)[0]
-        if trend > 0.05:
-            return "Strong upward trend"
-        elif trend > 0:
-            return "Slight upward trend"
-        elif trend > -0.05:
-            return "Stable"
-        else:
-            return "Downward trend"
-    def analyze_revenue_growth(self, ratios):
-        """Analyze revenue growth trend"""
-        growth_rates = []
-        years = sorted(ratios.keys())[1:]  # Skip first year as it won't have growth rate
-        for year in years:
-            if ratios[year]["growth"]["revenue_growth"] is not None:
-                growth_rates.append(ratios[year]["growth"]["revenue_growth"])
-        if not growth_rates:
-            return "No growth data available"
-        avg_growth = np.mean(growth_rates)
-        if avg_growth > 10:
-            return f"Strong growth (avg {avg_growth:.1f}%)"
-        elif avg_growth > 0:
-            return f"Moderate growth (avg {avg_growth:.1f}%)"
-        else:
-            return f"Declining growth (avg {avg_growth:.1f}%)"
-    def generate_insights(self, ratios, trends):
-        """Generate actionable insights"""
-        insights = []
-        # Liquidity insights
-        current_ratio = ratios[max(ratios.keys())]["liquidity"]["current_ratio"]
-        if current_ratio < self.benchmarks["liquidity_ratios"]["current_ratio"]["warning"]:
-            insights.append("ALERT: Liquidity needs immediate attention")
-        elif current_ratio < self.benchmarks["liquidity_ratios"]["current_ratio"]["good"]:
-            insights.append("WATCH: Liquidity is below ideal levels")
-        # Profitability insights
-        net_margin = ratios[max(ratios.keys())]["profitability"]["net_margin"]
-        if net_margin > self.benchmarks["profitability_ratios"]["net_margin"]["good"]:
-            insights.append("STRONG: Excellent profit margins")
-        elif net_margin < self.benchmarks["profitability_ratios"]["net_margin"]["warning"]:
-            insights.append("ALERT: Profit margins need improvement")
-        # Growth insights
-        if "growth" in trends:
-            if "Strong" in trends["growth"]:
-                insights.append("POSITIVE: Strong revenue growth trend")
-            elif "Declining" in trends["growth"]:
-                insights.append("WATCH: Revenue growth is slowing")
-        return insights
     def analyze_financials(self, balance_sheet_file, income_stmt_file):
-        """Main analysis function"""
         try:
             # Read files
             balance_sheet_df = pd.read_csv(balance_sheet_file)
             income_stmt_df = pd.read_csv(income_stmt_file)
-            # Calculate ratios
-            ratios = self.calculate_ratios(balance_sheet_df, income_stmt_df)
-            # Analyze trends
-            trends = self.analyze_trends(ratios)
             # Generate insights
-            insights = self.generate_insights(ratios, trends)
-            # Prepare comprehensive analysis
             analysis = {
-                "Financial Ratios": ratios,
-                "Trend Analysis": trends,
-                "Key Insights": insights,
-                "Summary": {
-                    "Latest Year Analysis": {
-                        "Current Ratio": f"{ratios[max(ratios.keys())]['liquidity']['current_ratio']:.2f}",
-                        "Net Margin": f"{ratios[max(ratios.keys())]['profitability']['net_margin']:.2%}",
-                        "Revenue Growth": f"{ratios[max(ratios.keys())]['growth']['revenue_growth']:.2f}%" if ratios[max(ratios.keys())]['growth']['revenue_growth'] else "N/A"
-                    }
-                }
             }
             return json.dumps(analysis, indent=2)
         except Exception as e:
             return f"Error in analysis: {str(e)}"
 def create_interface():
-    analyzer = FastFinancialAnalyzer()
     iface = gr.Interface(
         fn=analyzer.analyze_financials,
@@ -201,9 +236,9 @@ def create_interface():
             gr.File(label="Balance Sheet (CSV)", type="filepath"),
             gr.File(label="Income Statement (CSV)", type="filepath")
         ],
-        outputs=gr.Textbox(label="Analysis Results", lines=20),
-        title="Fast Financial Statement Analyzer",
-        description="Upload financial statements for instant analysis with ratio calculations and trend detection."
     )
     return iface

 import pandas as pd
 import numpy as np
 import json
+from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification
+import torch
+class FinancialAnalyzer:
     def __init__(self):
+        """Create the analyzer: load the pretrained models, then the benchmark table."""
+        print("Initializing Financial Analyzer...")
+        # Loads two pretrained models by name; the progress prints bracket this step.
+        self.initialize_models()
+        self.initialize_benchmarks()
+    def initialize_models(self):
+        """Load both pretrained models and their tokenizers onto the instance.
+
+        Sets ``tiny_tokenizer`` / ``tiny_model`` (TinyLlama chat checkpoint,
+        loaded as a causal language model) and ``finbert_tokenizer`` /
+        ``finbert_model`` (ProsusAI/finbert, loaded as a sequence classifier).
+        """
+        print("Loading models...")
+        # Text-generation model used for the free-text insights.
+        self.tiny_tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
+        self.tiny_model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
+        # Classification model used for the sentiment label.
+        self.finbert_tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
+        self.finbert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
+        print("Models loaded successfully!")
+    def initialize_benchmarks(self):
+        """Populate ``self.industry_benchmarks`` with fixed reference values.
+
+        The table is keyed by category, then by metric name, using the same
+        names that ``calculate_metrics`` emits. Profitability entries are
+        percentages (margins there are multiplied by 100); the others are
+        plain ratios.
+        """
+        # NOTE(review): where these reference values come from is not shown
+        # here - confirm they suit the companies being analysed.
+        self.industry_benchmarks = {
+            "Liquidity": {
+                "Current Ratio": 2.0,
+                "Quick Ratio": 1.0
             },
+            "Profitability": {
+                "Gross Margin": 40.0,
+                "Operating Margin": 15.0,
+                "Net Margin": 10.0
             },
+            "Efficiency": {
+                "Asset Turnover": 2.0,
+                "Inventory Turnover": 6.0
             }
         }
+    def clean_number(self, value):
+        """Clean numerical values from files (all in thousands)"""
+        if isinstance(value, str):
+            # Remove currency symbols, commas, spaces and handle parentheses
+            value = value.replace(',', '').replace('$', '').replace(' ', '')
+            value = value.replace('(', '-').replace(')', '')
         try:
+            return float(value)
+        except:
+            return 0.0
+    def calculate_metrics(self, balance_sheet_df, income_stmt_df):
+        """Calculate financial metrics (all values in thousands)"""
+        metrics = {}
+        years = [str(year) for year in range(2021, 2026)]
+        for year in years:
+            try:
+                # Balance Sheet metrics
+                total_current_assets = self.clean_number(balance_sheet_df.loc[balance_sheet_df['Total_Type'] == 'Total_Current_Assets', year].iloc[0])
+                total_assets = self.clean_number(balance_sheet_df.loc[balance_sheet_df['Total_Type'] == 'Total_Assets', year].iloc[0])
+                total_current_liabilities = self.clean_number(balance_sheet_df.loc[balance_sheet_df['Total_Type'] == 'Total_Current_Liabilities', year].iloc[0])
+                total_liabilities = self.clean_number(balance_sheet_df.loc[balance_sheet_df['Total_Type'] == 'Total_Liabilities', year].iloc[0])
+                total_equity = self.clean_number(balance_sheet_df.loc[balance_sheet_df['Total_Type'] == 'Total_Shareholders_Equity', year].iloc[0])
+                inventory = self.clean_number(balance_sheet_df.loc[balance_sheet_df['Account'] == 'Inventory', year].iloc[0])
+                # Income Statement metrics
+                revenue = self.clean_number(income_stmt_df.loc[income_stmt_df.get('Revenue Items') == 'Total Net Revenue', year].iloc[0])
+                gross_profit = self.clean_number(income_stmt_df.loc[income_stmt_df.get('Item') == 'Gross Profit', year].iloc[0])
+                operating_expenses = self.clean_number(income_stmt_df.loc[income_stmt_df.get('Expense Category') == 'Total Operating Expenses', year].iloc[0])
+                ebit = self.clean_number(income_stmt_df.loc[income_stmt_df.get('Item') == 'EBIT', year].iloc[0])
+                net_earnings = self.clean_number(income_stmt_df.loc[income_stmt_df.get('Item') == 'Net Earnings', year].iloc[0])
+                metrics[year] = {
+                    "Liquidity": {
+                        "Current Ratio": round(total_current_assets / total_current_liabilities, 2) if total_current_liabilities != 0 else 0,
+                        "Quick Ratio": round((total_current_assets - inventory) / total_current_liabilities, 2) if total_current_liabilities != 0 else 0
+                    },
+                    "Profitability": {
+                        "Gross Margin": round((gross_profit / revenue * 100), 2) if revenue != 0 else 0,
+                        "Operating Margin": round((ebit / revenue * 100), 2) if revenue != 0 else 0,
+                        "Net Margin": round((net_earnings / revenue * 100), 2) if revenue != 0 else 0,
+                        "ROE": round((net_earnings / total_equity * 100), 2) if total_equity != 0 else 0,
+                        "ROA": round((net_earnings / total_assets * 100), 2) if total_assets != 0 else 0
                     },
+                    "Efficiency": {
+                        "Asset Turnover": round(revenue / total_assets, 2) if total_assets != 0 else 0,
+                        "Inventory Turnover": round(operating_expenses / inventory, 2) if inventory != 0 else 0
                     },
+                    "Leverage": {
+                        "Debt to Equity": round(total_liabilities / total_equity, 2) if total_equity != 0 else 0,
+                        "Debt Ratio": round(total_liabilities / total_assets, 2) if total_assets != 0 else 0
+                    },
+                    "Growth": {
+                        "Revenue": None if year == '2021' else
+                            round(((revenue - self.clean_number(income_stmt_df.loc[income_stmt_df.get('Revenue Items') == 'Total Net Revenue', str(int(year)-1)].iloc[0])) /
+                             self.clean_number(income_stmt_df.loc[income_stmt_df.get('Revenue Items') == 'Total Net Revenue', str(int(year)-1)].iloc[0]) * 100), 2)
                     }
                 }
+                # Add key absolute values (in thousands)
+                metrics[year]["Key Values"] = {
+                    "Total Assets": total_assets,
+                    "Total Liabilities": total_liabilities,
+                    "Total Equity": total_equity,
+                    "Revenue": revenue,
+                    "Net Earnings": net_earnings
+                }
+            except Exception as e:
+                print(f"Error calculating metrics for year {year}: {str(e)}")
+                metrics[year] = "Error in calculation"
+        return metrics
+    def create_insights_prompt(self, metrics, balance_sheet, income_stmt):
+        """Build the text prompt handed to the language model.
+
+        The prompt embeds four figures from the ``'2025'`` entry of
+        ``metrics`` and the first 800 characters of each raw statement.
+
+        Args:
+            metrics: Per-year metrics; ``metrics['2025']`` is read as a
+                nested dict.
+            balance_sheet: Raw balance-sheet text.
+            income_stmt: Raw income-statement text.
+
+        Returns:
+            The prompt string.
+        """
+        # NOTE(review): calculate_metrics stores the string
+        # "Error in calculation" for a failed year; the nested lookups below
+        # would then raise TypeError - confirm the caller handles that.
+        latest_year_metrics = metrics['2025']
+        return f"""<human>Analyze these financial statements (all values in thousands) and provide detailed insights:
+Key Metrics for Latest Year (2025):
+- Current Ratio: {latest_year_metrics['Liquidity']['Current Ratio']}
+- Net Margin: {latest_year_metrics['Profitability']['Net Margin']}%
+- Revenue: {latest_year_metrics['Key Values']['Revenue']:,.0f}
+- Net Earnings: {latest_year_metrics['Key Values']['Net Earnings']:,.0f}
+Balance Sheet Trends:
+{balance_sheet[:800]}
+Income Statement Trends:
+{income_stmt[:800]}
+Provide specific analysis on:
+1. Financial Health and Stability
+2. Profitability and Efficiency
+3. Growth Trends and Patterns
+4. Risk Factors and Concerns
+5. Strategic Recommendations
+6. Future Outlook</human>"""
+    def generate_ai_insights(self, prompt):
+        inputs = self.tiny_tokenizer(prompt, return_tensors="pt", truncation=True)
+        outputs = self.tiny_model.generate(
+            inputs["input_ids"],
+            max_length=1000,
+            temperature=0.7,
+            top_p=0.95,
+            do_sample=True
+        )
+        return self.tiny_tokenizer.decode(outputs[0], skip_special_tokens=True)
+    def analyze_sentiment(self, metrics):
+        latest_metrics = json.dumps(metrics['2025'])
+        inputs = self.finbert_tokenizer(latest_metrics, return_tensors="pt", truncation=True)
+        outputs = self.finbert_model(**inputs)
+        probs = torch.nn.functional.softmax(outputs.logits, dim=1)
+        sentiment_labels = ['negative', 'neutral', 'positive']
+        return {
+            'sentiment': sentiment_labels[probs.argmax().item()],
+            'confidence': f"{probs.max().item():.2f}"
+        }
+    def generate_roadmap(self, metrics):
+        latest_metrics = metrics['2025']
+        roadmap = {
+            "Immediate Actions (0-6 months)": [],
+            "Short-term Goals (6-12 months)": [],
+            "Medium-term Strategy (1-2 years)": [],
+            "Long-term Vision (3-5 years)": []
+        }
+        # Generate recommendations based on metrics comparison
+        current_ratio = latest_metrics["Liquidity"]["Current Ratio"]
+        net_margin = latest_metrics["Profitability"]["Net Margin"]
+        asset_turnover = latest_metrics["Efficiency"]["Asset Turnover"]
+        # Add specific recommendations based on metric analysis
+        if current_ratio > self.industry_benchmarks["Liquidity"]["Current Ratio"] * 1.5:
+            roadmap["Short-term Goals (6-12 months)"].append("Consider optimizing excess working capital")
+        elif current_ratio < self.industry_benchmarks["Liquidity"]["Current Ratio"]:
+            roadmap["Immediate Actions (0-6 months)"].append("Improve working capital management")
+        if net_margin < self.industry_benchmarks["Profitability"]["Net Margin"]:
+            roadmap["Immediate Actions (0-6 months)"].append("Review cost structure")
+            roadmap["Short-term Goals (6-12 months)"].append("Implement margin improvement initiatives")
+        if asset_turnover < self.industry_benchmarks["Efficiency"]["Asset Turnover"]:
+            roadmap["Medium-term Strategy (1-2 years)"].append("Optimize asset utilization")
+            roadmap["Long-term Vision (3-5 years)"].append("Consider strategic asset restructuring")
+        return roadmap
     def analyze_financials(self, balance_sheet_file, income_stmt_file):
         """Run the full analysis on two CSV files and return it as text.

         Args:
             balance_sheet_file: Path to the balance-sheet CSV.
             income_stmt_file: Path to the income-statement CSV.

         Returns:
             A JSON string holding the metrics, AI insights, sentiment and
             roadmap, or a string starting with ``"Error in analysis:"`` when
             any step fails. This method does not raise.
         """
         try:
             # Read files
             balance_sheet_df = pd.read_csv(balance_sheet_file)
             income_stmt_df = pd.read_csv(income_stmt_file)
             # Also read raw content for context. read_csv decodes as UTF-8 by
             # default, so read the text the same way instead of relying on the
             # platform's locale encoding.
             with open(balance_sheet_file, 'r', encoding='utf-8') as f:
                 balance_sheet_content = f.read()
             with open(income_stmt_file, 'r', encoding='utf-8') as f:
                 income_stmt_content = f.read()
             # Calculate metrics
             metrics = self.calculate_metrics(balance_sheet_df, income_stmt_df)
             # The steps below all read the 2025 entry; report a failed year
             # plainly instead of letting it surface as an indexing error.
             if not isinstance(metrics.get('2025'), dict):
                 return ("Error in analysis: metrics for 2025 could not be calculated; "
                         "check that both files contain the expected rows and a 2025 column")
             # Generate insights
             insights_prompt = self.create_insights_prompt(metrics, balance_sheet_content, income_stmt_content)
             insights = self.generate_ai_insights(insights_prompt)
             # Generate sentiment and roadmap
             sentiment = self.analyze_sentiment(metrics)
             roadmap = self.generate_roadmap(metrics)
             # Compile analysis
             analysis = {
                 "Financial Metrics": metrics,
                 "AI Insights": insights,
                 "Sentiment Analysis": sentiment,
                 "Strategic Roadmap": roadmap,
                 "Analysis Period": "2021-2025",
                 "Note": "All values in thousands"
             }
             return json.dumps(analysis, indent=2)
         except Exception as e:
             # UI boundary: the result is shown in a textbox, so return the
             # message rather than raising.
             return f"Error in analysis: {str(e)}"
 def create_interface():
+    analyzer = FinancialAnalyzer()
     iface = gr.Interface(
         fn=analyzer.analyze_financials,
             gr.File(label="Balance Sheet (CSV)", type="filepath"),
             gr.File(label="Income Statement (CSV)", type="filepath")
         ],
+        outputs=gr.Textbox(label="Analysis Results", lines=25),
+        title="Financial Statement Analyzer",
+        description="Upload financial statements for comprehensive analysis including AI insights, sentiment analysis, and strategic roadmap. (All values in thousands)"
     )
     return iface