"""Gradio app that computes financial ratios from Markdown statements.

Two Markdown files (a balance sheet and an income statement) are parsed into
nested dictionaries, a fixed set of ratios is calculated from them, and a
TinyLlama chat model is asked to write a narrative analysis. A rule-based
summary is used when the model fails or returns too little text.
"""

import json
import re

import gradio as gr
import numpy as np
import pandas as pd
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

# Hugging Face model id used for the narrative analysis.
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# One cell of a Markdown table delimiter row: hyphens with optional
# alignment colons (``---``, ``:---``, ``---:``, ``:---:``).
_SEPARATOR_CELL = re.compile(r":?-+:?")

# Metric name -> function mapping a value to a qualitative label.
_INTERPRETATIONS = {
    'gross_profit_margin': lambda x: 'Strong' if x > 40 else 'Adequate' if x > 30 else 'Needs improvement',
    'current_ratio': lambda x: 'Strong' if x > 2 else 'Adequate' if x > 1 else 'Concerning',
    'debt_ratio': lambda x: 'Conservative' if x < 40 else 'Moderate' if x < 60 else 'High risk',
    'ar_turnover': lambda x: 'Excellent' if x > 8 else 'Good' if x > 4 else 'Needs improvement',
    'roe': lambda x: 'Strong' if x > 15 else 'Adequate' if x > 10 else 'Below target',
    'net_profit_margin': lambda x: 'Strong' if x > 10 else 'Adequate' if x > 5 else 'Needs improvement',
    'retained_earnings_ratio': lambda x: 'Strong' if x > 30 else 'Adequate' if x > 15 else 'Low retention',
    'sgr': lambda x: 'Strong' if x > 10 else 'Moderate' if x > 5 else 'Limited growth potential',
    'revenue_growth': lambda x: 'Strong' if x > 10 else 'Moderate' if x > 5 else 'Below industry average',
}


class FinancialDataset:
    """Map-style dataset pairing texts with integer labels for fine-tuning.

    Args:
        texts: Indexable collection of input texts.
        labels: Indexable collection of integer labels, aligned with ``texts``.
        tokenizer: Callable tokenizer returning ``input_ids`` and
            ``attention_mask`` tensors when called with ``return_tensors='pt'``.
        max_length: Length every example is padded/truncated to.
    """

    def __init__(self, texts, labels, tokenizer, max_length=512):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the tokenized example at ``idx`` as a dict of tensors."""
        text = str(self.texts[idx])
        inputs = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt',
        )
        # squeeze(0) drops only the batch axis added by return_tensors='pt';
        # a bare squeeze() would also collapse a length-1 sequence axis.
        return {
            'input_ids': inputs['input_ids'].squeeze(0),
            'attention_mask': inputs['attention_mask'].squeeze(0),
            'labels': torch.tensor(self.labels[idx], dtype=torch.long),
        }


class FinancialAnalyzer:
    """Parse Markdown financial statements, compute ratios, and explain them."""

    def __init__(self):
        print("Initializing Analyzer...")
        # Metrics from the most recent analyze_financials() call.
        self.last_metrics = {}
        self.initialize_models()
        print("Initialization complete!")

    def initialize_models(self):
        """Load the TinyLlama tokenizer and model and switch to eval mode.

        Raises:
            Exception: Whatever the model loading raised, after logging it.
        """
        try:
            self.llama_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
            self.llama_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
            self.llama_model.eval()
            print("Models loaded successfully!")
        except Exception as e:
            print(f"Error initializing models: {str(e)}")
            raise

    def clean_number(self, value):
        """Convert a table cell to ``float``.

        Strings may contain ``$`` and thousands separators; a value wrapped
        in parentheses is treated as negative (accounting notation).

        Returns:
            The parsed number, or ``0.0`` for empty or unparseable input.
        """
        try:
            if isinstance(value, str):
                value = value.replace('$', '').replace(',', '').strip()
                if '(' in value and ')' in value:
                    value = '-' + value.replace('(', '').replace(')', '')
            return float(value or 0)
        except (TypeError, ValueError):
            return 0.0

    def is_valid_markdown(self, file_path):
        """Return True if the file has at least one heading or table line.

        Any failure to open or decode the file (including ``file_path`` being
        ``None`` when no file was uploaded) yields ``False``.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                return any(line.startswith('#') or '|' in line for line in f)
        except (OSError, UnicodeError, TypeError, ValueError):
            return False

    @staticmethod
    def _is_separator_row(cells):
        """Return True if every cell is a table delimiter such as ``---``."""
        return bool(cells) and all(_SEPARATOR_CELL.fullmatch(cell) for cell in cells)

    def parse_financial_data(self, content):
        """Parse Markdown text into ``{section: {item: {column: number}}}``.

        Each ``#`` heading starts a new section. Within a section the first
        table row is the header, and every later row is stored under the text
        of its first cell.

        Returns:
            The nested dictionary, or ``{}`` if parsing fails.
        """
        try:
            data = {}
            current_section = ""
            current_table = []
            headers = None

            for line in content.split('\n'):
                if line.startswith('#'):
                    # A heading closes the table collected so far.
                    if current_table and headers:
                        data[current_section] = self.process_table(headers, current_table)
                    current_section = line.strip('# ')
                    current_table = []
                    headers = None
                elif '|' in line:
                    # Rows are expected to have leading and trailing pipes.
                    row = [cell.strip() for cell in line.split('|')[1:-1]]
                    if not row or self._is_separator_row(row):
                        continue
                    if not headers:
                        headers = row
                    else:
                        current_table.append(row)

            if current_table and headers:
                data[current_section] = self.process_table(headers, current_table)

            return data
        except Exception as e:
            print(f"Error parsing financial data: {str(e)}")
            return {}

    def process_table(self, headers, rows):
        """Convert table rows into ``{item: {header: number}}``.

        The first cell of each row (with surrounding ``*`` removed) is the
        item name; remaining cells are cleaned with :meth:`clean_number` and
        keyed by their column header. Rows whose length differs from the
        header are skipped.
        """
        try:
            processed_data = {}
            for row in rows:
                if len(row) != len(headers):
                    continue
                item_name = row[0].strip('*').strip()
                processed_data[item_name] = {
                    header: self.clean_number(value)
                    for header, value in zip(headers[1:], row[1:])
                }
            return processed_data
        except Exception as e:
            print(f"Error processing table: {str(e)}")
            return {}

    def get_nested_value(self, data, section, key, year):
        """Return ``data[section][key][str(year)]``, or 0 if any level is missing."""
        try:
            return data.get(section, {}).get(key, {}).get(str(year), 0)
        except (AttributeError, TypeError):
            return 0

    def calculate_metrics(self, income_data, balance_data, year=2025, base_year=2021):
        """Calculate the financial ratios used by the report.

        Args:
            income_data: Parsed income statement (see
                :meth:`parse_financial_data`).
            balance_data: Parsed balance sheet.
            year: Reporting year; the previous year is used for YoY growth.
            base_year: First year of the CAGR window.

        Returns:
            Dict of metric name to value. Margins, ROE, debt ratio, retained
            earnings ratio, SGR and growth figures are percentages; current
            ratio and AR turnover are plain ratios. Any ratio whose
            denominator is zero is reported as 0. Returns ``{}`` on error.
        """
        try:
            year = int(year)
            base_year = int(base_year)
            metrics = {}

            # 1. Gross Profit Margin
            revenue = self.get_nested_value(income_data, "Revenue", "Total Net Revenue", year)
            cogs = self.get_nested_value(income_data, "Cost and Gross Profit", "Cost of Goods Sold", year)
            gross_profit = revenue - cogs
            metrics['gross_profit_margin'] = (gross_profit / revenue) * 100 if revenue != 0 else 0

            # 2. Current Ratio
            current_assets = self.get_nested_value(balance_data, "Key Totals", "Total_Current_Assets", year)
            current_liabilities = self.get_nested_value(balance_data, "Key Totals", "Total_Current_Liabilities", year)
            metrics['current_ratio'] = current_assets / current_liabilities if current_liabilities != 0 else 0

            # 3. Debt Ratio
            total_liabilities = self.get_nested_value(balance_data, "Key Totals", "Total_Liabilities", year)
            total_assets = self.get_nested_value(balance_data, "Key Totals", "Total_Assets", year)
            metrics['debt_ratio'] = (total_liabilities / total_assets) * 100 if total_assets != 0 else 0

            # 4. Sustainable Growth Rate (SGR)
            net_income = self.get_nested_value(income_data, "Profit Summary", "Net Earnings", year)
            equity = self.get_nested_value(balance_data, "Key Totals", "Total_Shareholders_Equity", year)
            dividends = self.get_nested_value(income_data, "Profit Summary", "Dividends Paid", year)
            roe = (net_income / equity) * 100 if equity != 0 else 0
            retention_ratio = (net_income - dividends) / net_income if net_income != 0 else 0
            # roe is already a percentage, so the product is a percentage too;
            # it is displayed with '%' and compared against percent thresholds.
            metrics['sgr'] = roe * retention_ratio

            # 5. Accounts Receivable Turnover
            accounts_receivable = self.get_nested_value(
                balance_data, "Balance Sheet Data 2021-2025", "Accounts_Receivable", year)
            metrics['ar_turnover'] = revenue / accounts_receivable if accounts_receivable != 0 else 0

            # 6. Return on Equity (ROE)
            metrics['roe'] = roe

            # 7. Net Profit Margin
            metrics['net_profit_margin'] = (net_income / revenue) * 100 if revenue != 0 else 0

            # 8. Retained Earnings Ratio
            retained_earnings = self.get_nested_value(
                balance_data, "Balance Sheet Data 2021-2025", "Retained_Earnings", year)
            metrics['retained_earnings_ratio'] = (
                (retained_earnings / total_assets) * 100 if total_assets != 0 else 0)

            # 9. Revenue Growth (YoY)
            prior_revenue = self.get_nested_value(income_data, "Revenue", "Total Net Revenue", year - 1)
            metrics['revenue_growth'] = ((revenue / prior_revenue) - 1) * 100 if prior_revenue != 0 else 0

            # 10. Revenue CAGR (base_year to year)
            base_revenue = self.get_nested_value(income_data, "Revenue", "Total Net Revenue", base_year)
            periods = year - base_year
            # A negative ratio raised to a fractional power is complex in
            # Python, so CAGR is only computed for a non-negative ratio.
            if base_revenue != 0 and periods > 0 and revenue / base_revenue >= 0:
                metrics['revenue_cagr'] = ((revenue / base_revenue) ** (1 / periods) - 1) * 100
            else:
                metrics['revenue_cagr'] = 0

            return metrics
        except Exception as e:
            print(f"Error calculating metrics: {e}")
            return {}

    def generate_analysis(self, metrics):
        """Ask the language model for a narrative analysis of ``metrics``.

        Returns:
            The generated text (without the prompt). Falls back to
            :meth:`generate_fallback_analysis` when generation fails or
            yields fewer than 100 words.
        """
        try:
            def fmt(key):
                return f"{metrics.get(key, 0):.2f}"

            prompt = "\n".join([
                "[INST] As a financial analyst, provide a comprehensive analysis based on these metrics:",
                "",
                "1. Profitability:",
                f"- Gross Profit Margin: {fmt('gross_profit_margin')}%",
                f"- Net Profit Margin: {fmt('net_profit_margin')}%",
                f"- Return on Equity: {fmt('roe')}%",
                "",
                "2. Liquidity & Efficiency:",
                f"- Current Ratio: {fmt('current_ratio')}",
                f"- Accounts Receivable Turnover: {fmt('ar_turnover')}",
                "",
                "3. Financial Structure:",
                f"- Debt Ratio: {fmt('debt_ratio')}%",
                f"- Retained Earnings Ratio: {fmt('retained_earnings_ratio')}%",
                "",
                "4. Growth:",
                f"- Sustainable Growth Rate: {fmt('sgr')}%",
                f"- Revenue Growth (YoY): {fmt('revenue_growth')}%",
                "",
                "Provide:",
                "1. Overall financial health assessment",
                "2. Key strengths and concerns",
                "3. Operational efficiency analysis",
                "4. Specific recommendations for improvement",
                "[/INST]",
            ])

            inputs = self.llama_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
            prompt_length = inputs["input_ids"].shape[1]

            # Inference only: no gradients needed.
            with torch.no_grad():
                outputs = self.llama_model.generate(
                    inputs["input_ids"],
                    attention_mask=inputs["attention_mask"],
                    max_new_tokens=1024,
                    min_new_tokens=200,
                    # temperature/top_p only take effect when sampling.
                    do_sample=True,
                    temperature=0.7,
                    top_p=0.95,
                    repetition_penalty=1.2,
                    # length_penalty is omitted: it only applies to beam search.
                )

            # generate() returns prompt + continuation; keep the continuation
            # so the prompt is neither shown nor counted in the length check.
            generated_tokens = outputs[0][prompt_length:]
            analysis = self.llama_tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

            if len(analysis.split()) < 100:
                return self.generate_fallback_analysis(metrics)

            return analysis
        except Exception as e:
            print(f"Error generating analysis: {str(e)}")
            return self.generate_fallback_analysis(metrics)

    def _metric_line(self, metrics, label, key, unit=""):
        """Format one ``- label: value (interpretation)`` summary line."""
        value = metrics.get(key, 0)
        return f"- {label}: {value:.2f}{unit} ({self.interpret_metric(key, value)})"

    def generate_fallback_analysis(self, metrics):
        """Build a rule-based summary used when the model output is unusable."""
        try:
            lines = [
                "Financial Analysis Summary:",
                "",
                "1. Profitability Assessment:",
                self._metric_line(metrics, "Gross Profit Margin", 'gross_profit_margin', "%"),
                self._metric_line(metrics, "Net Profit Margin", 'net_profit_margin', "%"),
                self._metric_line(metrics, "Return on Equity", 'roe', "%"),
                "",
                "2. Liquidity & Efficiency Analysis:",
                self._metric_line(metrics, "Current Ratio", 'current_ratio'),
                self._metric_line(metrics, "AR Turnover", 'ar_turnover'),
                "",
                "3. Financial Structure:",
                self._metric_line(metrics, "Debt Ratio", 'debt_ratio', "%"),
                self._metric_line(metrics, "Retained Earnings Ratio", 'retained_earnings_ratio', "%"),
                "",
                "4. Growth & Sustainability:",
                self._metric_line(metrics, "Sustainable Growth Rate", 'sgr', "%"),
                self._metric_line(metrics, "Revenue Growth", 'revenue_growth', "%"),
                "",
                self.generate_recommendations(metrics),
            ]
            return "\n".join(lines)
        except Exception as e:
            return f"Error generating fallback analysis: {str(e)}"

    def interpret_metric(self, metric_name, value):
        """Return a qualitative label for ``value`` of the given metric.

        Unknown metric names yield ``'No interpretation'``; values that
        cannot be compared with the thresholds yield ``'Unable to interpret'``.
        """
        interpret = _INTERPRETATIONS.get(metric_name, lambda x: 'No interpretation')
        try:
            return interpret(value)
        except TypeError:
            return 'Unable to interpret'

    def generate_recommendations(self, metrics):
        """Return a "Key Recommendations" section driven by metric thresholds."""
        recommendations = []

        if metrics.get('gross_profit_margin', 0) < 30:
            recommendations.append("- Review pricing strategy and cost structure to improve gross margins")
        if metrics.get('current_ratio', 0) < 1.5:
            recommendations.append("- Strengthen working capital management to improve liquidity")
        if metrics.get('debt_ratio', 0) > 60:
            recommendations.append("- Consider debt reduction strategies to improve financial flexibility")
        if metrics.get('ar_turnover', 0) < 4:
            recommendations.append("- Improve accounts receivable collection practices")
        if metrics.get('roe', 0) < 10:
            recommendations.append("- Focus on improving operational efficiency to enhance returns")
        if metrics.get('revenue_growth', 0) < 5:
            recommendations.append("- Develop strategies to accelerate revenue growth")
            recommendations.append("- Consider strategic acquisitions or new market entry")

        if not recommendations:
            # Avoid emitting a header with nothing under it.
            recommendations.append("- No metrics fall below the review thresholds; maintain current performance")

        return "Key Recommendations:\n" + "\n".join(recommendations)

    def analyze_financials(self, balance_sheet_file, income_stmt_file):
        """Run the full pipeline on two Markdown files.

        Args:
            balance_sheet_file: Path to the balance sheet Markdown file.
            income_stmt_file: Path to the income statement Markdown file.

        Returns:
            A JSON string with the formatted metrics and narrative analysis,
            or a plain error message if validation or processing fails.
        """
        try:
            if not (self.is_valid_markdown(balance_sheet_file) and self.is_valid_markdown(income_stmt_file)):
                return "Error: One or both files are invalid or not in Markdown format."

            with open(balance_sheet_file, 'r', encoding='utf-8') as f:
                balance_sheet = f.read()
            with open(income_stmt_file, 'r', encoding='utf-8') as f:
                income_stmt = f.read()

            income_data = self.parse_financial_data(income_stmt)
            balance_data = self.parse_financial_data(balance_sheet)

            metrics = self.calculate_metrics(income_data, balance_data)
            self.last_metrics = metrics
            if not metrics:
                # calculate_metrics() already logged the cause; skip the
                # expensive model call when there is nothing to analyse.
                return "Error in analysis: financial metrics could not be calculated."

            analysis = self.generate_analysis(metrics)

            results = {
                "Financial Analysis": {
                    "Key Metrics": {
                        "Profitability": {
                            "Gross Profit Margin": f"{metrics['gross_profit_margin']:.2f}%",
                            "Net Profit Margin": f"{metrics['net_profit_margin']:.2f}%",
                            "Return on Equity": f"{metrics['roe']:.2f}%",
                        },
                        "Liquidity": {
                            "Current Ratio": f"{metrics['current_ratio']:.2f}",
                            "Accounts Receivable Turnover": f"{metrics['ar_turnover']:.2f}",
                        },
                        "Solvency": {
                            "Debt Ratio": f"{metrics['debt_ratio']:.2f}%",
                            "Retained Earnings Ratio": f"{metrics['retained_earnings_ratio']:.2f}%",
                        },
                        "Growth": {
                            "Sustainable Growth Rate": f"{metrics['sgr']:.2f}%",
                            "Revenue Growth (YoY)": f"{metrics['revenue_growth']:.2f}%",
                        },
                    },
                    "Analysis": analysis,
                    "Analysis Period": "2021-2025",
                    "Note": "Analysis based on CFI standards",
                }
            }

            return json.dumps(results, indent=2)
        except Exception as e:
            return f"Error in analysis: {str(e)}\n\nDetails: {type(e).__name__}"

    def fine_tune_models(self, train_texts, train_labels, epochs=3):
        """Fine-tune the loaded model on custom data and save the result.

        The tuned model and tokenizer are written to
        ``./financial_model_tuned``. Errors are logged, not raised.

        Args:
            train_texts: Training texts.
            train_labels: Integer labels aligned with ``train_texts``.
            epochs: Number of training epochs.
        """
        try:
            # Imported here because only this method needs the training API.
            from transformers import Trainer, TrainingArguments

            # NOTE(review): FinancialDataset yields one integer label per
            # example while the model is a causal LM - confirm this pairing
            # is what the training objective expects.
            train_dataset = FinancialDataset(train_texts, train_labels, self.llama_tokenizer)

            training_args = TrainingArguments(
                output_dir="./financial_model_tuned",
                num_train_epochs=epochs,
                per_device_train_batch_size=4,
                logging_dir="./logs",
                logging_steps=10,
                save_steps=50,
                eval_steps=50,
                learning_rate=2e-5,
                weight_decay=0.01,
                warmup_steps=500,
            )

            trainer = Trainer(
                model=self.llama_model,
                args=training_args,
                train_dataset=train_dataset,
            )
            trainer.train()

            self.llama_model.save_pretrained("./financial_model_tuned")
            self.llama_tokenizer.save_pretrained("./financial_model_tuned")

            print("Fine-tuning completed successfully!")
        except Exception as e:
            print(f"Error in fine-tuning: {str(e)}")


def create_interface():
    """Build the Gradio interface wired to a new :class:`FinancialAnalyzer`."""
    analyzer = FinancialAnalyzer()

    iface = gr.Interface(
        fn=analyzer.analyze_financials,
        inputs=[
            gr.File(label="Balance Sheet (Markdown)", type="filepath"),
            gr.File(label="Income Statement (Markdown)", type="filepath"),
        ],
        outputs=gr.Textbox(label="Analysis Results", lines=25),
        title="AI Financial Statement Analyzer",
        description="""Upload financial statements in Markdown format for AI-powered analysis.
        Analysis is based on Corporate Finance Institute (CFI) standards.""",
        cache_examples=False,
    )

    return iface


if __name__ == "__main__":
    iface = create_interface()
    iface.launch()