import gradio as gr
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM


class FinancialAnalyzer:
    def __init__(self):
        print("Initializing Analyzer...")
        self.initialize_model()
        print("Initialization complete!")

    def initialize_model(self):
        """Initialize TinyLlama model"""
        try:
            self.tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
            self.model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
            self.model.eval()
        except Exception as e:
            print(f"Error initializing model: {str(e)}")
            raise

    def clean_number(self, value):
        """Clean and convert numerical values"""
        try:
            if isinstance(value, str):
                # Remove currency symbols, commas, spaces
                value = value.replace('$', '').replace(',', '').strip()
                # Handle parentheses for negative numbers
                if '(' in value and ')' in value:
                    value = '-' + value.replace('(', '').replace(')', '')
            return float(value or 0)
        except (ValueError, TypeError):
            return 0.0

    def is_valid_markdown(self, file_path):
        """Check if a file is a valid Markdown file"""
        try:
            with open(file_path, 'r') as f:
                content = f.read()
            # Simple check for Markdown structure
            return any(line.startswith('#') or '|' in line for line in content.split('\n'))
        except Exception:
            return False

    def parse_financial_data(self, content):
        """Parse markdown content into structured data"""
        try:
            data = {}
            current_section = ""
            current_table = []
            headers = None

            for line in content.split('\n'):
                if line.startswith('#'):
                    if current_table and headers:
                        data[current_section] = self.process_table(headers, current_table)
                    current_section = line.strip('# ')
                    current_table = []
                    headers = None
                elif '|' in line:
                    # Skip separator rows such as |---|---| or | --- | --- |
                    if set(line.strip()) <= set('|-: '):
                        continue
                    row = [cell.strip() for cell in line.split('|')[1:-1]]
                    if not headers:
                        headers = row
                    else:
                        current_table.append(row)

            # Process last table
            if current_table and headers:
                data[current_section] = self.process_table(headers, current_table)

            return data
        except Exception as e:
            print(f"Error parsing financial data: {str(e)}")
            return {}

    def process_table(self, headers, rows):
        """Process table data into structured format"""
        try:
            processed_data = {}
            for row in rows:
                if len(row) == len(headers):
                    item_name = row[0].strip('*').strip()
                    processed_data[item_name] = {}
                    for i, value in enumerate(row[1:], 1):
                        processed_data[item_name][headers[i]] = self.clean_number(value)
            return processed_data
        except Exception as e:
            print(f"Error processing table: {str(e)}")
            return {}

    def extract_metrics(self, income_data, balance_data):
        """Extract and calculate key financial metrics"""
        try:
            metrics = {
                "Revenue": {
                    "2025": self.get_nested_value(income_data, "Revenue", "Total Net Revenue", "2025"),
                    "2021": self.get_nested_value(income_data, "Revenue", "Total Net Revenue", "2021")
                },
                "Profitability": {
                    "Gross_Profit_2025": self.get_nested_value(income_data, "Cost and Gross Profit", "Gross Profit", "2025"),
                    "Net_Earnings_2025": self.get_nested_value(income_data, "Profit Summary", "Net Earnings", "2025"),
                    "Operating_Expenses_2025": self.get_nested_value(income_data, "Operating Expenses", "Total Operating Expenses", "2025")
                },
                "Balance_Sheet": {
                    "Total_Assets_2025": self.get_nested_value(balance_data, "Key Totals", "Total_Assets", "2025"),
                    "Total_Liabilities_2025": self.get_nested_value(balance_data, "Key Totals", "Total_Liabilities", "2025"),
                    "Equity_2025": self.get_nested_value(balance_data, "Key Totals", "Total_Shareholders_Equity", "2025")
                }
            }

            # Calculate additional metrics
            revenue_2025 = metrics["Revenue"]["2025"]
            if revenue_2025 != 0:
metrics["Profitability"]["Gross_Margin"] = (metrics["Profitability"]["Gross_Profit_2025"] / revenue_2025) * 100 metrics["Profitability"]["Net_Margin"] = (metrics["Profitability"]["Net_Earnings_2025"] / revenue_2025) * 100 return metrics except Exception as e: print(f"Error extracting metrics: {str(e)}") return {} def get_nested_value(self, data, section, key, year): """Safely get nested dictionary value""" try: return data.get(section, {}).get(key, {}).get(year, 0) except: return 0 def generate_analysis_prompt(self, metrics): """Create analysis prompt from metrics""" try: return f""" Analyze these financial metrics for 2025 with a focus on business performance, trends, and risks: Revenue and Profitability: - Total Revenue: ${metrics['Revenue']['2025']:,.1f}M - Gross Profit: ${metrics['Profitability']['Gross_Profit_2025']:,.1f}M - Net Earnings: ${metrics['Profitability']['Net_Earnings_2025']:,.1f}M - Gross Margin: {metrics['Profitability'].get('Gross_Margin', 0):,.1f}% - Net Margin: {metrics['Profitability'].get('Net_Margin', 0):,.1f}% Balance Sheet Strength: - Total Assets: ${metrics['Balance_Sheet']['Total_Assets_2025']:,.1f}M - Total Liabilities: ${metrics['Balance_Sheet']['Total_Liabilities_2025']:,.1f}M - Shareholders' Equity: ${metrics['Balance_Sheet']['Equity_2025']:,.1f}M Explain key financial ratios and their implications. Discuss strategies for growth and risk mitigation. """ except Exception as e: print(f"Error generating prompt: {str(e)}") return "" def generate_analysis(self, prompt): """Generate analysis using TinyLlama""" try: inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1500) outputs = self.model.generate( inputs["input_ids"], max_new_tokens=500, # Generate up to 500 new tokens temperature=0.7, top_p=0.9, do_sample=True, pad_token_id=self.tokenizer.eos_token_id, no_repeat_ngram_size=3 ) analysis = self.tokenizer.decode(outputs[0], skip_special_tokens=True) # Clean up the response analysis = analysis.split("")[-1].strip() return analysis except Exception as e: return f"Error generating analysis: {str(e)}" def analyze_financials(self, balance_sheet_file, income_stmt_file): """Main analysis function""" try: # Validate files if not (self.is_valid_markdown(balance_sheet_file) and self.is_valid_markdown(income_stmt_file)): return "Error: One or both files are invalid or not in Markdown format." 
            # Read files
            with open(balance_sheet_file, 'r') as f:
                balance_sheet = f.read()
            with open(income_stmt_file, 'r') as f:
                income_stmt = f.read()

            # Parse financial data
            income_data = self.parse_financial_data(income_stmt)
            balance_data = self.parse_financial_data(balance_sheet)

            # Extract key metrics
            metrics = self.extract_metrics(income_data, balance_data)

            # Generate and get analysis
            prompt = self.generate_analysis_prompt(metrics)
            analysis = self.generate_analysis(prompt)

            # Prepare results
            results = {
                "Financial Analysis": {
                    "Key Metrics": metrics,
                    "AI Insights": analysis,
                    "Analysis Period": "2021-2025",
                    "Note": "All monetary values in millions ($M)"
                }
            }

            return json.dumps(results, indent=2)
        except Exception as e:
            return f"Error in analysis: {str(e)}\n\nDetails: {type(e).__name__}"


def create_interface():
    analyzer = FinancialAnalyzer()

    iface = gr.Interface(
        fn=analyzer.analyze_financials,
        inputs=[
            gr.File(label="Balance Sheet (Markdown)", type="filepath"),
            gr.File(label="Income Statement (Markdown)", type="filepath")
        ],
        outputs=gr.Textbox(label="Analysis Results", lines=25),
        title="Financial Statement Analyzer",
        description="Upload financial statements in Markdown format for AI-powered analysis"
    )

    return iface


if __name__ == "__main__":
    iface = create_interface()
    iface.launch()
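
# ---------------------------------------------------------------------------
# Illustrative input shape (hypothetical figures) assumed by
# parse_financial_data() and extract_metrics(): '#' headings name each
# section, the first pipe-table row holds the year columns, and the first
# cell of every data row names the line item that extract_metrics() looks up
# (e.g. "Revenue" in the income statement, "Key Totals" in the balance sheet).
#
#   ## Revenue
#   | Item              | 2025     | 2021    |
#   |-------------------|----------|---------|
#   | Total Net Revenue | 12,500.0 | 9,800.0 |
#
#   ## Key Totals
#   | Item                      | 2025     |
#   |---------------------------|----------|
#   | Total_Assets              | 50,000.0 |
#   | Total_Liabilities         | 30,000.0 |
#   | Total_Shareholders_Equity | 20,000.0 |
# ---------------------------------------------------------------------------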