import json

import gradio as gr
import pandas as pd
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
)


class FinancialAnalyzer:
    def __init__(self):
        print("Initializing Financial Analyzer...")
        self.initialize_models()

    def initialize_models(self):
        """Load the chat model used for the written analysis and FinBERT for sentiment."""
        print("Loading models...")
        self.tiny_tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
        self.tiny_model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

        self.finbert_tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
        self.finbert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
        print("Models loaded successfully!")

    def parse_markdown_table(self, markdown_content):
        """Parse a markdown table into a pandas DataFrame."""
        lines = markdown_content.strip().split('\n')

        headers = None
        rows = []

        for line in lines:
            if '|' not in line:
                continue
            # Skip separator lines such as |---|---|
            if '-|-' in line:
                continue
            # Drop the empty strings produced by the leading/trailing pipes
            row = [cell.strip() for cell in line.split('|')[1:-1]]
            if headers is None:
                headers = row
            else:
                rows.append(row)

        return pd.DataFrame(rows, columns=headers)

    def extract_financial_data(self, markdown_content):
        """Convert markdown content into a structured plain-text summary."""
        # Remove markdown heading/emphasis markers
        clean_text = markdown_content.replace('#', '').replace('*', '')

        # Group table lines under the most recent section heading
        tables = {}
        current_section = "General"

        for line in clean_text.split('\n'):
            stripped = line.strip()
            if stripped and not stripped.startswith('|'):
                current_section = stripped
            elif '|' in stripped:
                tables.setdefault(current_section, []).append(stripped)

        # Render each section's table as plain text
        structured_text = []
        for section, content in tables.items():
            structured_text.append(f"\n{section}:")
            if content:
                df = self.parse_markdown_table('\n'.join(content))
                structured_text.append(df.to_string())

        return '\n'.join(structured_text)

    def analyze_financials(self, balance_sheet_file, income_stmt_file):
        """Main analysis function: read both statements, prompt the LLM, and add sentiment."""
        try:
            # Read the uploaded markdown files
            with open(balance_sheet_file, 'r') as f:
                balance_sheet_content = f.read()
            with open(income_stmt_file, 'r') as f:
                income_stmt_content = f.read()

            # Convert to structured text
            structured_balance = self.extract_financial_data(balance_sheet_content)
            structured_income = self.extract_financial_data(income_stmt_content)

            # Create analysis prompt
            prompt = f"""Please analyze these financial statements and provide detailed insights:

Financial Statements Analysis (2021-2025)

Balance Sheet Summary:
{structured_balance}

Income Statement Summary:
{structured_income}

Please provide a detailed analysis including:
1. Financial Health Assessment
   - Liquidity position
   - Capital structure
   - Asset efficiency
2. Profitability Analysis
   - Revenue trends
   - Cost management
   - Profit margins
3. Growth Analysis
   - Year-over-year growth rates
   - Market position
   - Future growth potential
4. Risk Assessment
   - Operating risks
   - Financial risks
   - Strategic risks
5. Recommendations
   - Short-term actions
   - Medium-term strategy
   - Long-term planning
6. Future Outlook
   - Market conditions
   - Company positioning
   - Growth opportunities"""

            # Generate AI analysis; truncate the prompt so it leaves room for
            # new tokens inside the model's 2048-token context window
            inputs = self.tiny_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1536)
            outputs = self.tiny_model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=512,
                temperature=0.7,
                top_p=0.95,
                do_sample=True,
                pad_token_id=self.tiny_tokenizer.eos_token_id,
            )
            # Decode only the newly generated tokens, not the echoed prompt
            analysis = self.tiny_tokenizer.decode(
                outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
            )

            # Generate sentiment
            sentiment = self.analyze_sentiment(structured_balance + structured_income)

            # Compile results
            results = {
                "Financial Analysis": analysis,
                "Sentiment Analysis": sentiment,
                "Analysis Period": "2021-2025",
                "Note": "All values in millions ($M)"
            }

            return json.dumps(results, indent=2)

        except Exception as e:
            return f"Error in analysis: {str(e)}\n\nDetails: {type(e).__name__}"

    def analyze_sentiment(self, text):
        """Classify the overall sentiment of the statements with FinBERT."""
        inputs = self.finbert_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
        with torch.no_grad():
            outputs = self.finbert_model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=1)
        # Use the label mapping from the model config rather than a hard-coded order
        label = self.finbert_model.config.id2label[probs.argmax().item()]
        return {
            'sentiment': label,
            'confidence': f"{probs.max().item():.2f}"
        }


def create_interface():
    analyzer = FinancialAnalyzer()

    iface = gr.Interface(
        fn=analyzer.analyze_financials,
        inputs=[
            gr.File(label="Balance Sheet (Markdown)", type="filepath"),
            gr.File(label="Income Statement (Markdown)", type="filepath")
        ],
        outputs=gr.Textbox(label="Analysis Results", lines=25),
        title="Financial Statement Analyzer",
        description="Upload financial statements in Markdown format for comprehensive AI-powered analysis."
    )
    return iface


if __name__ == "__main__":
    iface = create_interface()
    iface.launch()