import gradio as gr
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM


class FinancialAnalyzer:
    def __init__(self):
        print("Initializing Analyzer...")
        self.initialize_model()
        print("Initialization complete!")

    def initialize_model(self):
        """Initialize TinyLlama model"""
        try:
            self.tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
            self.model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
            self.model.eval()
        except Exception as e:
            print(f"Error initializing model: {str(e)}")
            raise

    def clean_number(self, value):
        """Clean and convert numerical values"""
        try:
            if isinstance(value, str):
                # Remove currency symbols, commas, spaces
                value = value.replace('$', '').replace(',', '').strip()
                # Handle parentheses for negative numbers
                if '(' in value and ')' in value:
                    value = '-' + value.replace('(', '').replace(')', '')
            return float(value or 0)
        except (ValueError, TypeError):
            return 0.0

    def is_valid_markdown(self, file_path):
        """Check if a file is a valid Markdown file"""
        try:
            with open(file_path, 'r') as f:
                content = f.read()
            # Simple check for Markdown structure
            return any(line.startswith('#') or '|' in line for line in content.split('\n'))
        except Exception:
            return False

    def parse_financial_data(self, content):
        """Parse markdown content into structured data"""
        try:
            data = {}
            current_section = ""
            current_table = []
            headers = None

            for line in content.split('\n'):
                if line.startswith('#'):
                    if current_table and headers:
                        data[current_section] = self.process_table(headers, current_table)
                    current_section = line.strip('# ')
                    current_table = []
                    headers = None
                elif '|' in line:
                    # Skip separator rows such as |---|---| or | --- | --- |
                    if set(line.strip()) <= set('|-: '):
                        continue
                    row = [cell.strip() for cell in line.split('|')[1:-1]]
                    if not headers:
                        headers = row
                    else:
                        current_table.append(row)

            # Process last table
            if current_table and headers:
                data[current_section] = self.process_table(headers, current_table)

            return data
        except Exception as e:
            print(f"Error parsing financial data: {str(e)}")
            return {}

    def process_table(self, headers, rows):
        """Process table data into structured format"""
        try:
            processed_data = {}
            for row in rows:
                if len(row) == len(headers):
                    item_name = row[0].strip('*').strip()
                    processed_data[item_name] = {}
                    for i, value in enumerate(row[1:], 1):
                        processed_data[item_name][headers[i]] = self.clean_number(value)
            return processed_data
        except Exception as e:
            print(f"Error processing table: {str(e)}")
            return {}

    def extract_metrics(self, income_data, balance_data):
        """Extract and calculate key financial metrics"""
        try:
            metrics = {
                "Revenue": {
                    "2025": self.get_nested_value(income_data, "Revenue", "Total Net Revenue", "2025"),
                    "2021": self.get_nested_value(income_data, "Revenue", "Total Net Revenue", "2021")
                },
                "Profitability": {
                    "Gross_Profit_2025": self.get_nested_value(income_data, "Cost and Gross Profit", "Gross Profit", "2025"),
                    "Net_Earnings_2025": self.get_nested_value(income_data, "Profit Summary", "Net Earnings", "2025"),
                    "Operating_Expenses_2025": self.get_nested_value(income_data, "Operating Expenses", "Total Operating Expenses", "2025")
                },
                "Balance_Sheet": {
                    "Total_Assets_2025": self.get_nested_value(balance_data, "Key Totals", "Total_Assets", "2025"),
                    "Total_Liabilities_2025": self.get_nested_value(balance_data, "Key Totals", "Total_Liabilities", "2025"),
                    "Equity_2025": self.get_nested_value(balance_data, "Key Totals", "Total_Shareholders_Equity", "2025")
                }
            }

            # Calculate additional metrics
            revenue_2025 = metrics["Revenue"]["2025"]
            if revenue_2025 != 0:
metrics["Profitability"]["Gross_Margin"] = (metrics["Profitability"]["Gross_Profit_2025"] / revenue_2025) * 100 metrics["Profitability"]["Net_Margin"] = (metrics["Profitability"]["Net_Earnings_2025"] / revenue_2025) * 100 return metrics except Exception as e: print(f"Error extracting metrics: {str(e)}") return {} def get_nested_value(self, data, section, key, year): """Safely get nested dictionary value""" try: return data.get(section, {}).get(key, {}).get(year, 0) except: return 0 def generate_analysis_prompt(self, metrics): """Create analysis prompt from metrics""" try: return f""" Analyze these financial metrics for 2025 with a focus on business performance, trends, and risks: Revenue and Profitability: - Total Revenue: ${metrics['Revenue']['2025']:,.1f}M - Gross Profit: ${metrics['Profitability']['Gross_Profit_2025']:,.1f}M - Net Earnings: ${metrics['Profitability']['Net_Earnings_2025']:,.1f}M - Gross Margin: {metrics['Profitability'].get('Gross_Margin', 0):,.1f}% - Net Margin: {metrics['Profitability'].get('Net_Margin', 0):,.1f}% Balance Sheet Strength: - Total Assets: ${metrics['Balance_Sheet']['Total_Assets_2025']:,.1f}M - Total Liabilities: ${metrics['Balance_Sheet']['Total_Liabilities_2025']:,.1f}M - Shareholders' Equity: ${metrics['Balance_Sheet']['Equity_2025']:,.1f}M Explain key financial ratios and their implications. Discuss strategies for growth and risk mitigation. """ except Exception as e: print(f"Error generating prompt: {str(e)}") return "" def generate_analysis(self, prompt): """Generate analysis using TinyLlama""" try: inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1500) outputs = self.model.generate( inputs["input_ids"], max_new_tokens=500, # Generate up to 500 new tokens temperature=0.7, top_p=0.9, do_sample=True, pad_token_id=self.tokenizer.eos_token_id, no_repeat_ngram_size=3 ) analysis = self.tokenizer.decode(outputs[0], skip_special_tokens=True) # Clean up the response analysis = analysis.split("")[-1].strip() return analysis except Exception as e: return f"Error generating analysis: {str(e)}" def analyze_financials(self, balance_sheet_file, income_stmt_file): """Main analysis function""" try: # Validate files if not (self.is_valid_markdown(balance_sheet_file) and self.is_valid_markdown(income_stmt_file)): return "Error: One or both files are invalid or not in Markdown format." 
            # Read files
            with open(balance_sheet_file, 'r') as f:
                balance_sheet = f.read()
            with open(income_stmt_file, 'r') as f:
                income_stmt = f.read()

            # Parse financial data
            income_data = self.parse_financial_data(income_stmt)
            balance_data = self.parse_financial_data(balance_sheet)

            # Extract key metrics
            metrics = self.extract_metrics(income_data, balance_data)

            # Generate and get analysis
            prompt = self.generate_analysis_prompt(metrics)
            analysis = self.generate_analysis(prompt)

            # Prepare results
            results = {
                "Financial Analysis": {
                    "Key Metrics": metrics,
                    "AI Insights": analysis,
                    "Analysis Period": "2021-2025",
                    "Note": "All monetary values in millions ($M)"
                }
            }

            return json.dumps(results, indent=2)
        except Exception as e:
            return f"Error in analysis: {str(e)}\n\nDetails: {type(e).__name__}"


def create_interface():
    analyzer = FinancialAnalyzer()

    iface = gr.Interface(
        fn=analyzer.analyze_financials,
        inputs=[
            gr.File(label="Balance Sheet (Markdown)", type="filepath"),
            gr.File(label="Income Statement (Markdown)", type="filepath")
        ],
        outputs=gr.Textbox(label="Analysis Results", lines=25),
        title="Financial Statement Analyzer",
        description="Upload financial statements in Markdown format for AI-powered analysis"
    )

    return iface


if __name__ == "__main__":
    iface = create_interface()
    iface.launch()
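
# ---------------------------------------------------------------------------
# Illustrative input shape (hypothetical figures) assumed by
# parse_financial_data() and extract_metrics(): '#' headings name each
# section, the first pipe-table row holds the year columns, and the first
# cell of every data row names the line item that extract_metrics() looks up
# (e.g. "Revenue" in the income statement, "Key Totals" in the balance sheet).
#
#   ## Revenue
#   | Item              | 2025     | 2021    |
#   |-------------------|----------|---------|
#   | Total Net Revenue | 12,500.0 | 9,800.0 |
#
#   ## Key Totals
#   | Item                      | 2025     |
#   |---------------------------|----------|
#   | Total_Assets              | 50,000.0 |
#   | Total_Liabilities         | 30,000.0 |
#   | Total_Shareholders_Equity | 20,000.0 |
# ---------------------------------------------------------------------------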