import gradio as gr
import json
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

class FinancialAnalyzer:
    def __init__(self):
        print("Initializing Analyzer...")
        self.initialize_model()
        print("Initialization complete!")

    def initialize_model(self):
        """Initialize TinyLlama model"""
        try:
            self.tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
            self.model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
            self.model.eval()
        except Exception as e:
            print(f"Error initializing model: {str(e)}")
            raise
    def clean_number(self, value):
        """Clean and convert numerical values"""
        try:
            if isinstance(value, str):
                # Remove currency symbols, commas, spaces
                value = value.replace('$', '').replace(',', '').strip()
                # Handle parentheses for negative numbers
                if '(' in value and ')' in value:
                    value = '-' + value.replace('(', '').replace(')', '')
            return float(value or 0)
        except (TypeError, ValueError):
            return 0.0
    def is_valid_markdown(self, file_path):
        """Check if a file is a valid Markdown file"""
        try:
            with open(file_path, 'r') as f:
                content = f.read()
            # Simple check for Markdown structure (headings or tables)
            return any(line.startswith('#') or '|' in line for line in content.split('\n'))
        except Exception:
            return False
    def parse_financial_data(self, content):
        """Parse markdown content into structured data"""
        try:
            data = {}
            current_section = ""
            current_table = []
            headers = None
            for line in content.split('\n'):
                if line.startswith('#'):
                    if current_table and headers:
                        data[current_section] = self.process_table(headers, current_table)
                    current_section = line.strip('# ')
                    current_table = []
                    headers = None
                elif '|' in line:
                    # Skip separator rows such as |---|---| or | --- | --- |
                    if set(line.strip()) <= set('|-: '):
                        continue
                    row = [cell.strip() for cell in line.split('|')[1:-1]]
                    if not headers:
                        headers = row
                    else:
                        current_table.append(row)
            # Process the last table
            if current_table and headers:
                data[current_section] = self.process_table(headers, current_table)
            return data
        except Exception as e:
            print(f"Error parsing financial data: {str(e)}")
            return {}
    def process_table(self, headers, rows):
        """Process table data into structured format"""
        try:
            processed_data = {}
            for row in rows:
                if len(row) == len(headers):
                    item_name = row[0].strip('*').strip()
                    processed_data[item_name] = {}
                    for i, value in enumerate(row[1:], 1):
                        processed_data[item_name][headers[i]] = self.clean_number(value)
            return processed_data
        except Exception as e:
            print(f"Error processing table: {str(e)}")
            return {}
    def extract_metrics(self, income_data, balance_data):
        """Extract and calculate key financial metrics"""
        try:
            metrics = {
                "Revenue": {
                    "2025": self.get_nested_value(income_data, "Revenue", "Total Net Revenue", "2025"),
                    "2021": self.get_nested_value(income_data, "Revenue", "Total Net Revenue", "2021")
                },
                "Profitability": {
                    "Gross_Profit_2025": self.get_nested_value(income_data, "Cost and Gross Profit", "Gross Profit", "2025"),
                    "Net_Earnings_2025": self.get_nested_value(income_data, "Profit Summary", "Net Earnings", "2025"),
                    "Operating_Expenses_2025": self.get_nested_value(income_data, "Operating Expenses", "Total Operating Expenses", "2025")
                },
                "Balance_Sheet": {
                    "Total_Assets_2025": self.get_nested_value(balance_data, "Key Totals", "Total_Assets", "2025"),
                    "Total_Liabilities_2025": self.get_nested_value(balance_data, "Key Totals", "Total_Liabilities", "2025"),
                    "Equity_2025": self.get_nested_value(balance_data, "Key Totals", "Total_Shareholders_Equity", "2025")
                }
            }
            # Calculate additional margin metrics, guarding against division by zero
            revenue_2025 = metrics["Revenue"]["2025"]
            if revenue_2025 != 0:
                metrics["Profitability"]["Gross_Margin"] = (metrics["Profitability"]["Gross_Profit_2025"] / revenue_2025) * 100
                metrics["Profitability"]["Net_Margin"] = (metrics["Profitability"]["Net_Earnings_2025"] / revenue_2025) * 100
            return metrics
        except Exception as e:
            print(f"Error extracting metrics: {str(e)}")
            return {}
    def get_nested_value(self, data, section, key, year):
        """Safely get a nested dictionary value"""
        try:
            return data.get(section, {}).get(key, {}).get(year, 0)
        except (AttributeError, TypeError):
            return 0
    def generate_analysis_prompt(self, metrics):
        """Create analysis prompt from metrics"""
        try:
            return f"""
Analyze these financial metrics for 2025 with a focus on business performance, trends, and risks:
Revenue and Profitability:
- Total Revenue: ${metrics['Revenue']['2025']:,.1f}M
- Gross Profit: ${metrics['Profitability']['Gross_Profit_2025']:,.1f}M
- Net Earnings: ${metrics['Profitability']['Net_Earnings_2025']:,.1f}M
- Gross Margin: {metrics['Profitability'].get('Gross_Margin', 0):,.1f}%
- Net Margin: {metrics['Profitability'].get('Net_Margin', 0):,.1f}%
Balance Sheet Strength:
- Total Assets: ${metrics['Balance_Sheet']['Total_Assets_2025']:,.1f}M
- Total Liabilities: ${metrics['Balance_Sheet']['Total_Liabilities_2025']:,.1f}M
- Shareholders' Equity: ${metrics['Balance_Sheet']['Equity_2025']:,.1f}M
Explain key financial ratios and their implications. Discuss strategies for growth and risk mitigation.
"""
        except Exception as e:
            print(f"Error generating prompt: {str(e)}")
            return ""
    def generate_analysis(self, prompt):
        """Generate analysis using TinyLlama"""
        try:
            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1500)
            with torch.no_grad():
                outputs = self.model.generate(
                    inputs["input_ids"],
                    attention_mask=inputs["attention_mask"],
                    max_new_tokens=500,  # Generate up to 500 new tokens
                    temperature=0.7,
                    top_p=0.9,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id,
                    no_repeat_ngram_size=3
                )
            # Decode only the newly generated tokens so the echoed prompt is dropped
            generated_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
            analysis = self.tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
            return analysis
        except Exception as e:
            return f"Error generating analysis: {str(e)}"
    def analyze_financials(self, balance_sheet_file, income_stmt_file):
        """Main analysis function"""
        try:
            # Validate files
            if not (self.is_valid_markdown(balance_sheet_file) and self.is_valid_markdown(income_stmt_file)):
                return "Error: One or both files are invalid or not in Markdown format."

            # Read files
            with open(balance_sheet_file, 'r') as f:
                balance_sheet = f.read()
            with open(income_stmt_file, 'r') as f:
                income_stmt = f.read()

            # Parse financial data
            income_data = self.parse_financial_data(income_stmt)
            balance_data = self.parse_financial_data(balance_sheet)

            # Extract key metrics
            metrics = self.extract_metrics(income_data, balance_data)

            # Generate the prompt and run the analysis
            prompt = self.generate_analysis_prompt(metrics)
            analysis = self.generate_analysis(prompt)

            # Prepare results
            results = {
                "Financial Analysis": {
                    "Key Metrics": metrics,
                    "AI Insights": analysis,
                    "Analysis Period": "2021-2025",
                    "Note": "All monetary values in millions ($M)"
                }
            }
            return json.dumps(results, indent=2)
        except Exception as e:
            return f"Error in analysis: {str(e)}\n\nDetails: {type(e).__name__}"

def create_interface():
    analyzer = FinancialAnalyzer()
    iface = gr.Interface(
        fn=analyzer.analyze_financials,
        inputs=[
            gr.File(label="Balance Sheet (Markdown)", type="filepath"),
            gr.File(label="Income Statement (Markdown)", type="filepath")
        ],
        outputs=gr.Textbox(label="Analysis Results", lines=25),
        title="Financial Statement Analyzer",
        description="Upload financial statements in Markdown format for AI-powered analysis"
    )
    return iface


if __name__ == "__main__":
    iface = create_interface()
    iface.launch()
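
# A minimal sketch of the Markdown input the parser above can handle. The section
# headings and row labels are assumptions inferred from the keys referenced in
# extract_metrics ("Revenue", "Total Net Revenue", "Key Totals", "Total_Assets", ...);
# the figures are purely illustrative.
#
# # Revenue
# | Item              | 2021    | 2025    |
# |-------------------|---------|---------|
# | Total Net Revenue | 1,200.0 | 1,850.0 |
#
# # Key Totals
# | Item                      | 2025    |
# |---------------------------|---------|
# | Total_Assets              | 3,400.0 |
# | Total_Liabilities         | 1,900.0 |
# | Total_Shareholders_Equity | 1,500.0 |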