Inni-23's picture
Update app.py
8eee353 verified
raw
history blame
9.58 kB
import gradio as gr
import pandas as pd
import json
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import re
class FinancialAnalyzer:
    """Parse Markdown financial statements and generate an LLM commentary.

    The analyzer reads two Markdown files (balance sheet and income
    statement), turns their pipe tables into nested dictionaries, extracts a
    fixed set of 2021/2025 metrics, and asks a causal language model to
    comment on them.
    """

    # Hugging Face model id used for both the tokenizer and the model.
    MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

    # Characters removed from numeric cells before conversion: currency sign,
    # thousands separator, Markdown bold markers, and (non-breaking) spaces.
    _NUMBER_NOISE = str.maketrans('', '', '$,* \u00a0')

    # A Markdown table delimiter cell: hyphens with optional alignment colons.
    _SEPARATOR_CELL = re.compile(r'^:?-+:?$')

    def __init__(self):
        """Load the tokenizer and model; progress is printed to stdout."""
        print("Initializing Analyzer...")
        self.initialize_model()
        print("Initialization complete!")

    def initialize_model(self):
        """Load the tokenizer and model named by ``MODEL_NAME``.

        Raises:
            Exception: whatever the loaders raise is printed and re-raised.
        """
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)
            self.model = AutoModelForCausalLM.from_pretrained(self.MODEL_NAME)
            self.model.eval()
        except Exception as e:
            print(f"Error initializing model: {str(e)}")
            raise

    def clean_number(self, value):
        """Convert a table cell to ``float``, returning 0.0 when it is not numeric.

        Strings have ``$``, ``,``, ``*`` (Markdown bold) and spaces removed,
        and a value wrapped in parentheses is treated as negative.
        ``None`` and empty strings become 0.0.
        """
        try:
            if isinstance(value, str):
                value = value.translate(self._NUMBER_NOISE).strip()
                # Accounting notation: "(1,234)" means -1234.
                if '(' in value and ')' in value:
                    value = '-' + value.replace('(', '').replace(')', '')
            return float(value or 0)
        except (TypeError, ValueError):
            # Placeholders such as "-" or "n/a" are treated as zero.
            return 0.0

    @staticmethod
    def _read_text(file_path):
        """Read a whole text file as UTF-8, dropping a leading BOM if present."""
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            return f.read()

    def is_valid_markdown(self, file_path):
        """Return True if the file has at least one heading or pipe-table line.

        Any failure to open or decode the file (including ``file_path`` being
        ``None``) yields ``False`` rather than an exception.
        """
        try:
            content = self._read_text(file_path)
        except (OSError, TypeError, ValueError):
            # ValueError also covers UnicodeDecodeError for non-text uploads.
            return False
        return any(
            line.startswith('#') or '|' in line
            for line in content.splitlines()
        )

    @staticmethod
    def _split_row(line):
        """Split one pipe-table line into stripped cells.

        A single leading and trailing pipe are optional and are removed, so
        empty interior cells keep their position.
        """
        text = line.strip()
        if text.startswith('|'):
            text = text[1:]
        if text.endswith('|'):
            text = text[:-1]
        return [cell.strip() for cell in text.split('|')]

    def _is_separator_row(self, row):
        """Return True for a header delimiter row such as ``| --- | :--: |``."""
        return bool(row) and all(self._SEPARATOR_CELL.match(cell) for cell in row)

    def parse_financial_data(self, content):
        """Parse Markdown text into ``{section: {item: {column: float}}}``.

        Each ``#`` heading starts a new section. Within a section the first
        table row is the header row and later rows are data rows; delimiter
        rows are skipped. Returns ``{}`` (after printing the error) if parsing
        fails.
        """
        try:
            data = {}
            current_section = ""
            current_table = []
            headers = None
            # splitlines() also handles "\r\n", so no stray "\r" ends up in
            # section names.
            for line in content.splitlines():
                if line.startswith('#'):
                    # A new heading closes the table collected so far.
                    if current_table and headers:
                        data[current_section] = self.process_table(headers, current_table)
                    current_section = line.strip('# ')
                    current_table = []
                    headers = None
                elif '|' in line:
                    row = self._split_row(line)
                    if not any(row) or self._is_separator_row(row):
                        continue
                    if not headers:
                        headers = row
                    else:
                        current_table.append(row)
            # Process last table
            if current_table and headers:
                data[current_section] = self.process_table(headers, current_table)
            return data
        except Exception as e:
            print(f"Error parsing financial data: {str(e)}")
            return {}

    def process_table(self, headers, rows):
        """Turn table rows into ``{item_name: {column_header: float}}``.

        The first cell of each row is the item name and the remaining cells
        are converted with ``clean_number``. Markdown bold markers are removed
        from item names and column headers. Rows whose length differs from the
        header row are skipped. Returns ``{}`` (after printing the error) if
        the inputs are malformed.
        """
        try:
            columns = [header.strip('*').strip() for header in headers]
            processed_data = {}
            for row in rows:
                if not row or len(row) != len(columns):
                    continue
                item_name = row[0].strip('*').strip()
                processed_data[item_name] = {
                    column: self.clean_number(cell)
                    for column, cell in zip(columns[1:], row[1:])
                }
            return processed_data
        except (AttributeError, IndexError, TypeError) as e:
            print(f"Error processing table: {str(e)}")
            return {}

    def extract_metrics(self, income_data, balance_data):
        """Collect the fixed set of metrics used in the prompt and the report.

        Missing values default to 0. ``Gross_Margin`` and ``Net_Margin``
        (percentages) are added only when 2025 revenue is non-zero. Returns
        ``{}`` (after printing the error) on failure.
        """
        try:
            income = lambda section, key, year: self.get_nested_value(income_data, section, key, year)
            balance = lambda key: self.get_nested_value(balance_data, "Key Totals", key, "2025")
            metrics = {
                "Revenue": {
                    "2025": income("Revenue", "Total Net Revenue", "2025"),
                    "2021": income("Revenue", "Total Net Revenue", "2021"),
                },
                "Profitability": {
                    "Gross_Profit_2025": income("Cost and Gross Profit", "Gross Profit", "2025"),
                    "Net_Earnings_2025": income("Profit Summary", "Net Earnings", "2025"),
                    "Operating_Expenses_2025": income("Operating Expenses", "Total Operating Expenses", "2025"),
                },
                "Balance_Sheet": {
                    "Total_Assets_2025": balance("Total_Assets"),
                    "Total_Liabilities_2025": balance("Total_Liabilities"),
                    "Equity_2025": balance("Total_Shareholders_Equity"),
                },
            }
            # Margins are undefined without revenue, so they are simply omitted.
            revenue_2025 = metrics["Revenue"]["2025"]
            if revenue_2025 != 0:
                profitability = metrics["Profitability"]
                profitability["Gross_Margin"] = (profitability["Gross_Profit_2025"] / revenue_2025) * 100
                profitability["Net_Margin"] = (profitability["Net_Earnings_2025"] / revenue_2025) * 100
            return metrics
        except Exception as e:
            print(f"Error extracting metrics: {str(e)}")
            return {}

    def get_nested_value(self, data, section, key, year):
        """Return ``data[section][key][year]``, or 0 if any level is missing."""
        try:
            return data.get(section, {}).get(key, {}).get(year, 0)
        except (AttributeError, TypeError):
            # ``data`` or an intermediate level is not a dict, or a key is
            # unhashable.
            return 0

    def generate_analysis_prompt(self, metrics):
        """Build the text prompt sent to the model from extracted metrics.

        Returns an empty string (after printing the error) when ``metrics``
        lacks the expected keys.
        """
        # NOTE(review): the prompt wraps the request in literal <human> tags;
        # confirm whether the chat checkpoint expects
        # tokenizer.apply_chat_template instead.
        try:
            revenue = metrics['Revenue']
            profit = metrics['Profitability']
            balance = metrics['Balance_Sheet']
            lines = [
                "<human>",
                "Analyze these financial metrics for 2025 with a focus on business performance, trends, and risks:",
                "Revenue and Profitability:",
                f"- Total Revenue: ${revenue['2025']:,.1f}M",
                f"- Gross Profit: ${profit['Gross_Profit_2025']:,.1f}M",
                f"- Net Earnings: ${profit['Net_Earnings_2025']:,.1f}M",
                f"- Gross Margin: {profit.get('Gross_Margin', 0):,.1f}%",
                f"- Net Margin: {profit.get('Net_Margin', 0):,.1f}%",
                "Balance Sheet Strength:",
                f"- Total Assets: ${balance['Total_Assets_2025']:,.1f}M",
                f"- Total Liabilities: ${balance['Total_Liabilities_2025']:,.1f}M",
                f"- Shareholders' Equity: ${balance['Equity_2025']:,.1f}M",
                "Explain key financial ratios and their implications. Discuss strategies for growth and risk mitigation.",
                "</human>",
            ]
            return "\n".join(lines)
        except Exception as e:
            print(f"Error generating prompt: {str(e)}")
            return ""

    def generate_analysis(self, prompt):
        """Generate the model's commentary for ``prompt``.

        Returns only the newly generated text; the prompt is not echoed back.
        On failure an ``"Error generating analysis: ..."`` string is returned
        instead of raising.
        """
        try:
            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1500)
            input_ids = inputs["input_ids"]
            outputs = self.model.generate(
                input_ids=input_ids,
                # Pass the mask explicitly because pad_token_id is set to EOS.
                attention_mask=inputs.get("attention_mask"),
                max_new_tokens=500,  # Generate up to 500 new tokens
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
                no_repeat_ngram_size=3,
            )
            # The output sequence starts with the prompt tokens; keep only the
            # continuation so the prompt is not repeated in the result.
            new_tokens = outputs[0][input_ids.shape[-1]:]
            return self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
        except Exception as e:
            return f"Error generating analysis: {str(e)}"

    def analyze_financials(self, balance_sheet_file, income_stmt_file):
        """Run the full pipeline on two Markdown files and return a report.

        Args:
            balance_sheet_file: path to the balance sheet Markdown file.
            income_stmt_file: path to the income statement Markdown file.

        Returns:
            A JSON string with the metrics and the model's commentary, or a
            string starting with ``"Error"`` when a step fails.
        """
        try:
            # Validate files
            if not (self.is_valid_markdown(balance_sheet_file) and self.is_valid_markdown(income_stmt_file)):
                return "Error: One or both files are invalid or not in Markdown format."
            balance_sheet = self._read_text(balance_sheet_file)
            income_stmt = self._read_text(income_stmt_file)
            # Parse financial data
            income_data = self.parse_financial_data(income_stmt)
            balance_data = self.parse_financial_data(balance_sheet)
            # Extract key metrics; stop early rather than prompting the model
            # with nothing.
            metrics = self.extract_metrics(income_data, balance_data)
            if not metrics:
                return "Error: Could not extract financial metrics from the provided files."
            prompt = self.generate_analysis_prompt(metrics)
            if not prompt:
                return "Error: Could not build the analysis prompt from the extracted metrics."
            analysis = self.generate_analysis(prompt)
            # Prepare results
            results = {
                "Financial Analysis": {
                    "Key Metrics": metrics,
                    "AI Insights": analysis,
                    "Analysis Period": "2021-2025",
                    "Note": "All monetary values in millions ($M)"
                }
            }
            return json.dumps(results, indent=2)
        except Exception as e:
            return f"Error in analysis: {str(e)}\n\nDetails: {type(e).__name__}"
def create_interface():
    """Build the Gradio interface that exposes the financial analyzer.

    A ``FinancialAnalyzer`` is created here, so the model is loaded once per
    interface. The interface takes two uploaded files (passed to the analyzer
    as file paths) and shows the returned text in a textbox.
    """
    analyzer = FinancialAnalyzer()

    # Two uploads, in the order analyze_financials expects its arguments.
    file_inputs = [
        gr.File(label="Balance Sheet (Markdown)", type="filepath"),
        gr.File(label="Income Statement (Markdown)", type="filepath"),
    ]
    results_box = gr.Textbox(label="Analysis Results", lines=25)

    return gr.Interface(
        fn=analyzer.analyze_financials,
        inputs=file_inputs,
        outputs=results_box,
        title="Financial Statement Analyzer",
        description="Upload financial statements in Markdown format for AI-powered analysis",
    )
if __name__ == "__main__":
    # Script entry point: build the interface, then start the Gradio app.
    iface = create_interface()
    iface.launch()