# walaa2022's picture
# Update app.py
# 88b54ed verified
# raw
# history blame
# 9.46 kB
import gradio as gr
import pandas as pd
import json
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
class FastFinancialAnalyzer:
    """Turn markdown financial statements into key metrics plus a model-written commentary.

    The pipeline is: read two markdown files, parse their pipe tables into
    row dictionaries, pull revenue / net earnings / total assets for 2021 and
    2025, format those numbers into a prompt, and let the causal language
    model loaded in ``initialize_model`` generate the commentary.
    """

    # Checkpoint used for both the tokenizer and the model.
    MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

    def __init__(self):
        """Load the tokenizer and model, printing progress to stdout."""
        print("Initializing Analyzer...")
        self.initialize_model()
        print("Initialization complete!")

    def initialize_model(self):
        """Load the tokenizer and model for ``MODEL_NAME`` and switch to eval mode."""
        self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)
        self.model = AutoModelForCausalLM.from_pretrained(self.MODEL_NAME)
        self.model.eval()  # Set to evaluation mode

    @staticmethod
    def _is_separator_row(cells):
        """Return True when every cell is a table alignment marker (``---``, ``:--``, ``--:``)."""
        return bool(cells) and all(
            '-' in cell and set(cell) <= {'-', ':'} for cell in cells
        )

    @staticmethod
    def _merge_sections(parsed):
        """Flatten a ``{section: {row_key: row}}`` mapping into one ``{row_key: row}`` dict."""
        rows = {}
        for section_rows in parsed.values():
            if isinstance(section_rows, dict):
                rows.update(section_rows)
        return rows

    @staticmethod
    def _extract_response(generated_text):
        """Return the text that follows the prompt inside a decoded generation.

        The prompt ends with ``</human>``; everything after the first closing
        tag is the model's continuation. When the tag is absent, fall back to
        dropping everything up to the last ``<human>`` marker.
        """
        _, closing_tag, continuation = generated_text.partition("</human>")
        if closing_tag:
            return continuation.strip()
        return generated_text.split("<human>")[-1].strip()

    def parse_markdown_table(self, content, section_name=""):
        """Extract rows from the pipe tables in a markdown document.

        Args:
            content: Markdown text.
            section_name: Section label used when the text has no ``##`` heading.

        Returns:
            A single-entry dict ``{section: rows}``. ``section`` is the last
            ``##`` heading seen (or ``section_name``); ``rows`` maps each
            row's first-column value to a ``{header: cell}`` dict.
        """
        data = {}
        headers = []
        current_section = section_name

        for line in content.split('\n'):
            if line.startswith('##'):
                current_section = line.strip('#').strip()
                continue
            if '|' not in line:
                continue

            # Rows are expected to be wrapped in outer pipes: | a | b |
            cells = [cell.strip() for cell in line.split('|')[1:-1]]

            # Alignment rows may be written "|---|---|" or "| --- | :--: |";
            # neither form carries data.
            if self._is_separator_row(cells):
                continue

            if not headers:
                # The first table row in the document names the columns for
                # every row that follows.
                headers = cells
                continue

            # Rows whose width differs from the header row are dropped.
            if len(cells) == len(headers):
                row_data = dict(zip(headers, cells))
                key = row_data.get(headers[0], "").strip()
                if key:
                    data[key] = row_data

        return {current_section: data}

    def clean_number(self, value):
        """Convert a formatted amount such as ``"$1,234"`` or ``"(50)"`` to a float.

        Commas and dollar signs are removed and an opening parenthesis is
        read as a minus sign. Anything that still cannot be converted
        (including ``None``) yields ``0.0``.
        """
        if isinstance(value, str):
            value = (
                value.replace(',', '')
                .replace('$', '')
                .replace('(', '-')
                .replace(')', '')
                .strip()
            )
        try:
            return float(value)
        except (TypeError, ValueError, OverflowError):
            return 0.0

    def extract_key_metrics(self, income_data, balance_data):
        """Build the revenue / profit / assets metrics for 2021 and 2025.

        Args:
            income_data: ``{row_key: {column: cell}}`` rows of the income statement.
            balance_data: Rows of the balance sheet in the same shape.

        Returns:
            A dict with ``"Revenue"``, ``"Profit"`` and ``"Assets"`` entries,
            each holding the two yearly values and a ``"Growth"`` percentage;
            ``"Profit"`` also carries ``"Margin_2025"``. On any unexpected
            error the all-zero structure from ``get_default_metrics`` is
            returned.
        """
        try:
            revenue_2025 = self.safe_extract_number(income_data, "Total Net Revenue", "2025")
            revenue_2021 = self.safe_extract_number(income_data, "Total Net Revenue", "2021")
            profit_2025 = self.safe_extract_number(income_data, "Net Earnings", "2025")
            profit_2021 = self.safe_extract_number(income_data, "Net Earnings", "2021")
            # NOTE(review): this key uses an underscore while the income keys
            # use spaces - confirm it matches the balance sheet's row labels.
            assets_2025 = self.safe_extract_number(balance_data, "Total_Assets", "2025")
            assets_2021 = self.safe_extract_number(balance_data, "Total_Assets", "2021")

            return {
                "Revenue": {
                    "2025": revenue_2025,
                    "2021": revenue_2021,
                    "Growth": self.calculate_growth(revenue_2025, revenue_2021),
                },
                "Profit": {
                    "2025": profit_2025,
                    "2021": profit_2021,
                    "Growth": self.calculate_growth(profit_2025, profit_2021),
                    "Margin_2025": self.calculate_percentage(profit_2025, revenue_2025),
                },
                "Assets": {
                    "2025": assets_2025,
                    "2021": assets_2021,
                    "Growth": self.calculate_growth(assets_2025, assets_2021),
                },
            }
        except Exception as e:
            print(f"Error in metric extraction: {str(e)}")
            return self.get_default_metrics()

    def safe_extract_number(self, data_dict, key, year):
        """Return the numeric value for ``year`` from the first row whose label contains ``key``.

        Returns ``0.0`` when ``data_dict`` is not a dict, when no row label
        contains ``key``, or when the lookup fails.
        """
        try:
            if isinstance(data_dict, dict):
                for label, row in data_dict.items():
                    # Substring match: "Net Earnings" also matches longer labels.
                    if isinstance(row, dict) and key in label:
                        return self.clean_number(row.get(year, '0'))
            return 0.0
        except Exception as e:
            print(f"Error extracting {key} for {year}: {str(e)}")
            return 0.0

    def calculate_growth(self, current, previous):
        """Return the percentage change from ``previous`` to ``current``.

        The change is measured against ``abs(previous)`` so that growth from
        a negative base keeps a meaningful sign. A missing or zero base, or
        non-numeric input, yields ``0.0``.
        """
        try:
            if previous:
                return ((current - previous) / abs(previous)) * 100
            return 0.0
        except (TypeError, ArithmeticError):
            return 0.0

    def calculate_percentage(self, numerator, denominator):
        """Return ``numerator`` as a percentage of ``denominator`` (``0.0`` if that is impossible)."""
        try:
            if denominator:
                return (numerator / denominator) * 100
            return 0.0
        except (TypeError, ArithmeticError):
            return 0.0

    def get_default_metrics(self):
        """Return the all-zero metrics structure used when extraction fails."""
        return {
            "Revenue": {"2025": 0, "2021": 0, "Growth": 0},
            "Profit": {"2025": 0, "2021": 0, "Growth": 0, "Margin_2025": 0},
            "Assets": {"2025": 0, "2021": 0, "Growth": 0}
        }

    def generate_analysis_prompt(self, metrics):
        """Format the metrics into the ``<human>...</human>`` prompt for the model.

        Only the ``"2025"`` and ``"2021"`` entries of ``metrics`` are read.
        Growth and margin go through ``calculate_growth`` and
        ``calculate_percentage``, so a zero 2021 or revenue figure renders as
        ``0.0%`` instead of raising ``ZeroDivisionError``.
        """
        revenue = metrics['Revenue']
        profit = metrics['Profit']
        assets = metrics['Assets']

        revenue_growth = self.calculate_growth(revenue['2025'], revenue['2021'])
        margin_2025 = self.calculate_percentage(profit['2025'], revenue['2025'])
        asset_growth = self.calculate_growth(assets['2025'], assets['2021'])

        return f"""<human>Analyze these financial metrics and provide insights:
Key Performance Indicators (in millions):
1. Revenue:
- 2025: ${revenue['2025']:.1f}M
- 2021: ${revenue['2021']:.1f}M
- Growth: {revenue_growth:.1f}%
2. Net Profit:
- 2025: ${profit['2025']:.1f}M
- 2021: ${profit['2021']:.1f}M
- Margin 2025: {margin_2025:.1f}%
3. Asset Utilization:
- 2025: ${assets['2025']:.1f}M
- 2021: ${assets['2021']:.1f}M
- Growth: {asset_growth:.1f}%
Provide:
1. Performance Assessment
2. Key Strengths and Concerns
3. Strategic Recommendations</human>"""

    def generate_analysis(self, prompt):
        """Generate text for ``prompt`` with the loaded model.

        Returns:
            The decoded output sequence (the prompt followed by the model's
            continuation), or an ``"Error generating analysis: ..."`` string
            when tokenization or generation fails.
        """
        try:
            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1500)
            outputs = self.model.generate(
                inputs["input_ids"],
                # pad_token_id is set to the EOS id below, so the mask is
                # passed explicitly rather than left to be inferred.
                attention_mask=inputs.get("attention_mask"),
                max_new_tokens=500,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
                no_repeat_ngram_size=3
            )
            return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        except Exception as e:
            return f"Error generating analysis: {str(e)}"

    def analyze_financials(self, balance_sheet_file, income_stmt_file):
        """Run the full analysis on two markdown files.

        Args:
            balance_sheet_file: Path to the balance sheet markdown file.
            income_stmt_file: Path to the income statement markdown file.

        Returns:
            A JSON string with the key metrics and the model's commentary, or
            an ``"Error in analysis: ..."`` string when any step fails.
        """
        try:
            with open(balance_sheet_file, 'r', encoding='utf-8') as f:
                balance_sheet = f.read()
            with open(income_stmt_file, 'r', encoding='utf-8') as f:
                income_stmt = f.read()

            income_data = self.parse_markdown_table(income_stmt, "Income Statement")
            balance_data = self.parse_markdown_table(balance_sheet, "Balance Sheet")

            # The parser keys its result by the last "##" heading in the file,
            # which need not equal the default names passed above, so take the
            # rows regardless of the section label.
            metrics = self.extract_key_metrics(
                self._merge_sections(income_data),
                self._merge_sections(balance_data),
            )

            analysis_prompt = self.generate_analysis_prompt(metrics)
            analysis = self.generate_analysis(analysis_prompt)

            results = {
                "Financial Analysis": {
                    "Key Metrics": metrics,
                    # Keep only the model's continuation, not the echoed prompt.
                    "AI Analysis": self._extract_response(analysis),
                    "Analysis Period": "2021-2025",
                    "Note": "All monetary values in millions ($M)"
                }
            }
            return json.dumps(results, indent=2)
        except Exception as e:
            return f"Error in analysis: {str(e)}"
def create_interface():
    """Build the Gradio interface around a freshly loaded analyzer.

    Two file uploads (balance sheet, then income statement) are passed as
    file paths to ``FastFinancialAnalyzer.analyze_financials`` and the
    returned text is shown in a 25-line textbox.
    """
    analyzer = FastFinancialAnalyzer()

    # Order matters: it must match analyze_financials(balance_sheet, income_stmt).
    upload_fields = [
        gr.File(label="Balance Sheet (Markdown)", type="filepath"),
        gr.File(label="Income Statement (Markdown)", type="filepath"),
    ]
    result_box = gr.Textbox(label="Analysis Results", lines=25)

    return gr.Interface(
        fn=analyzer.analyze_financials,
        inputs=upload_fields,
        outputs=result_box,
        title="Fast Financial Statement Analyzer",
        description="Upload financial statements in Markdown format for quick AI-powered analysis",
    )
if __name__ == "__main__":
    # Script entry point: build the interface and start serving it.
    create_interface().launch()