Spaces:
Sleeping
Sleeping
File size: 9,583 Bytes
ea4b18b b2501de 5bc4f16 ab0bea5 6631d2e 74f7ba2 6631d2e ea4b18b 5bc4f16 6631d2e 5bc4f16 6631d2e 5bc4f16 6631d2e 5bc4f16 8eee353 6631d2e 88b54ed 6631d2e 88b54ed 6631d2e 88b54ed 6631d2e 88b54ed 6631d2e 88b54ed 6631d2e 88b54ed 6631d2e 88b54ed 6631d2e 88b54ed 6631d2e 8eee353 6631d2e 8eee353 6631d2e 5bc4f16 6631d2e 5bc4f16 6631d2e 5bc4f16 6631d2e 5bc4f16 74f7ba2 c6b42a6 9bdd84e 8eee353 5bc4f16 ab0bea5 5bc4f16 ab0bea5 5bc4f16 c6b42a6 6631d2e c6b42a6 6631d2e c6b42a6 6631d2e c6b42a6 5bc4f16 c6b42a6 5bc4f16 6631d2e 5bc4f16 9bdd84e ab0bea5 c6b42a6 74f7ba2 9bdd84e 6631d2e 9bdd84e 5462ac3 6631d2e ea4b18b 74f7ba2 ea4b18b c6b42a6 ea4b18b ab0bea5 6631d2e ea4b18b 5462ac3 ea4b18b 5462ac3 8eee353 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 |
import gradio as gr
import pandas as pd
import json
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import re
class FinancialAnalyzer:
    """Parse Markdown financial statements and produce an LLM commentary.

    The pipeline is: validate and read two Markdown files, parse their
    tables into nested dicts, pull a fixed set of 2025/2021 figures out of
    them, format those figures into a prompt, and let the TinyLlama chat
    model generate free-text analysis. Errors are reported via ``print`` or
    returned as strings rather than raised, except for model loading.
    """

    MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

    # One cell of a Markdown table delimiter row: dashes with optional
    # alignment colons, e.g. "---", ":---", "---:", ":---:".
    _SEPARATOR_CELL = re.compile(r":?-+:?")

    def __init__(self):
        """Load the tokenizer and model, printing progress to stdout."""
        print("Initializing Analyzer...")
        self.initialize_model()
        print("Initialization complete!")

    def initialize_model(self):
        """Initialize TinyLlama model.

        Sets ``self.tokenizer`` and ``self.model`` and puts the model in
        eval mode.

        Raises:
            Exception: whatever the loading calls raised; it is printed and
                then re-raised unchanged.
        """
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)
            self.model = AutoModelForCausalLM.from_pretrained(self.MODEL_NAME)
            self.model.eval()
        except Exception as e:
            print(f"Error initializing model: {str(e)}")
            raise

    def clean_number(self, value):
        """Clean and convert numerical values.

        Args:
            value: A table cell. Strings may carry ``$``, thousands commas,
                surrounding whitespace, and accounting-style parentheses
                for negatives, e.g. ``"(1,234)"``.

        Returns:
            The value as ``float``; ``0.0`` for empty, ``None`` or
            unparseable input.
        """
        try:
            if isinstance(value, str):
                # Remove currency symbols, commas, spaces
                value = value.replace('$', '').replace(',', '').strip()
                # Handle parentheses for negative numbers; the inner strip
                # keeps "( 1234 )" from turning into the unparseable "- 1234 ".
                if '(' in value and ')' in value:
                    value = '-' + value.replace('(', '').replace(')', '').strip()
            return float(value or 0)
        except (TypeError, ValueError, OverflowError):
            return 0.0

    @staticmethod
    def _read_text(file_path):
        """Read a whole text file as UTF-8, dropping a leading BOM if present."""
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            return f.read()

    @staticmethod
    def _split_row(line):
        """Split one Markdown table line into stripped cell strings.

        Only an outer pipe that is actually present is removed, so rows
        written without leading/trailing pipes keep their first and last
        cells.
        """
        text = line.strip()
        if text.startswith('|'):
            text = text[1:]
        if text.endswith('|'):
            text = text[:-1]
        return [cell.strip() for cell in text.split('|')]

    def is_valid_markdown(self, file_path):
        """Check if a file is a valid Markdown file.

        This is only a shallow structural check: the file must be readable
        as UTF-8 text and contain at least one line that starts with ``#``
        or contains ``|``.

        Returns:
            ``True`` if the check passes; ``False`` for a missing path
            (e.g. ``None``), an unreadable file, or no matching line.
        """
        if not file_path:
            return False
        try:
            content = self._read_text(file_path)
        except (OSError, TypeError, ValueError):
            # ValueError also covers UnicodeDecodeError for non-text files.
            return False
        # Simple check for Markdown structure
        return any(line.startswith('#') or '|' in line for line in content.split('\n'))

    def parse_financial_data(self, content):
        """Parse markdown content into structured data.

        Each ``#`` heading starts a section. Within a section the first
        table row is taken as the header row and later rows as data.

        Args:
            content: The Markdown text.

        Returns:
            ``{section_title: process_table(headers, rows)}`` for every
            section that has a header row and at least one data row;
            ``{}`` if parsing fails.
        """
        try:
            data = {}
            current_section = ""
            current_table = []
            headers = None
            for line in content.split('\n'):
                if line.startswith('#'):
                    # A new heading closes the table collected so far.
                    if current_table and headers:
                        data[current_section] = self.process_table(headers, current_table)
                    current_section = line.strip('# ')
                    current_table = []
                    headers = None
                elif '|' in line:
                    row = self._split_row(line)
                    if not any(row):
                        continue  # nothing but empty cells
                    # Skip delimiter rows in any spelling ("|---|", "| --- |",
                    # "|:--:|"), not only the unspaced "-|-" form.
                    if all(self._SEPARATOR_CELL.fullmatch(cell) for cell in row):
                        continue
                    if not headers:
                        headers = row
                    else:
                        current_table.append(row)
            # Process last table
            if current_table and headers:
                data[current_section] = self.process_table(headers, current_table)
            return data
        except Exception as e:
            print(f"Error parsing financial data: {str(e)}")
            return {}

    def process_table(self, headers, rows):
        """Process table data into structured format.

        Args:
            headers: Header cells; ``headers[0]`` labels the item column
                and the rest name the value columns.
            rows: Data rows as lists of cell strings.

        Returns:
            ``{item_name: {header: float}}``. The item name is the first
            cell with surrounding ``*`` (bold markers) and whitespace
            removed. Rows whose cell count differs from the header count
            are skipped. ``{}`` is returned if processing fails.
        """
        try:
            processed_data = {}
            for row in rows:
                if len(row) != len(headers):
                    continue
                item_name = row[0].strip('*').strip()
                processed_data[item_name] = {
                    header: self.clean_number(value)
                    for header, value in zip(headers[1:], row[1:])
                }
            return processed_data
        except Exception as e:
            print(f"Error processing table: {str(e)}")
            return {}

    def extract_metrics(self, income_data, balance_data):
        """Extract and calculate key financial metrics.

        Looks up fixed section/item names for the years "2025" and "2021";
        anything not found is reported as 0.

        Args:
            income_data: Parsed income statement (see ``parse_financial_data``).
            balance_data: Parsed balance sheet.

        Returns:
            A dict with "Revenue", "Profitability" and "Balance_Sheet"
            groups. "Gross_Margin" and "Net_Margin" (percent) are added to
            "Profitability" only when 2025 revenue is non-zero. ``{}`` is
            returned if extraction fails.
        """
        try:
            income = lambda section, key, year: self.get_nested_value(income_data, section, key, year)
            balance = lambda key: self.get_nested_value(balance_data, "Key Totals", key, "2025")
            metrics = {
                "Revenue": {
                    "2025": income("Revenue", "Total Net Revenue", "2025"),
                    "2021": income("Revenue", "Total Net Revenue", "2021"),
                },
                "Profitability": {
                    "Gross_Profit_2025": income("Cost and Gross Profit", "Gross Profit", "2025"),
                    "Net_Earnings_2025": income("Profit Summary", "Net Earnings", "2025"),
                    "Operating_Expenses_2025": income("Operating Expenses", "Total Operating Expenses", "2025"),
                },
                "Balance_Sheet": {
                    "Total_Assets_2025": balance("Total_Assets"),
                    "Total_Liabilities_2025": balance("Total_Liabilities"),
                    "Equity_2025": balance("Total_Shareholders_Equity"),
                },
            }
            # Calculate additional metrics (skipped when revenue is zero to
            # avoid dividing by zero).
            revenue_2025 = metrics["Revenue"]["2025"]
            if revenue_2025 != 0:
                profitability = metrics["Profitability"]
                profitability["Gross_Margin"] = (profitability["Gross_Profit_2025"] / revenue_2025) * 100
                profitability["Net_Margin"] = (profitability["Net_Earnings_2025"] / revenue_2025) * 100
            return metrics
        except Exception as e:
            print(f"Error extracting metrics: {str(e)}")
            return {}

    def get_nested_value(self, data, section, key, year):
        """Safely get nested dictionary value.

        Returns:
            ``data[section][key][year]`` when every level exists, else 0
            (also when some level is not a dict).
        """
        try:
            return data.get(section, {}).get(key, {}).get(year, 0)
        except (AttributeError, TypeError):
            return 0

    def generate_analysis_prompt(self, metrics):
        """Create analysis prompt from metrics.

        Args:
            metrics: The dict produced by ``extract_metrics``.

        Returns:
            The prompt text wrapped in ``<human>``/``</human>`` tags, or
            ``""`` if a required figure is missing or cannot be formatted.
        """
        # NOTE(review): the <human> tags are literal text in the prompt;
        # confirm whether the model's own chat template should be used.
        try:
            revenue = metrics['Revenue']
            profit = metrics['Profitability']
            balance = metrics['Balance_Sheet']
            return "\n".join([
                "<human>",
                "Analyze these financial metrics for 2025 with a focus on business performance, trends, and risks:",
                "Revenue and Profitability:",
                f"- Total Revenue: ${revenue['2025']:,.1f}M",
                f"- Gross Profit: ${profit['Gross_Profit_2025']:,.1f}M",
                f"- Net Earnings: ${profit['Net_Earnings_2025']:,.1f}M",
                f"- Gross Margin: {profit.get('Gross_Margin', 0):,.1f}%",
                f"- Net Margin: {profit.get('Net_Margin', 0):,.1f}%",
                "Balance Sheet Strength:",
                f"- Total Assets: ${balance['Total_Assets_2025']:,.1f}M",
                f"- Total Liabilities: ${balance['Total_Liabilities_2025']:,.1f}M",
                f"- Shareholders' Equity: ${balance['Equity_2025']:,.1f}M",
                "Explain key financial ratios and their implications. Discuss strategies for growth and risk mitigation.",
                "</human>",
            ])
        except Exception as e:
            print(f"Error generating prompt: {str(e)}")
            return ""

    def generate_analysis(self, prompt):
        """Generate analysis using TinyLlama.

        Args:
            prompt: Prompt text; it is truncated to 1500 tokens.

        Returns:
            The generated continuation only (the prompt is not echoed
            back), or an ``"Error generating analysis: ..."`` string if
            tokenization or generation fails.
        """
        try:
            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1500)
            prompt_length = inputs["input_ids"].shape[-1]
            outputs = self.model.generate(
                inputs["input_ids"],
                # Pass the mask explicitly: pad_token_id is set to the EOS id
                # below, so it cannot be inferred reliably from the ids.
                attention_mask=inputs.get("attention_mask"),
                max_new_tokens=500,  # Generate up to 500 new tokens
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
                no_repeat_ngram_size=3,
            )
            # The returned sequence starts with the prompt tokens; decode only
            # what comes after them so the answer does not repeat the prompt.
            new_tokens = outputs[0][prompt_length:]
            return self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
        except Exception as e:
            return f"Error generating analysis: {str(e)}"

    def analyze_financials(self, balance_sheet_file, income_stmt_file):
        """Main analysis function.

        Args:
            balance_sheet_file: Path to the balance-sheet Markdown file.
            income_stmt_file: Path to the income-statement Markdown file.

        Returns:
            A JSON string (indent 2) with the extracted metrics and the
            generated commentary, or a human-readable error string. This
            method does not raise; any failure is turned into text.
        """
        try:
            # Validate files
            if not (self.is_valid_markdown(balance_sheet_file) and self.is_valid_markdown(income_stmt_file)):
                return "Error: One or both files are invalid or not in Markdown format."
            # Read files
            balance_sheet = self._read_text(balance_sheet_file)
            income_stmt = self._read_text(income_stmt_file)
            # Parse financial data
            income_data = self.parse_financial_data(income_stmt)
            balance_data = self.parse_financial_data(balance_sheet)
            # Extract key metrics
            metrics = self.extract_metrics(income_data, balance_data)
            # Generate and get analysis
            prompt = self.generate_analysis_prompt(metrics)
            if not prompt:
                # Prompt building already printed the cause; do not ask the
                # model to continue an empty prompt.
                return "Error: Could not build the analysis prompt from the extracted metrics."
            analysis = self.generate_analysis(prompt)
            # Prepare results
            results = {
                "Financial Analysis": {
                    "Key Metrics": metrics,
                    "AI Insights": analysis,
                    "Analysis Period": "2021-2025",
                    "Note": "All monetary values in millions ($M)"
                }
            }
            return json.dumps(results, indent=2)
        except Exception as e:
            return f"Error in analysis: {str(e)}\n\nDetails: {type(e).__name__}"
def create_interface():
    """Build the Gradio interface for the analyzer.

    Constructs a ``FinancialAnalyzer`` (which loads the model) and returns a
    ``gr.Interface`` with two file inputs, passed to
    ``analyze_financials`` as file paths, and one text output.
    """
    analyzer = FinancialAnalyzer()
    upload_fields = [
        gr.File(label="Balance Sheet (Markdown)", type="filepath"),
        gr.File(label="Income Statement (Markdown)", type="filepath"),
    ]
    result_box = gr.Textbox(label="Analysis Results", lines=25)
    return gr.Interface(
        fn=analyzer.analyze_financials,
        inputs=upload_fields,
        outputs=result_box,
        title="Financial Statement Analyzer",
        description="Upload financial statements in Markdown format for AI-powered analysis",
    )
if __name__ == "__main__":
    # Script entry point: build the interface (this loads the model) and
    # start serving it.
    create_interface().launch()
|