Spaces:

walaa2022
/

financial_analysis

Sleeping

App Files Files Community

financial_analysis / app.py

walaa2022

Update app.py

c6b42a6 verified 7 months ago

raw

history blame

6.31 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	import json
	import re
	from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification
	import torch

	class FinancialAnalyzer:
	    """Turn two markdown financial statements into an LLM narrative plus a sentiment label.

	    The pipeline is: read the uploaded markdown files, flatten their pipe
	    tables into plain text, prompt the TinyLlama chat model for a written
	    analysis, and classify the flattened text with FinBERT.
	    """

	    # Token budget for text generation. The prompt is truncated to
	    # MAX_PROMPT_TOKENS so that MAX_NEW_TOKENS of output always fit; together
	    # they stay within the 2048-token total the earlier `max_length` allowed.
	    MAX_PROMPT_TOKENS = 1536
	    MAX_NEW_TOKENS = 512

	    # A markdown delimiter cell: hyphens with optional alignment colons.
	    _SEPARATOR_CELL = re.compile(r":?-+:?")

	    def __init__(self):
	        """Create the analyzer and eagerly load both models."""
	        print("Initializing Financial Analyzer...")
	        self.initialize_models()

	    def initialize_models(self):
	        """Load the TinyLlama generator and the FinBERT classifier with their tokenizers."""
	        print("Loading models...")
	        self.tiny_tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
	        self.tiny_model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

	        self.finbert_tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
	        self.finbert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
	        print("Models loaded successfully!")

	    @staticmethod
	    def _split_row(line):
	        """Split one pipe-delimited table line into stripped cell strings."""
	        text = line.strip()
	        # Drop at most one outer pipe per side so empty edge cells survive.
	        if text.startswith('|'):
	            text = text[1:]
	        if text.endswith('|'):
	            text = text[:-1]
	        return [cell.strip() for cell in text.split('|')]

	    def parse_markdown_table(self, markdown_content: str) -> pd.DataFrame:
	        """Parse a markdown pipe table into a DataFrame of strings.

	        The first table row becomes the header. Delimiter rows (cells made
	        only of hyphens and alignment colons) are skipped. Data rows are
	        padded with empty strings or truncated to the header width so that
	        ragged input cannot make DataFrame construction fail.

	        Args:
	            markdown_content: Text containing zero or more pipe-table lines.

	        Returns:
	            A DataFrame with one column per header cell, or an empty
	            DataFrame when no table row is present.
	        """
	        headers = None
	        rows = []

	        for line in markdown_content.strip().split('\n'):
	            if '|' not in line:
	                continue
	            cells = self._split_row(line)
	            # Skip delimiter rows such as |---|---| or | :--- | ---: |.
	            if cells and all(self._SEPARATOR_CELL.fullmatch(cell) for cell in cells):
	                continue
	            if headers is None:
	                headers = cells
	            else:
	                rows.append(cells)

	        if headers is None:
	            return pd.DataFrame()

	        width = len(headers)
	        normalized = [(row + [''] * width)[:width] for row in rows]
	        return pd.DataFrame(normalized, columns=headers)

	    def extract_financial_data(self, markdown_content: str) -> str:
	        """Convert markdown content to a structured text format.

	        '#' and '*' characters are removed, then the text is scanned line by
	        line: a non-blank line that does not start with a pipe names the
	        current section, and pipe lines are collected under that section
	        ("General" until a title is seen). Each section that has table lines
	        is rendered as its title followed by the parsed table.

	        Args:
	            markdown_content: Raw markdown of one financial statement.

	        Returns:
	            The rendered sections joined by newlines; an empty string when
	            the input has no table lines.
	        """
	        clean_text = markdown_content.replace('#', '').replace('*', '')

	        tables = {}
	        current_section = "General"

	        for line in clean_text.split('\n'):
	            stripped = line.strip()
	            if not stripped:
	                continue
	            # Test the stripped line so indented table rows are not
	            # mistaken for section titles.
	            if stripped.startswith('|'):
	                tables.setdefault(current_section, []).append(stripped)
	            else:
	                current_section = stripped

	        structured_text = []
	        for section, content in tables.items():
	            structured_text.append(f"\n{section}:")
	            if content:
	                df = self.parse_markdown_table('\n'.join(content))
	                structured_text.append(df.to_string())

	        return '\n'.join(structured_text)

	    def _build_prompt(self, structured_balance, structured_income):
	        """Return the analysis prompt with both statement summaries embedded."""
	        return f"""<human>Please analyze these financial statements and provide detailed insights:

	Financial Statements Analysis (2021-2025)

	Balance Sheet Summary:
	{structured_balance}

	Income Statement Summary:
	{structured_income}

	Please provide a detailed analysis including:
	1. Financial Health Assessment
	- Liquidity position
	- Capital structure
	- Asset efficiency

	2. Profitability Analysis
	- Revenue trends
	- Cost management
	- Profit margins

	3. Growth Analysis
	- Year-over-year growth rates
	- Market position
	- Future growth potential

	4. Risk Assessment
	- Operating risks
	- Financial risks
	- Strategic risks

	5. Recommendations
	- Short-term actions
	- Medium-term strategy
	- Long-term planning

	6. Future Outlook
	- Market conditions
	- Company positioning
	- Growth opportunities</human>"""

	    def analyze_financials(self, balance_sheet_file, income_stmt_file):
	        """Main analysis function.

	        Args:
	            balance_sheet_file: Path to the balance-sheet markdown file.
	            income_stmt_file: Path to the income-statement markdown file.

	        Returns:
	            A JSON string with the generated analysis, the sentiment result
	            and two fixed informational fields. Any exception is caught and
	            reported as an "Error in analysis: ..." string instead, so the
	            UI callback never raises.
	        """
	        try:
	            with open(balance_sheet_file, 'r', encoding='utf-8') as f:
	                balance_sheet_content = f.read()
	            with open(income_stmt_file, 'r', encoding='utf-8') as f:
	                income_stmt_content = f.read()

	            structured_balance = self.extract_financial_data(balance_sheet_content)
	            structured_income = self.extract_financial_data(income_stmt_content)

	            prompt = self._build_prompt(structured_balance, structured_income)

	            # Cap the prompt explicitly; otherwise a long statement can use
	            # up the whole length budget and leave no room for generation.
	            inputs = self.tiny_tokenizer(
	                prompt,
	                return_tensors="pt",
	                truncation=True,
	                max_length=self.MAX_PROMPT_TOKENS,
	            )
	            with torch.no_grad():
	                outputs = self.tiny_model.generate(
	                    inputs["input_ids"],
	                    attention_mask=inputs["attention_mask"],
	                    max_new_tokens=self.MAX_NEW_TOKENS,
	                    temperature=0.7,
	                    top_p=0.95,
	                    do_sample=True,
	                )
	            # The generated sequence begins with the prompt tokens; decode
	            # only what follows so the prompt is not echoed in the report.
	            prompt_length = inputs["input_ids"].shape[1]
	            analysis = self.tiny_tokenizer.decode(
	                outputs[0][prompt_length:], skip_special_tokens=True
	            )

	            sentiment = self.analyze_sentiment(structured_balance + structured_income)

	            results = {
	                "Financial Analysis": analysis,
	                "Sentiment Analysis": sentiment,
	                "Analysis Period": "2021-2025",
	                "Note": "All values in millions ($M)"
	            }

	            return json.dumps(results, indent=2)

	        except Exception as e:
	            # UI boundary: report the failure as text rather than raising.
	            return f"Error in analysis: {str(e)}\n\nDetails: {type(e).__name__}"

	    def analyze_sentiment(self, text):
	        """Classify *text* with FinBERT.

	        Args:
	            text: Text to classify; it is truncated to 512 tokens.

	        Returns:
	            A dict with 'sentiment' (the label of the highest-probability
	            class) and 'confidence' (that probability, two decimals, as a
	            string).
	        """
	        inputs = self.finbert_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
	        with torch.no_grad():
	            outputs = self.finbert_model(**inputs)
	        probs = torch.nn.functional.softmax(outputs.logits, dim=1)

	        # Take label names from the checkpoint's own mapping. The order
	        # ['negative', 'neutral', 'positive'] hard-coded here before does
	        # not match the mapping ProsusAI/finbert publishes, which mislabels
	        # results. NOTE(review): confirm against the model's config.json.
	        best_index = probs.argmax(dim=1)[0].item()
	        label = self.finbert_model.config.id2label[best_index]

	        return {
	            'sentiment': label,
	            'confidence': f"{probs.max().item():.2f}"
	        }

	def create_interface():
	    """Build the Gradio interface for the analyzer.

	    A FinancialAnalyzer is created first (which loads its models), then two
	    file inputs and one text output are wired to its `analyze_financials`
	    method.

	    Returns:
	        The configured `gr.Interface`, not yet launched.
	    """
	    financial_analyzer = FinancialAnalyzer()

	    upload_fields = [
	        gr.File(label="Balance Sheet (Markdown)", type="filepath"),
	        gr.File(label="Income Statement (Markdown)", type="filepath"),
	    ]
	    results_box = gr.Textbox(label="Analysis Results", lines=25)

	    return gr.Interface(
	        fn=financial_analyzer.analyze_financials,
	        inputs=upload_fields,
	        outputs=results_box,
	        title="Financial Statement Analyzer",
	        description="Upload financial statements in Markdown format for comprehensive AI-powered analysis.",
	    )

	# Script entry point: build the interface and start serving it.
	if __name__ == "__main__":
	    create_interface().launch()