Spaces:
Sleeping
Sleeping
import gradio as gr | |
import pandas as pd | |
import numpy as np | |
import json | |
import re | |
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification | |
import torch | |
class FinancialAnalyzer:
    """Analyze Markdown financial statements with TinyLlama and FinBERT.

    The analyzer turns Markdown tables into plain text, asks a causal
    language model for a narrative analysis of that text, and runs a
    sequence-classification model over the same text for a sentiment label.
    """

    TINY_LLAMA_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    FINBERT_MODEL = "ProsusAI/finbert"

    # Total token budget (prompt + generated text) for one generation call.
    MAX_CONTEXT_TOKENS = 2048
    # Part of that budget that is always kept free for the generated answer,
    # so a long prompt can never consume the whole window.
    RESERVED_NEW_TOKENS = 512

    # One cell of a Markdown delimiter row, e.g. "---", ":--" or "--:".
    _SEPARATOR_CELL = re.compile(r":?-+:?")

    def __init__(self):
        """Create the analyzer and load both models."""
        print("Initializing Financial Analyzer...")
        self.initialize_models()

    def initialize_models(self):
        """Load the tokenizer/model pairs used for generation and sentiment."""
        print("Loading models...")
        self.tiny_tokenizer = AutoTokenizer.from_pretrained(self.TINY_LLAMA_MODEL)
        self.tiny_model = AutoModelForCausalLM.from_pretrained(self.TINY_LLAMA_MODEL)
        self.finbert_tokenizer = AutoTokenizer.from_pretrained(self.FINBERT_MODEL)
        self.finbert_model = AutoModelForSequenceClassification.from_pretrained(
            self.FINBERT_MODEL
        )
        print("Models loaded successfully!")

    @staticmethod
    def _split_row(line):
        """Split one table line into stripped cells; outer pipes are optional."""
        body = line.strip()
        if body.startswith("|"):
            body = body[1:]
        if body.endswith("|"):
            body = body[:-1]
        return [cell.strip() for cell in body.split("|")]

    @classmethod
    def _is_separator_row(cls, cells):
        """Return True when every non-empty cell is a run of dashes/colons."""
        filled = [cell for cell in cells if cell]
        return bool(filled) and all(cls._SEPARATOR_CELL.fullmatch(c) for c in filled)

    def parse_markdown_table(self, markdown_content):
        """Parse a Markdown table into a pandas DataFrame.

        The first non-separator row containing ``|`` becomes the header and
        every following row becomes a data row. All cells are kept as
        stripped strings.

        Args:
            markdown_content: Text containing the table lines.

        Returns:
            A DataFrame with one column per header cell. Short rows are
            padded with empty strings; if a row is wider than the header,
            extra columns named ``Column N`` are added so no cell is
            dropped. An empty DataFrame is returned when no table row is
            found.
        """
        headers = None
        rows = []
        for line in markdown_content.splitlines():
            if "|" not in line:
                continue
            cells = self._split_row(line)
            # Skip blank rows and delimiter rows such as "| --- | :---: |".
            if not any(cells) or self._is_separator_row(cells):
                continue
            if headers is None:
                headers = cells
            else:
                rows.append(cells)

        if headers is None:
            return pd.DataFrame()

        # Ragged rows would make the DataFrame constructor raise, so bring
        # header and rows to a common width first.
        width = max([len(headers)] + [len(row) for row in rows])
        headers = headers + [f"Column {i + 1}" for i in range(len(headers), width)]
        rows = [row + [""] * (width - len(row)) for row in rows]
        return pd.DataFrame(rows, columns=headers)

    def extract_financial_data(self, markdown_content):
        """Convert Markdown content to a structured text format.

        ``#`` and ``*`` characters are removed. Every non-empty line that
        does not start with ``|`` (after stripping whitespace) becomes the
        current section title; table lines are collected under the most
        recent title ("General" before any title is seen).

        Args:
            markdown_content: Raw Markdown text of one statement.

        Returns:
            One ``"\\n<section>:"`` heading per section that has table
            lines, each followed by the parsed table rendered with
            ``DataFrame.to_string()``. Empty string when there is no table.
        """
        clean_text = markdown_content.replace('#', '').replace('*', '')

        tables = {}
        current_section = "General"
        for line in clean_text.split('\n'):
            stripped = line.strip()
            if not stripped:
                continue
            # Test the stripped line so indented table rows are still
            # recognised as table rows instead of section titles.
            if stripped.startswith('|'):
                tables.setdefault(current_section, []).append(stripped)
            else:
                current_section = stripped

        structured_text = []
        for section, content in tables.items():
            structured_text.append(f"\n{section}:")
            df = self.parse_markdown_table('\n'.join(content))
            structured_text.append(df.to_string())
        return '\n'.join(structured_text)

    @staticmethod
    def _build_prompt(structured_balance, structured_income):
        """Assemble the analysis prompt around the two statement summaries."""
        # NOTE(review): the prompt uses ad-hoc <human> tags. If the chat
        # checkpoint ships a chat template, tokenizer.apply_chat_template
        # may give better results - confirm before changing the wording.
        return "\n".join([
            "<human>Please analyze these financial statements and provide detailed insights:",
            "Financial Statements Analysis (2021-2025)",
            "Balance Sheet Summary:",
            f"{structured_balance}",
            "Income Statement Summary:",
            f"{structured_income}",
            "Please provide a detailed analysis including:",
            "1. Financial Health Assessment",
            "- Liquidity position",
            "- Capital structure",
            "- Asset efficiency",
            "2. Profitability Analysis",
            "- Revenue trends",
            "- Cost management",
            "- Profit margins",
            "3. Growth Analysis",
            "- Year-over-year growth rates",
            "- Market position",
            "- Future growth potential",
            "4. Risk Assessment",
            "- Operating risks",
            "- Financial risks",
            "- Strategic risks",
            "5. Recommendations",
            "- Short-term actions",
            "- Medium-term strategy",
            "- Long-term planning",
            "6. Future Outlook",
            "- Market conditions",
            "- Company positioning",
            "- Growth opportunities</human>",
        ])

    def analyze_financials(self, balance_sheet_file, income_stmt_file):
        """Run the full analysis for one pair of statement files.

        Args:
            balance_sheet_file: Path to the balance sheet Markdown file.
            income_stmt_file: Path to the income statement Markdown file.

        Returns:
            A JSON string with the keys "Financial Analysis", "Sentiment
            Analysis", "Analysis Period" and "Note". On any failure a
            plain-text message starting with "Error in analysis:" is
            returned instead of raising, so the message can be shown to the
            user as the result.
        """
        try:
            if not balance_sheet_file or not income_stmt_file:
                raise ValueError(
                    "Both a balance sheet and an income statement file are required."
                )

            # Explicit encoding: the platform default is not guaranteed to
            # be UTF-8.
            with open(balance_sheet_file, 'r', encoding='utf-8') as f:
                balance_sheet_content = f.read()
            with open(income_stmt_file, 'r', encoding='utf-8') as f:
                income_stmt_content = f.read()

            structured_balance = self.extract_financial_data(balance_sheet_content)
            structured_income = self.extract_financial_data(income_stmt_content)
            prompt = self._build_prompt(structured_balance, structured_income)

            # Truncate the prompt so that room for the answer always remains.
            inputs = self.tiny_tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                max_length=self.MAX_CONTEXT_TOKENS - self.RESERVED_NEW_TOKENS,
            )
            prompt_length = inputs["input_ids"].shape[1]
            with torch.no_grad():
                outputs = self.tiny_model.generate(
                    input_ids=inputs["input_ids"],
                    attention_mask=inputs["attention_mask"],
                    # Same overall budget as before, expressed as new tokens
                    # so the prompt length cannot eat the whole allowance.
                    max_new_tokens=self.MAX_CONTEXT_TOKENS - prompt_length,
                    temperature=0.7,
                    top_p=0.95,
                    do_sample=True,
                )
            # generate() returns prompt + continuation; keep only the
            # continuation so the prompt is not echoed back as "analysis".
            analysis = self.tiny_tokenizer.decode(
                outputs[0][prompt_length:], skip_special_tokens=True
            ).strip()

            sentiment = self.analyze_sentiment(structured_balance + structured_income)

            results = {
                "Financial Analysis": analysis,
                "Sentiment Analysis": sentiment,
                "Analysis Period": "2021-2025",
                "Note": "All values in millions ($M)"
            }
            return json.dumps(results, indent=2, ensure_ascii=False)
        except Exception as e:
            # UI boundary: report the failure as text rather than crashing.
            return f"Error in analysis: {str(e)}\n\nDetails: {type(e).__name__}"

    def analyze_sentiment(self, text):
        """Classify ``text`` with the sentiment model.

        Args:
            text: Text to classify; it is truncated to 512 tokens.

        Returns:
            A dict with 'sentiment' (lower-cased label name) and
            'confidence' (probability of that label, two decimals, as a
            string).
        """
        inputs = self.finbert_tokenizer(
            text, return_tensors="pt", truncation=True, max_length=512
        )
        with torch.no_grad():
            outputs = self.finbert_model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=1)
        best_index = probs.argmax().item()
        # Read the label name from the checkpoint's own config instead of a
        # hard-coded list: the index order is model-specific, and a wrong
        # list silently reports the wrong sentiment.
        label = self.finbert_model.config.id2label[best_index]
        return {
            'sentiment': str(label).lower(),
            'confidence': f"{probs.max().item():.2f}"
        }
def create_interface():
    """Build the Gradio interface for the financial statement analyzer.

    A FinancialAnalyzer is constructed first; its ``analyze_financials``
    method is then wired to two file-path upload inputs and one text output.

    Returns:
        The configured ``gr.Interface`` (not yet launched).
    """
    analyzer = FinancialAnalyzer()

    # One upload widget per statement, in the order the handler expects.
    upload_labels = ("Balance Sheet (Markdown)", "Income Statement (Markdown)")
    upload_widgets = [gr.File(label=text, type="filepath") for text in upload_labels]

    results_box = gr.Textbox(label="Analysis Results", lines=25)

    return gr.Interface(
        fn=analyzer.analyze_financials,
        inputs=upload_widgets,
        outputs=results_box,
        title="Financial Statement Analyzer",
        description="Upload financial statements in Markdown format for comprehensive AI-powered analysis.",
    )
if __name__ == "__main__":
    # Script entry point: build the interface, then start serving it.
    iface = create_interface()
    iface.launch()