import os
import gradio as gr
import pandas as pd
import torch
import logging
from transformers import pipeline
# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
logger.info(f"Using device: {DEVICE}")
class FinancialAnalyzer:
    def __init__(self):
        self.analysis_model = None
        self.sentiment_model = None
        self.load_models()

    def load_models(self):
        try:
            logger.info("Loading TinyLlama model...")
            self.analysis_model = pipeline(
                "text-generation",
                model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
                device=0 if DEVICE == "cuda" else -1
            )

            logger.info("Loading FinBERT model...")
            self.sentiment_model = pipeline(
                "text-classification",
                model="ProsusAI/finbert",
                torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
                device=0 if DEVICE == "cuda" else -1
            )
            logger.info("Models loaded successfully")
        except Exception as e:
            logger.error(f"Error loading models: {str(e)}")
            raise

    def extract_and_analyze(self, statement_text, statement_type):
        """Extract and summarize key information from financial statement text."""
        try:
            prompt = f"""[INST] As a financial analyst, analyze this {statement_type}:
{statement_text}
Extract and summarize:
1. Key financial numbers for 2025
2. Notable trends
3. Important metrics
Focus on the most recent year (2025) and key financial indicators.
[/INST]"""

            response = self.analysis_model(
                prompt,
                max_new_tokens=300,
                do_sample=True,          # temperature only takes effect when sampling is enabled
                temperature=0.3,
                num_return_sequences=1,
                truncation=True,
                return_full_text=False   # return only the model's answer, not the prompt
            )
            return response[0]['generated_text']
        except Exception as e:
            logger.error(f"Error extracting data from {statement_type}: {str(e)}")
            raise

    def analyze_financials(self, income_text, balance_text):
        try:
            # First, extract key information from each statement
            logger.info("Analyzing Income Statement...")
            income_analysis = self.extract_and_analyze(income_text, "Income Statement")

            logger.info("Analyzing Balance Sheet...")
            balance_analysis = self.extract_and_analyze(balance_text, "Balance Sheet")

            # Combine the analyses
            combined_analysis = f"""Income Statement Analysis:
{income_analysis}
Balance Sheet Analysis:
{balance_analysis}"""

            # Get sentiment (only the first 512 characters, to stay within FinBERT's input limit)
            sentiment = self.sentiment_model(
                combined_analysis[:512],
                truncation=True
            )[0]

            # Generate final analysis
            final_prompt = f"""[INST] Based on this financial analysis:
{combined_analysis}
Market Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
Provide a concise analysis with:
1. Overall Financial Health (2-3 key points)
2. Main Business Insights (2-3 insights)
3. Key Recommendations (2-3 recommendations)
[/INST]"""

            final_response = self.analysis_model(
                final_prompt,
                max_new_tokens=500,
                do_sample=True,
                temperature=0.7,
                num_return_sequences=1,
                truncation=True,
                return_full_text=False   # keep the prompt out of the final report
            )
            return self.format_response(final_response[0]['generated_text'], sentiment, combined_analysis)
        except Exception as e:
            logger.error(f"Analysis error: {str(e)}")
            return f"Error in analysis: {str(e)}"

    def format_response(self, analysis_text, sentiment, raw_analysis):
        try:
            sections = [
                "# Financial Analysis Report\n\n",
                f"## Market Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n",
                "## Extracted Financial Data\n```\n",
                raw_analysis,
                "\n```\n\n",
                "## Analysis\n\n"
            ]

            for line in analysis_text.split('\n'):
                line = line.strip()
                if not line:
                    continue
                if any(header in line for header in ["Financial Health", "Business Insights", "Recommendations"]):
                    sections.append(f"\n### {line}\n")
                else:
                    if not line.startswith('-'):
                        line = f"- {line}"
                    sections.append(f"{line}\n")

            return "".join(sections)
        except Exception as e:
            logger.error(f"Error formatting response: {str(e)}")
            return "Error formatting analysis"

# Create a single analyzer and reuse it across requests; constructing
# FinancialAnalyzer inside every call would reload both models each time.
_analyzer = None

def get_analyzer():
    global _analyzer
    if _analyzer is None:
        _analyzer = FinancialAnalyzer()
    return _analyzer

def analyze_statements(income_statement, balance_sheet):
    try:
        if not income_statement or not balance_sheet:
            return "Please upload both financial statements."

        logger.info("Reading financial statements...")
        # Read files as text
        income_df = pd.read_csv(income_statement)
        balance_df = pd.read_csv(balance_sheet)

        # Convert to string while preserving format
        income_text = income_df.to_string(index=False)
        balance_text = balance_df.to_string(index=False)

        logger.info("Initializing analysis...")
        analyzer = get_analyzer()
        result = analyzer.analyze_financials(income_text, balance_text)

        if DEVICE == "cuda":
            torch.cuda.empty_cache()

        return result
    except Exception as e:
        logger.error(f"Error: {str(e)}")
        return f"""Analysis Error: {str(e)}
Please check:
1. Files are readable CSV files
2. Files contain financial data
3. Files are not corrupted"""
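
# Optional, illustrative smoke test (a sketch, not wired into the Gradio app).
# The column names ("Metric", "2024", "2025") and figures below are made-up
# assumptions purely for demonstration; real statements can use any layout
# that pandas can read.
def _demo_run():
    import tempfile

    income_csv = "Metric,2024,2025\nRevenue,1200,1500\nNet Income,200,310\n"
    balance_csv = "Metric,2024,2025\nTotal Assets,5000,5600\nTotal Liabilities,2100,2300\n"

    # Write the sample data to temporary CSV files, mirroring the file paths
    # that Gradio passes to analyze_statements()
    paths = []
    for content in (income_csv, balance_csv):
        tmp = tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False)
        tmp.write(content)
        tmp.close()
        paths.append(tmp.name)

    print(analyze_statements(paths[0], paths[1]))
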
# Create Gradio interface
iface = gr.Interface(
    fn=analyze_statements,
    inputs=[
        gr.File(label="Income Statement (CSV)", file_types=[".csv"]),
        gr.File(label="Balance Sheet (CSV)", file_types=[".csv"])
    ],
    outputs=gr.Markdown(),
    title="AI Financial Statement Analyzer",
    description="""Upload your financial statements for AI analysis.
The model will extract and analyze key financial information automatically.""",
    theme="default",
    flagging_mode="never"
)

# Launch
if __name__ == "__main__":
    iface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )