# Hugging Face Space (status when captured: Sleeping)
import os | |
import gradio as gr | |
import pandas as pd | |
import torch | |
import logging | |
from transformers import pipeline | |
# Setup logging: timestamped INFO-level records on the root logger.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Lazy %-style arguments: the message is only formatted if the record is emitted.
logger.info("Using device: %s", DEVICE)
class FinancialAnalyzer:
    """Turn financial-statement text into a Markdown report using two models.

    ``analysis_model`` is a text-generation pipeline (TinyLlama chat model)
    used both to extract figures from each statement and to write the final
    analysis. ``sentiment_model`` is a text-classification pipeline (FinBERT)
    used to label the combined extraction. Both are loaded by ``__init__``.
    """

    # A report line containing any of these phrases is rendered as a heading.
    _SECTION_HEADERS = ("Financial Health", "Business Insights", "Recommendations")

    def __init__(self):
        self.analysis_model = None
        self.sentiment_model = None
        self.load_models()

    def load_models(self):
        """Create both pipelines on ``DEVICE``.

        Raises:
            Exception: anything raised while building a pipeline is logged
                and re-raised unchanged.
        """
        # Half precision is only selected for CUDA; CPU keeps float32.
        dtype = torch.float16 if DEVICE == "cuda" else torch.float32
        try:
            logger.info("Loading TinyLlama model...")
            self.analysis_model = pipeline(
                "text-generation",
                model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                torch_dtype=dtype,
                # Without an explicit device the pipeline stays on CPU even
                # when CUDA was detected and float16 was requested.
                device=DEVICE,
            )
            logger.info("Loading FinBERT model...")
            self.sentiment_model = pipeline(
                "text-classification",
                model="ProsusAI/finbert",
                torch_dtype=dtype,
                device=DEVICE,
            )
            logger.info("Models loaded successfully")
        except Exception as e:
            logger.exception("Error loading models: %s", e)
            raise

    def _generate(self, prompt, max_new_tokens, temperature):
        """Run the generation pipeline and return only the newly written text.

        Args:
            prompt: Full prompt string handed to ``analysis_model``.
            max_new_tokens: Upper bound on generated tokens.
            temperature: Sampling temperature.

        Returns:
            The generated continuation, stripped of surrounding whitespace.
        """
        outputs = self.analysis_model(
            prompt,
            max_new_tokens=max_new_tokens,
            # Sampling must be switched on for ``temperature`` to have any effect.
            do_sample=True,
            temperature=temperature,
            num_return_sequences=1,
            truncation=True,
            # Exclude the prompt from "generated_text"; otherwise the
            # instructions and raw statement are echoed into the report.
            return_full_text=False,
        )
        return outputs[0]["generated_text"].strip()

    def extract_and_analyze(self, statement_text, statement_type):
        """Extract information from financial statement text.

        Args:
            statement_text: The statement rendered as plain text.
            statement_type: Human-readable name inserted into the prompt,
                e.g. "Income Statement".

        Returns:
            The model's summary of the statement (prompt not included).

        Raises:
            Exception: generation errors are logged and re-raised.
        """
        try:
            # NOTE(review): the [INST] tags look like the Llama-2/Mistral
            # convention; confirm against the TinyLlama chat template before
            # relying on them.
            prompt = (
                f"[INST] As a financial analyst, analyze this {statement_type}:\n"
                f"{statement_text}\n"
                "Extract and summarize:\n"
                "1. Key financial numbers for 2025\n"
                "2. Notable trends\n"
                "3. Important metrics\n"
                "Focus on the most recent year (2025) and key financial indicators.\n"
                "[/INST]"
            )
            return self._generate(prompt, max_new_tokens=300, temperature=0.3)
        except Exception as e:
            logger.exception("Error extracting data from %s: %s", statement_type, e)
            raise

    def analyze_financials(self, income_text, balance_text):
        """Produce the full Markdown report for a pair of statements.

        Each statement is summarised separately, the combined summary is
        classified for sentiment, and a final analysis is generated from
        both.

        Args:
            income_text: Income statement as plain text.
            balance_text: Balance sheet as plain text.

        Returns:
            The formatted Markdown report, or an "Error in analysis: ..."
            string if any step fails (errors are not propagated).
        """
        try:
            # First, extract key information from each statement
            logger.info("Analyzing Income Statement...")
            income_analysis = self.extract_and_analyze(income_text, "Income Statement")
            logger.info("Analyzing Balance Sheet...")
            balance_analysis = self.extract_and_analyze(balance_text, "Balance Sheet")

            # Combine the analyses
            combined_analysis = (
                "Income Statement Analysis:\n"
                f"{income_analysis}\n"
                "Balance Sheet Analysis:\n"
                f"{balance_analysis}"
            )

            # Get sentiment from the leading 512 characters of the combined text
            sentiment = self.sentiment_model(
                combined_analysis[:512],
                truncation=True,
            )[0]

            # Generate final analysis
            final_prompt = (
                "[INST] Based on this financial analysis:\n"
                f"{combined_analysis}\n"
                f"Market Sentiment: {sentiment['label']} ({sentiment['score']:.2%})\n"
                "Provide a concise analysis with:\n"
                "1. Overall Financial Health (2-3 key points)\n"
                "2. Main Business Insights (2-3 insights)\n"
                "3. Key Recommendations (2-3 recommendations)\n"
                "[/INST]"
            )
            final_text = self._generate(final_prompt, max_new_tokens=500, temperature=0.7)
            return self.format_response(final_text, sentiment, combined_analysis)
        except Exception as e:
            logger.exception("Analysis error: %s", e)
            return f"Error in analysis: {str(e)}"

    def format_response(self, analysis_text, sentiment, raw_analysis):
        """Assemble the Markdown report.

        Args:
            analysis_text: Final generated analysis; processed line by line.
            sentiment: Mapping with ``'label'`` (str) and ``'score'`` (float).
            raw_analysis: Combined extraction text, shown in a fenced block.

        Returns:
            The report as one Markdown string, or the literal
            "Error formatting analysis" if formatting fails.
        """
        try:
            sections = [
                "# Financial Analysis Report\n\n",
                f"## Market Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n",
                "## Extracted Financial Data\n```\n",
                raw_analysis,
                "\n```\n\n",
                "## Analysis\n\n",
            ]
            for raw_line in analysis_text.split('\n'):
                line = raw_line.strip()
                if not line:
                    continue
                if any(header in line for header in self._SECTION_HEADERS):
                    sections.append(f"\n### {line}\n")
                    continue
                # Everything else becomes a bullet unless it already is one.
                if not line.startswith('-'):
                    line = f"- {line}"
                sections.append(f"{line}\n")
            return "".join(sections)
        except Exception as e:
            logger.exception("Error formatting response: %s", e)
            return "Error formatting analysis"
# Shared analyzer, built on first use so both models are loaded once per
# process instead of once per request.
_ANALYZER = None


def _get_analyzer():
    """Return the shared FinancialAnalyzer, creating it on the first call."""
    global _ANALYZER
    if _ANALYZER is None:
        logger.info("Initializing analysis...")
        _ANALYZER = FinancialAnalyzer()
    return _ANALYZER


def analyze_statements(income_statement, balance_sheet):
    """Gradio callback: read two CSV uploads and return a Markdown report.

    Args:
        income_statement: Income-statement upload; passed directly to
            ``pd.read_csv``. Falsy when nothing was uploaded.
        balance_sheet: Balance-sheet upload; handled the same way.

    Returns:
        The analysis report, a prompt to upload both files when either is
        missing, or an "Analysis Error: ..." message if reading or analysis
        raises. Exceptions are never propagated to the caller.
    """
    if not income_statement or not balance_sheet:
        return "Please upload both financial statements."

    try:
        logger.info("Reading financial statements...")
        # Read each CSV and render it as text without the index column
        income_text = pd.read_csv(income_statement).to_string(index=False)
        balance_text = pd.read_csv(balance_sheet).to_string(index=False)
        return _get_analyzer().analyze_financials(income_text, balance_text)
    except Exception as e:
        logger.exception("Error: %s", e)
        return (
            f"Analysis Error: {str(e)}\n"
            "Please check:\n"
            "1. Files are readable CSV files\n"
            "2. Files contain financial data\n"
            "3. Files are not corrupted"
        )
    finally:
        # Release cached GPU memory after failed requests as well.
        if DEVICE == "cuda":
            torch.cuda.empty_cache()
# Build the Gradio UI: two CSV uploads in, one Markdown report out.
iface = gr.Interface(
    fn=analyze_statements,
    inputs=[
        gr.File(label=f"{statement} (CSV)", file_types=[".csv"])
        for statement in ("Income Statement", "Balance Sheet")
    ],
    outputs=gr.Markdown(),
    title="AI Financial Statement Analyzer",
    description=(
        "Upload your financial statements for AI analysis.\n"
        "The model will extract and analyze key financial information automatically."
    ),
    theme="default",
    flagging_mode="never",
)
# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    iface.launch(**launch_options)