Spaces:

walaa2022
/

financial_analysis

Sleeping

App Files Files Community

walaa2022 committed on Nov 30, 2024

Commit

9bdd84e

verified ·

1 Parent(s): f5ea3f2

Update app.py

Browse files

Files changed (1) hide show

app.py +107 -99

app.py CHANGED Viewed

@@ -1,33 +1,38 @@
 import gradio as gr
-import torch
 from transformers import (
-    AutoModelForCausalLM,
     AutoTokenizer,
     AutoModelForSequenceClassification,
     T5ForConditionalGeneration,
     T5Tokenizer
 )
 import pandas as pd
-import numpy as np
-import io
 import json
 class FinancialAnalyzer:
     def __init__(self):
-        # Initialize models and tokenizers
         print("Loading models...")
-        self.tiny_tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat")
-        self.tiny_model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat")
-        self.finbert_tokenizer = AutoTokenizer.from_pretrained("yiyanghkust/finbert-tone")
-        self.finbert_model = AutoModelForSequenceClassification.from_pretrained("yiyanghkust/finbert-tone")
-        self.t5_tokenizer = T5Tokenizer.from_pretrained("t5-base")
-        self.t5_model = T5ForConditionalGeneration.from_pretrained("t5-base")
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self._move_models_to_device()
-        print("Models loaded successfully!")
     def _move_models_to_device(self):
         self.tiny_model.to(self.device)
@@ -35,43 +40,42 @@ class FinancialAnalyzer:
         self.t5_model.to(self.device)
     def process_file(self, file, file_type):
-        """Process uploaded file based on its type"""
-        if file_type == "csv":
-            df = pd.read_csv(file)
-            return df.to_string()
-        elif file_type == "excel":
-            df = pd.read_excel(file)
-            return df.to_string()
-        elif file_type == "markdown":
-            return file.read().decode('utf-8')
-        else:
-            raise ValueError(f"Unsupported file type: {file_type}")
     def analyze_financials(self, balance_sheet_file, income_statement_file, file_type="csv"):
-        """Main analysis function for Gradio interface"""
         try:
             # Process uploaded files
             balance_sheet_data = self.process_file(balance_sheet_file, file_type)
             income_statement_data = self.process_file(income_statement_file, file_type)
             # Generate insights using TinyLlama
-            insights = self.generate_insights(balance_sheet_data, income_statement_data)
-            # Generate sentiment analysis using FinBERT
             sentiment = self.analyze_sentiment(balance_sheet_data, income_statement_data)
-            # Generate recommendations using T5
-            recommendations = self.generate_recommendations(balance_sheet_data, income_statement_data)
-            # Generate roadmap
-            roadmap = self.generate_roadmap(insights, sentiment, recommendations)
             # Combine results
             analysis_results = {
                 "Financial Insights": insights,
                 "Sentiment Analysis": sentiment,
-                "Recommendations": recommendations,
-                "Strategic Roadmap": roadmap
             }
             return json.dumps(analysis_results, indent=2)
@@ -79,67 +83,72 @@ class FinancialAnalyzer:
         except Exception as e:
             return f"Error during analysis: {str(e)}"
-    def generate_insights(self, balance_sheet, income_statement):
-        prompt = f"""Analyze these financial statements and provide key insights:
-        Balance Sheet:
-        {balance_sheet[:1000]}
-        Income Statement:
-        {income_statement[:1000]}
-        """
-        inputs = self.tiny_tokenizer(prompt, return_tensors="pt").to(self.device)
-        outputs = self.tiny_model.generate(
-            inputs["input_ids"],
-            max_length=500,
-            temperature=0.7
-        )
-        return self.tiny_tokenizer.decode(outputs[0], skip_special_tokens=True)
     def analyze_sentiment(self, balance_sheet, income_statement):
-        financial_text = f"{balance_sheet[:500]}\n{income_statement[:500]}"
-        inputs = self.finbert_tokenizer(financial_text, return_tensors="pt").to(self.device)
-        outputs = self.finbert_model(**inputs)
-        probabilities = torch.nn.functional.softmax(outputs.logits, dim=1)
-        sentiment_labels = ['negative', 'neutral', 'positive']
-        return {
-            'sentiment': sentiment_labels[probabilities.argmax().item()],
-            'confidence': f"{probabilities.max().item():.2f}"
-        }
-    def generate_recommendations(self, balance_sheet, income_statement):
-        prompt = f"generate financial recommendations based on: {balance_sheet[:200]} {income_statement[:200]}"
-        inputs = self.t5_tokenizer(prompt, return_tensors="pt").to(self.device)
-        outputs = self.t5_model.generate(inputs["input_ids"], max_length=200)
-        return self.t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    def generate_roadmap(self, insights, sentiment, recommendations):
-        return {
-            "Short-term Actions (0-12 months)": self._generate_short_term_actions(insights, sentiment),
-            "Medium-term Strategy (1-2 years)": self._generate_medium_term_strategy(recommendations),
-            "Long-term Vision (3-5 years)": self._generate_long_term_vision(insights, recommendations)
-        }
-    def _generate_short_term_actions(self, insights, sentiment):
-        prompt = f"Generate short-term actions based on: {insights[:100]} Sentiment: {sentiment}"
-        inputs = self.t5_tokenizer(prompt, return_tensors="pt").to(self.device)
-        outputs = self.t5_model.generate(inputs["input_ids"], max_length=100)
-        return self.t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    def _generate_medium_term_strategy(self, recommendations):
-        prompt = f"Generate medium-term strategy based on: {recommendations}"
-        inputs = self.t5_tokenizer(prompt, return_tensors="pt").to(self.device)
-        outputs = self.t5_model.generate(inputs["input_ids"], max_length=100)
-        return self.t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    def _generate_long_term_vision(self, insights, recommendations):
-        prompt = f"Generate long-term vision based on: {insights[:100]} {recommendations[:100]}"
-        inputs = self.t5_tokenizer(prompt, return_tensors="pt").to(self.device)
-        outputs = self.t5_model.generate(inputs["input_ids"], max_length=100)
-        return self.t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
-# Create Gradio interface
 def create_gradio_interface():
     analyzer = FinancialAnalyzer()
@@ -159,14 +168,13 @@ def create_gradio_interface():
         ],
         outputs=gr.Textbox(label="Analysis Results", lines=20),
         title="Financial Statement Analyzer",
-        description="Upload your financial statements (Balance Sheet and Income Statement) to get AI-powered insights, recommendations, and strategic roadmap.",
         examples=[
             ["balance_sheet.csv", "income_statement.csv", "csv"],
             ["balance_sheet.xlsx", "income_statement.xlsx", "excel"],
             ["balance_sheet.md", "income_statement.md", "markdown"]
         ]
     )
     return iface
 if __name__ == "__main__":

 import gradio as gr
 from transformers import (
     AutoTokenizer,
+    AutoModelForCausalLM,
     AutoModelForSequenceClassification,
     T5ForConditionalGeneration,
     T5Tokenizer
 )
+import torch
 import pandas as pd
 import json
+from huggingface_hub import login
 class FinancialAnalyzer:
     def __init__(self):
         print("Loading models...")
+        try:
+            # Initialize TinyLlama with correct path
+            self.tiny_tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
+            self.tiny_model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
+            # Initialize FinBERT
+            self.finbert_tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
+            self.finbert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
+            # Initialize T5
+            self.t5_tokenizer = T5Tokenizer.from_pretrained("t5-small")
+            self.t5_model = T5ForConditionalGeneration.from_pretrained("t5-small")
+            self.device = "cpu"  # Force CPU usage for stability
+            self._move_models_to_device()
+            print("Models loaded successfully!")
+        except Exception as e:
+            print(f"Error loading models: {str(e)}")
+            raise
     def _move_models_to_device(self):
         self.tiny_model.to(self.device)
         self.t5_model.to(self.device)
     def process_file(self, file, file_type):
+        try:
+            if file_type == "csv":
+                df = pd.read_csv(file.name)
+                return df.to_string()
+            elif file_type == "excel":
+                df = pd.read_excel(file.name)
+                return df.to_string()
+            elif file_type == "markdown":
+                with open(file.name, 'r') as f:
+                    return f.read()
+        except Exception as e:
+            return f"Error processing file: {str(e)}"
     def analyze_financials(self, balance_sheet_file, income_statement_file, file_type="csv"):
         try:
             # Process uploaded files
             balance_sheet_data = self.process_file(balance_sheet_file, file_type)
             income_statement_data = self.process_file(income_statement_file, file_type)
+            # Format the prompt for TinyLlama
+            prompt = self.format_financial_prompt(balance_sheet_data, income_statement_data)
             # Generate insights using TinyLlama
+            insights = self.generate_insights(prompt)
+            # Generate sentiment analysis
             sentiment = self.analyze_sentiment(balance_sheet_data, income_statement_data)
+            # Generate recommendations
+            recommendations = self.generate_recommendations(insights, sentiment)
             # Combine results
             analysis_results = {
                 "Financial Insights": insights,
                 "Sentiment Analysis": sentiment,
+                "Recommendations": recommendations
             }
             return json.dumps(analysis_results, indent=2)
         except Exception as e:
             return f"Error during analysis: {str(e)}"
+    def format_financial_prompt(self, balance_sheet, income_statement):
+        return f"""<human>Please analyze these financial statements and provide key insights:
+Balance Sheet Summary:
+{balance_sheet[:1000]}
+Income Statement Summary:
+{income_statement[:1000]}
+Please provide:
+1. Key financial metrics analysis
+2. Growth trends
+3. Risk factors
+4. Areas of concern
+5. Positive indicators</human>
+<assistant>I'll analyze the financial statements and provide comprehensive insights:"""
+    def generate_insights(self, prompt):
+        try:
+            inputs = self.tiny_tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
+            outputs = self.tiny_model.generate(
+                inputs["input_ids"],
+                max_length=1000,
+                temperature=0.7,
+                top_p=0.95,
+                do_sample=True,
+                pad_token_id=self.tiny_tokenizer.eos_token_id
+            )
+            return self.tiny_tokenizer.decode(outputs[0], skip_special_tokens=True)
+        except Exception as e:
+            return f"Error generating insights: {str(e)}"
     def analyze_sentiment(self, balance_sheet, income_statement):
+        try:
+            text = f"{balance_sheet[:500]}\n{income_statement[:500]}"
+            inputs = self.finbert_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
+            outputs = self.finbert_model(**inputs)
+            probs = torch.nn.functional.softmax(outputs.logits, dim=1)
+            labels = ['negative', 'neutral', 'positive']
+            return {
+                'sentiment': labels[probs.argmax().item()],
+                'confidence': f"{probs.max().item():.2f}",
+                'detailed_scores': {
+                    label: f"{prob:.2f}"
+                    for label, prob in zip(labels, probs[0].tolist())
+                }
+            }
+        except Exception as e:
+            return f"Error in sentiment analysis: {str(e)}"
+    def generate_recommendations(self, insights, sentiment):
+        try:
+            prompt = f"summarize financial recommendations based on: {insights[:500]} Financial sentiment: {sentiment}"
+            inputs = self.t5_tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
+            outputs = self.t5_model.generate(
+                inputs["input_ids"],
+                max_length=200,
+                num_beams=4,
+                temperature=0.7,
+                top_p=0.95
+            )
+            return self.t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
+        except Exception as e:
+            return f"Error generating recommendations: {str(e)}"
 def create_gradio_interface():
     analyzer = FinancialAnalyzer()
         ],
         outputs=gr.Textbox(label="Analysis Results", lines=20),
         title="Financial Statement Analyzer",
+        description="Upload your financial statements to get AI-powered insights and recommendations.",
         examples=[
             ["balance_sheet.csv", "income_statement.csv", "csv"],
             ["balance_sheet.xlsx", "income_statement.xlsx", "excel"],
             ["balance_sheet.md", "income_statement.md", "markdown"]
         ]
     )
     return iface
 if __name__ == "__main__":