walaa2022 committed on
Commit
9bdd84e
·
verified ·
1 Parent(s): f5ea3f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -99
app.py CHANGED
@@ -1,33 +1,38 @@
1
  import gradio as gr
2
- import torch
3
  from transformers import (
4
- AutoModelForCausalLM,
5
  AutoTokenizer,
 
6
  AutoModelForSequenceClassification,
7
  T5ForConditionalGeneration,
8
  T5Tokenizer
9
  )
 
10
  import pandas as pd
11
- import numpy as np
12
- import io
13
  import json
 
14
 
15
  class FinancialAnalyzer:
16
  def __init__(self):
17
- # Initialize models and tokenizers
18
  print("Loading models...")
19
- self.tiny_tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat")
20
- self.tiny_model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat")
21
-
22
- self.finbert_tokenizer = AutoTokenizer.from_pretrained("yiyanghkust/finbert-tone")
23
- self.finbert_model = AutoModelForSequenceClassification.from_pretrained("yiyanghkust/finbert-tone")
24
-
25
- self.t5_tokenizer = T5Tokenizer.from_pretrained("t5-base")
26
- self.t5_model = T5ForConditionalGeneration.from_pretrained("t5-base")
27
-
28
- self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
29
- self._move_models_to_device()
30
- print("Models loaded successfully!")
 
 
 
 
 
 
 
31
 
32
  def _move_models_to_device(self):
33
  self.tiny_model.to(self.device)
@@ -35,43 +40,42 @@ class FinancialAnalyzer:
35
  self.t5_model.to(self.device)
36
 
37
  def process_file(self, file, file_type):
38
- """Process uploaded file based on its type"""
39
- if file_type == "csv":
40
- df = pd.read_csv(file)
41
- return df.to_string()
42
- elif file_type == "excel":
43
- df = pd.read_excel(file)
44
- return df.to_string()
45
- elif file_type == "markdown":
46
- return file.read().decode('utf-8')
47
- else:
48
- raise ValueError(f"Unsupported file type: {file_type}")
 
49
 
50
  def analyze_financials(self, balance_sheet_file, income_statement_file, file_type="csv"):
51
- """Main analysis function for Gradio interface"""
52
  try:
53
  # Process uploaded files
54
  balance_sheet_data = self.process_file(balance_sheet_file, file_type)
55
  income_statement_data = self.process_file(income_statement_file, file_type)
56
 
 
 
 
57
  # Generate insights using TinyLlama
58
- insights = self.generate_insights(balance_sheet_data, income_statement_data)
59
 
60
- # Generate sentiment analysis using FinBERT
61
  sentiment = self.analyze_sentiment(balance_sheet_data, income_statement_data)
62
 
63
- # Generate recommendations using T5
64
- recommendations = self.generate_recommendations(balance_sheet_data, income_statement_data)
65
-
66
- # Generate roadmap
67
- roadmap = self.generate_roadmap(insights, sentiment, recommendations)
68
 
69
  # Combine results
70
  analysis_results = {
71
  "Financial Insights": insights,
72
  "Sentiment Analysis": sentiment,
73
- "Recommendations": recommendations,
74
- "Strategic Roadmap": roadmap
75
  }
76
 
77
  return json.dumps(analysis_results, indent=2)
@@ -79,67 +83,72 @@ class FinancialAnalyzer:
79
  except Exception as e:
80
  return f"Error during analysis: {str(e)}"
81
 
82
- def generate_insights(self, balance_sheet, income_statement):
83
- prompt = f"""Analyze these financial statements and provide key insights:
84
- Balance Sheet:
85
- {balance_sheet[:1000]}
86
-
87
- Income Statement:
88
- {income_statement[:1000]}
89
- """
90
-
91
- inputs = self.tiny_tokenizer(prompt, return_tensors="pt").to(self.device)
92
- outputs = self.tiny_model.generate(
93
- inputs["input_ids"],
94
- max_length=500,
95
- temperature=0.7
96
- )
97
- return self.tiny_tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
  def analyze_sentiment(self, balance_sheet, income_statement):
100
- financial_text = f"{balance_sheet[:500]}\n{income_statement[:500]}"
101
- inputs = self.finbert_tokenizer(financial_text, return_tensors="pt").to(self.device)
102
- outputs = self.finbert_model(**inputs)
103
- probabilities = torch.nn.functional.softmax(outputs.logits, dim=1)
104
- sentiment_labels = ['negative', 'neutral', 'positive']
105
-
106
- return {
107
- 'sentiment': sentiment_labels[probabilities.argmax().item()],
108
- 'confidence': f"{probabilities.max().item():.2f}"
109
- }
110
-
111
- def generate_recommendations(self, balance_sheet, income_statement):
112
- prompt = f"generate financial recommendations based on: {balance_sheet[:200]} {income_statement[:200]}"
113
- inputs = self.t5_tokenizer(prompt, return_tensors="pt").to(self.device)
114
- outputs = self.t5_model.generate(inputs["input_ids"], max_length=200)
115
- return self.t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
116
-
117
- def generate_roadmap(self, insights, sentiment, recommendations):
118
- return {
119
- "Short-term Actions (0-12 months)": self._generate_short_term_actions(insights, sentiment),
120
- "Medium-term Strategy (1-2 years)": self._generate_medium_term_strategy(recommendations),
121
- "Long-term Vision (3-5 years)": self._generate_long_term_vision(insights, recommendations)
122
- }
123
-
124
- def _generate_short_term_actions(self, insights, sentiment):
125
- prompt = f"Generate short-term actions based on: {insights[:100]} Sentiment: {sentiment}"
126
- inputs = self.t5_tokenizer(prompt, return_tensors="pt").to(self.device)
127
- outputs = self.t5_model.generate(inputs["input_ids"], max_length=100)
128
- return self.t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
129
-
130
- def _generate_medium_term_strategy(self, recommendations):
131
- prompt = f"Generate medium-term strategy based on: {recommendations}"
132
- inputs = self.t5_tokenizer(prompt, return_tensors="pt").to(self.device)
133
- outputs = self.t5_model.generate(inputs["input_ids"], max_length=100)
134
- return self.t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
135
-
136
- def _generate_long_term_vision(self, insights, recommendations):
137
- prompt = f"Generate long-term vision based on: {insights[:100]} {recommendations[:100]}"
138
- inputs = self.t5_tokenizer(prompt, return_tensors="pt").to(self.device)
139
- outputs = self.t5_model.generate(inputs["input_ids"], max_length=100)
140
- return self.t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
141
-
142
- # Create Gradio interface
143
  def create_gradio_interface():
144
  analyzer = FinancialAnalyzer()
145
 
@@ -159,14 +168,13 @@ def create_gradio_interface():
159
  ],
160
  outputs=gr.Textbox(label="Analysis Results", lines=20),
161
  title="Financial Statement Analyzer",
162
- description="Upload your financial statements (Balance Sheet and Income Statement) to get AI-powered insights, recommendations, and strategic roadmap.",
163
  examples=[
164
  ["balance_sheet.csv", "income_statement.csv", "csv"],
165
  ["balance_sheet.xlsx", "income_statement.xlsx", "excel"],
166
  ["balance_sheet.md", "income_statement.md", "markdown"]
167
  ]
168
  )
169
-
170
  return iface
171
 
172
  if __name__ == "__main__":
 
1
  import gradio as gr
 
2
  from transformers import (
 
3
  AutoTokenizer,
4
+ AutoModelForCausalLM,
5
  AutoModelForSequenceClassification,
6
  T5ForConditionalGeneration,
7
  T5Tokenizer
8
  )
9
+ import torch
10
  import pandas as pd
 
 
11
  import json
12
+ from huggingface_hub import login
13
 
14
  class FinancialAnalyzer:
15
  def __init__(self):
 
16
  print("Loading models...")
17
+ try:
18
+ # Initialize TinyLlama with correct path
19
+ self.tiny_tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
20
+ self.tiny_model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
21
+
22
+ # Initialize FinBERT
23
+ self.finbert_tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
24
+ self.finbert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
25
+
26
+ # Initialize T5
27
+ self.t5_tokenizer = T5Tokenizer.from_pretrained("t5-small")
28
+ self.t5_model = T5ForConditionalGeneration.from_pretrained("t5-small")
29
+
30
+ self.device = "cpu" # Force CPU usage for stability
31
+ self._move_models_to_device()
32
+ print("Models loaded successfully!")
33
+ except Exception as e:
34
+ print(f"Error loading models: {str(e)}")
35
+ raise
36
 
37
  def _move_models_to_device(self):
38
  self.tiny_model.to(self.device)
 
40
  self.t5_model.to(self.device)
41
 
42
  def process_file(self, file, file_type):
43
+ try:
44
+ if file_type == "csv":
45
+ df = pd.read_csv(file.name)
46
+ return df.to_string()
47
+ elif file_type == "excel":
48
+ df = pd.read_excel(file.name)
49
+ return df.to_string()
50
+ elif file_type == "markdown":
51
+ with open(file.name, 'r') as f:
52
+ return f.read()
53
+ except Exception as e:
54
+ return f"Error processing file: {str(e)}"
55
 
56
  def analyze_financials(self, balance_sheet_file, income_statement_file, file_type="csv"):
 
57
  try:
58
  # Process uploaded files
59
  balance_sheet_data = self.process_file(balance_sheet_file, file_type)
60
  income_statement_data = self.process_file(income_statement_file, file_type)
61
 
62
+ # Format the prompt for TinyLlama
63
+ prompt = self.format_financial_prompt(balance_sheet_data, income_statement_data)
64
+
65
  # Generate insights using TinyLlama
66
+ insights = self.generate_insights(prompt)
67
 
68
+ # Generate sentiment analysis
69
  sentiment = self.analyze_sentiment(balance_sheet_data, income_statement_data)
70
 
71
+ # Generate recommendations
72
+ recommendations = self.generate_recommendations(insights, sentiment)
 
 
 
73
 
74
  # Combine results
75
  analysis_results = {
76
  "Financial Insights": insights,
77
  "Sentiment Analysis": sentiment,
78
+ "Recommendations": recommendations
 
79
  }
80
 
81
  return json.dumps(analysis_results, indent=2)
 
83
  except Exception as e:
84
  return f"Error during analysis: {str(e)}"
85
 
86
def format_financial_prompt(self, balance_sheet, income_statement):
    """Build the insights prompt from the two statement texts.

    Only the first 1000 characters of each statement are embedded.

    Args:
        balance_sheet: Balance sheet text.
        income_statement: Income statement text.

    Returns:
        The prompt string, ending with the opening of the assistant turn.
    """
    # NOTE(review): the <human>/<assistant> tags may not match the chat
    # template this checkpoint was trained with - verify against the
    # model card before relying on output quality.
    balance_excerpt = balance_sheet[:1000]
    income_excerpt = income_statement[:1000]

    sections = [
        "<human>Please analyze these financial statements and provide key insights:",
        "",
        "Balance Sheet Summary:",
        f"{balance_excerpt}",
        "",
        "Income Statement Summary:",
        f"{income_excerpt}",
        "",
        "Please provide:",
        "1. Key financial metrics analysis",
        "2. Growth trends",
        "3. Risk factors",
        "4. Areas of concern",
        "5. Positive indicators</human>",
        "",
        "<assistant>I'll analyze the financial statements and provide comprehensive insights:",
    ]
    return "\n".join(sections)
103
+
104
+ def generate_insights(self, prompt):
105
+ try:
106
+ inputs = self.tiny_tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
107
+ outputs = self.tiny_model.generate(
108
+ inputs["input_ids"],
109
+ max_length=1000,
110
+ temperature=0.7,
111
+ top_p=0.95,
112
+ do_sample=True,
113
+ pad_token_id=self.tiny_tokenizer.eos_token_id
114
+ )
115
+ return self.tiny_tokenizer.decode(outputs[0], skip_special_tokens=True)
116
+ except Exception as e:
117
+ return f"Error generating insights: {str(e)}"
118
 
119
  def analyze_sentiment(self, balance_sheet, income_statement):
120
+ try:
121
+ text = f"{balance_sheet[:500]}\n{income_statement[:500]}"
122
+ inputs = self.finbert_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
123
+ outputs = self.finbert_model(**inputs)
124
+ probs = torch.nn.functional.softmax(outputs.logits, dim=1)
125
+ labels = ['negative', 'neutral', 'positive']
126
+ return {
127
+ 'sentiment': labels[probs.argmax().item()],
128
+ 'confidence': f"{probs.max().item():.2f}",
129
+ 'detailed_scores': {
130
+ label: f"{prob:.2f}"
131
+ for label, prob in zip(labels, probs[0].tolist())
132
+ }
133
+ }
134
+ except Exception as e:
135
+ return f"Error in sentiment analysis: {str(e)}"
136
+
137
+ def generate_recommendations(self, insights, sentiment):
138
+ try:
139
+ prompt = f"summarize financial recommendations based on: {insights[:500]} Financial sentiment: {sentiment}"
140
+ inputs = self.t5_tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
141
+ outputs = self.t5_model.generate(
142
+ inputs["input_ids"],
143
+ max_length=200,
144
+ num_beams=4,
145
+ temperature=0.7,
146
+ top_p=0.95
147
+ )
148
+ return self.t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
149
+ except Exception as e:
150
+ return f"Error generating recommendations: {str(e)}"
151
+
 
 
 
 
 
 
 
 
 
 
 
152
  def create_gradio_interface():
153
  analyzer = FinancialAnalyzer()
154
 
 
168
  ],
169
  outputs=gr.Textbox(label="Analysis Results", lines=20),
170
  title="Financial Statement Analyzer",
171
+ description="Upload your financial statements to get AI-powered insights and recommendations.",
172
  examples=[
173
  ["balance_sheet.csv", "income_statement.csv", "csv"],
174
  ["balance_sheet.xlsx", "income_statement.xlsx", "excel"],
175
  ["balance_sheet.md", "income_statement.md", "markdown"]
176
  ]
177
  )
 
178
  return iface
179
 
180
  if __name__ == "__main__":