walaa2022 committed
Commit eac8dde · verified · 1 parent: 2ac4fcc

Update app.py

Files changed (1):
  1. app.py (+229, -137)
app.py CHANGED
@@ -1,193 +1,284 @@
  import os
  import gradio as gr
  import pandas as pd
- from transformers import pipeline
  import torch
- import sys
  import logging
- import io
- from huggingface_hub import login
- from dotenv import load_dotenv

- # Load environment variables
- load_dotenv()
- import logging
-
- # Set up logging
- logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)

- # Get token securely from environment variable
- hf_token = os.getenv('HUGGINGFACE_TOKEN')

- # Check if the token is available
- if hf_token:
-     # Log in to Hugging Face Hub
-     login(token=hf_token)
-     print("Successfully logged in to Hugging Face Hub.")
- else:
-     print("HF_TOKEN environment variable not found. Please set it in the Space settings.")

  class FinancialAnalyzer:
      def __init__(self):
-         """Initialize models with error handling"""
          try:
-             # 1. Llama 2 for strategic analysis
-             self.strategic_analyzer = pipeline(
-                 "text-generation",
-                 model="meta-llama/Llama-3.2-1B",
-                 device_map="auto"
-             )
-             logger.info("Llama 3 initialized successfully")
-
-             # 2. FinBERT for financial sentiment
-             self.financial_analyzer = pipeline(
-                 "text-classification",
-                 model="ProsusAI/finbert",
-                 top_k=None
-             )
-             logger.info("FinBERT initialized successfully")
-
-             # 3. Falcon for recommendations
-             self.recommendation_generator = pipeline(
-                 "text-generation",
-                 model="tiiuae/falcon-7b-instruct",
-                 device_map="auto"
-             )
-             logger.info("Falcon initialized successfully")
-
          except Exception as e:
-             logger.error(f"Error initializing models: {str(e)}")
              raise

-     def read_csv_file(self, file_obj):
-         """Safely read CSV file"""
          try:
              if file_obj is None:
                  raise ValueError("No file provided")
-             return pd.read_csv(file_obj)
          except Exception as e:
-             logger.error(f"Error reading CSV file: {str(e)}")
              raise

-     def generate_strategic_analysis(self, financial_data):
-         """Generate strategic analysis using Llama 3"""
          try:
-             prompt = f"""[INST] As a senior financial analyst, analyze these financial statements:
-             Financial Data:
              {financial_data}
              Provide:
              1. Business Health Assessment
              2. Key Strategic Insights
-             3. Market Position Analysis
              4. Growth Opportunities
              5. Risk Factors [/INST]"""
-
-             response = self.strategic_analyzer(
                  prompt,
-                 max_length=1500,
-                 temperature=0.7
              )
              return response[0]['generated_text']
          except Exception as e:
-             logger.error(f"Error in strategic analysis: {str(e)}")
-             return "Error generating strategic analysis"
-
-     def analyze_sentiment(self, text):
-         """Analyze financial sentiment using FinBERT"""
-         try:
-             return self.financial_analyzer(text)
-         except Exception as e:
-             logger.error(f"Error in sentiment analysis: {str(e)}")
-             return [{"label": "error", "score": 1.0}]
-
      def generate_recommendations(self, analysis):
-         """Generate recommendations using Falcon"""
          try:
-             prompt = f"""Based on this financial analysis:
              {analysis}

-             Provide specific, actionable recommendations covering:
              1. Strategic Initiatives
              2. Operational Improvements
              3. Financial Management
              4. Risk Mitigation
              5. Growth Strategy"""
-
-             response = self.recommendation_generator(
                  prompt,
-                 max_length=1500,
-                 temperature=0.6
              )
              return response[0]['generated_text']
          except Exception as e:
-             logger.error(f"Error generating recommendations: {str(e)}")
              return "Error generating recommendations"

  def analyze_financial_statements(income_statement, balance_sheet):
-     """Main analysis function with error handling"""
      try:
-         # Initialize analyzer
          analyzer = FinancialAnalyzer()
-
-         # Read CSV files safely
-         logger.info("Reading input files...")
-         income_df = analyzer.read_csv_file(income_statement)
-         balance_df = analyzer.read_csv_file(balance_sheet)
-
-         # Prepare financial data
          financial_data = f"""
          Income Statement Summary:
-         {income_df.to_string()}

          Balance Sheet Summary:
-         {balance_df.to_string()}
          """
-
-         # Generate analyses
          logger.info("Generating analysis...")
-         strategic_analysis = analyzer.generate_strategic_analysis(financial_data)
-         sentiment = analyzer.analyze_sentiment(strategic_analysis)
-         recommendations = analyzer.generate_recommendations(strategic_analysis)
-
-         # Format output
-         logger.info("Formatting results...")
-         return format_results(strategic_analysis, sentiment, recommendations)
-
      except Exception as e:
-         logger.error(f"Error in analysis: {str(e)}")
-         return f"""Error analyzing files: {str(e)}

-         Please check:
-         Files are in correct CSV format & contain the expected data
-         If the problem persists, try uploading the files again."""

  def format_results(analysis, sentiment, recommendations):
      """Format analysis results"""
      try:
-         output = "# Financial Analysis Report\n\n"
-
-         # Strategic Analysis
-         output += "## Strategic Analysis\n\n"
-         output += analysis + "\n\n"
-
-         # Sentiment Analysis
-         output += "## Market Sentiment\n\n"
-         for score in sentiment[0]:
-             output += f"- {score['label']}: {score['score']:.2%}\n"
-         output += "\n"
-
-         # Recommendations
-         output += "## Strategic Recommendations\n\n"
-         output += recommendations
-
-         return output
      except Exception as e:
-         logger.error(f"Error formatting results: {str(e)}")
-         return "Error formatting analysis results"

  # Create Gradio interface
  iface = gr.Interface(
@@ -197,23 +288,24 @@ iface = gr.Interface(
          gr.File(label="Balance Sheet (CSV)")
      ],
      outputs=gr.Markdown(),
-     title="AI-Powered Financial Statement Analysis",
-     description="""Upload your financial statements for comprehensive analysis using:
-     - Llama 3: Strategic Analysis
-     - FinBERT: Financial Sentiment Analysis
-     - Falcon: Strategic Recommendations""",
-     examples=[
-         [
-             "OFINTECH-Income Statement-template.csv",
-             "OFINTECH Balance Sheet template.csv"
-         ]
-     ]
  )

- # Launch the interface
  if __name__ == "__main__":
      try:
-         iface.launch()
      except Exception as e:
-         logger.error(f"Error launching application: {str(e)}")
          sys.exit(1)
 
  import os
  import gradio as gr
  import pandas as pd
  import torch
  import logging
+ from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
+ import gc

+ # Setup logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(levelname)s - %(message)s'
+ )
  logger = logging.getLogger(__name__)

+ # Device configuration
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+ logger.info(f"Using device: {DEVICE}")

+ def clear_gpu_memory():
+     """Utility function to clear GPU memory"""
+     if DEVICE == "cuda":
+         torch.cuda.empty_cache()
+     gc.collect()

+ class ModelManager:
+     """Handles model loading and inference"""
+
+     def __init__(self):
+         self.device = DEVICE
+         self.models = {}
+         self.tokenizers = {}
+
+     def load_model(self, model_name, model_type="sentiment"):
+         """Load model and tokenizer"""
+         try:
+             if model_name not in self.models:
+                 if model_type == "sentiment":
+                     self.tokenizers[model_name] = AutoTokenizer.from_pretrained(model_name)
+                     self.models[model_name] = AutoModelForSequenceClassification.from_pretrained(
+                         model_name,
+                         torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
+                     ).to(self.device)
+                 else:
+                     self.models[model_name] = pipeline(
+                         "text-generation",
+                         model=model_name,
+                         device_map="auto" if self.device == "cuda" else None,
+                         torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
+                     )
+                 logger.info(f"Loaded model: {model_name}")
+         except Exception as e:
+             logger.error(f"Error loading model {model_name}: {str(e)}")
+             raise

+     def unload_model(self, model_name):
+         """Unload model and tokenizer"""
+         try:
+             if model_name in self.models:
+                 del self.models[model_name]
+             if model_name in self.tokenizers:
+                 del self.tokenizers[model_name]
+             clear_gpu_memory()
+             logger.info(f"Unloaded model: {model_name}")
+         except Exception as e:
+             logger.error(f"Error unloading model {model_name}: {str(e)}")
+
+     def get_model(self, model_name):
+         """Get loaded model"""
+         return self.models.get(model_name)

+     def get_tokenizer(self, model_name):
+         """Get loaded tokenizer"""
+         return self.tokenizers.get(model_name)
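The load/get/unload methods above are what let the app keep only one large checkpoint in memory at a time. A minimal usage sketch, not part of this commit; the tiny checkpoint name is only a placeholder for illustration:

manager = ModelManager()
# Any model_type other than "sentiment" takes the text-generation branch of load_model().
manager.load_model("sshleifer/tiny-gpt2", model_type="generation")
generator = manager.get_model("sshleifer/tiny-gpt2")
print(generator("Revenue grew 12% year over year.", max_new_tokens=20)[0]["generated_text"])
# unload_model() drops the pipeline reference and calls clear_gpu_memory().
manager.unload_model("sshleifer/tiny-gpt2")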
 
  class FinancialAnalyzer:
+     """Main analyzer class for financial statements"""
+
      def __init__(self):
+         self.model_manager = ModelManager()
+         self.models = {
+             "sentiment": "ProsusAI/finbert",
+             "analysis": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+             "recommendation": "tiiuae/falcon-rw-1b"
+         }
+
+         # Load sentiment model at initialization
          try:
+             self.model_manager.load_model(self.models["sentiment"], "sentiment")
          except Exception as e:
+             logger.error(f"Failed to initialize sentiment model: {str(e)}")
              raise

+     def read_csv(self, file_obj):
+         """Read and validate CSV file"""
          try:
              if file_obj is None:
                  raise ValueError("No file provided")
+
+             df = pd.read_csv(file_obj)
+
+             if df.empty:
+                 raise ValueError("Empty CSV file")
+
+             return df.describe()
          except Exception as e:
+             logger.error(f"Error reading CSV: {str(e)}")
              raise
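read_csv() now passes the models a pandas describe() summary instead of the full table, which keeps prompts short. A quick illustration of what that summary contains (toy numbers, not from the app):

import io
import pandas as pd

csv_text = "Year,Revenue,Net Income\n2022,1200,150\n2023,1500,210\n"
summary = pd.read_csv(io.StringIO(csv_text)).describe()
# describe() reports count, mean, std, min, quartiles and max per numeric column;
# summary.to_string() is the text that later lands inside financial_data.
print(summary.to_string())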
 
+
+     def analyze_sentiment(self, text):
+         """Analyze sentiment using FinBERT"""
          try:
+             model_name = self.models["sentiment"]
+             model = self.model_manager.get_model(model_name)
+             tokenizer = self.model_manager.get_tokenizer(model_name)
+
+             inputs = tokenizer(
+                 text,
+                 return_tensors="pt",
+                 truncation=True,
+                 max_length=512,
+                 padding=True
+             ).to(DEVICE)
+
+             with torch.no_grad():
+                 outputs = model(**inputs)
+                 probabilities = torch.nn.functional.softmax(outputs.logits, dim=1)
+
+             labels = ['negative', 'neutral', 'positive']
+             scores = probabilities[0].cpu().tolist()
+
+             results = [
+                 {'label': label, 'score': score}
+                 for label, score in zip(labels, scores)
+             ]
+
+             return [results]
+         except Exception as e:
+             logger.error(f"Sentiment analysis error: {str(e)}")
+             return [{"label": "error", "score": 1.0}]
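One caveat on the hard-coded ['negative', 'neutral', 'positive'] list above: the order of a sequence-classification head's logits is defined by the checkpoint's config, so it is safer to read the labels from model.config.id2label than to assume an order. A sketch of that variant, reusing the model, inputs and torch objects from the method:

with torch.no_grad():
    outputs = model(**inputs)
    probabilities = torch.nn.functional.softmax(outputs.logits, dim=1)

# id2label comes from the checkpoint itself, so no label order has to be assumed.
id2label = model.config.id2label
results = [
    {"label": id2label[i], "score": score}
    for i, score in enumerate(probabilities[0].cpu().tolist())
]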
+
+     def generate_analysis(self, financial_data):
+         """Generate strategic analysis"""
+         try:
+             model_name = self.models["analysis"]
+             self.model_manager.load_model(model_name, "generation")
+
+             prompt = f"""[INST] Analyze these financial statements:
              {financial_data}
              Provide:
              1. Business Health Assessment
              2. Key Strategic Insights
+             3. Market Position
              4. Growth Opportunities
              5. Risk Factors [/INST]"""
+
+             response = self.model_manager.get_model(model_name)(
                  prompt,
+                 max_length=1000,
+                 temperature=0.7,
+                 do_sample=True,
+                 num_return_sequences=1,
+                 truncation=True
              )
+
              return response[0]['generated_text']
          except Exception as e:
+             logger.error(f"Analysis generation error: {str(e)}")
+             return "Error in analysis generation"
+         finally:
+             self.model_manager.unload_model(model_name)

      def generate_recommendations(self, analysis):
+         """Generate recommendations"""
          try:
+             model_name = self.models["recommendation"]
+             self.model_manager.load_model(model_name, "generation")
+
+             prompt = f"""Based on this analysis:
              {analysis}

+             Provide actionable recommendations for:
              1. Strategic Initiatives
              2. Operational Improvements
              3. Financial Management
              4. Risk Mitigation
              5. Growth Strategy"""
+
+             response = self.model_manager.get_model(model_name)(
                  prompt,
+                 max_length=1000,
+                 temperature=0.6,
+                 do_sample=True,
+                 num_return_sequences=1,
+                 truncation=True
              )
+
              return response[0]['generated_text']
          except Exception as e:
+             logger.error(f"Recommendations generation error: {str(e)}")
              return "Error generating recommendations"
+         finally:
+             self.model_manager.unload_model(model_name)
+
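Both generators above cap output with max_length=1000, and in transformers that limit counts the prompt tokens as well, so a long financial_data block can leave little room for the completion. If that becomes an issue, max_new_tokens bounds only the generated part, and recent transformers versions can build the prompt from the checkpoint's own chat template instead of the hand-written [INST] tags. A sketch under those assumptions (financial_data is assumed to be in scope):

from transformers import pipeline

generator = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
messages = [{"role": "user", "content": f"Analyze these financial statements:\n{financial_data}"}]
# apply_chat_template formats the request the way the chat checkpoint was trained to expect.
prompt = generator.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
response = generator(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)
print(response[0]["generated_text"])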
 
  def analyze_financial_statements(income_statement, balance_sheet):
+     """Main analysis function"""
      try:
          analyzer = FinancialAnalyzer()
+
+         # Validate inputs
+         if not income_statement or not balance_sheet:
+             return "Error: Please provide both income statement and balance sheet files"
+
+         # Process financial statements
+         logger.info("Processing financial statements...")
+         income_summary = analyzer.read_csv(income_statement)
+         balance_summary = analyzer.read_csv(balance_sheet)
+
          financial_data = f"""
          Income Statement Summary:
+         {income_summary.to_string()}

          Balance Sheet Summary:
+         {balance_summary.to_string()}
          """
+
+         # Generate analysis
          logger.info("Generating analysis...")
+         analysis = analyzer.generate_analysis(financial_data)
+
+         # Analyze sentiment
+         logger.info("Analyzing sentiment...")
+         sentiment = analyzer.analyze_sentiment(analysis)
+
+         # Generate recommendations
+         logger.info("Generating recommendations...")
+         recommendations = analyzer.generate_recommendations(analysis)
+
+         # Format results
+         return format_results(analysis, sentiment, recommendations)
+
      except Exception as e:
+         logger.error(f"Analysis error: {str(e)}")
+         return f"""Analysis Error:
+
+         {str(e)}

+         Please verify:
+         1. Files are valid CSV format
+         2. Files contain required financial data
+         3. File size is within limits"""

  def format_results(analysis, sentiment, recommendations):
      """Format analysis results"""
      try:
+         if not isinstance(analysis, str) or not isinstance(recommendations, str):
+             raise ValueError("Invalid input types")
+
+         output = [
+             "# Financial Analysis Report\n\n",
+             "## Strategic Analysis\n\n",
+             f"{analysis.strip()}\n\n",
+             "## Market Sentiment\n\n"
+         ]
+
+         if isinstance(sentiment, list) and sentiment:
+             for score in sentiment[0]:
+                 if isinstance(score, dict) and 'label' in score and 'score' in score:
+                     output.append(f"- {score['label']}: {score['score']:.2%}\n")
+             output.append("\n")
+
+         output.append("## Strategic Recommendations\n\n")
+         output.append(f"{recommendations.strip()}")
+
+         return "".join(output)
      except Exception as e:
+         logger.error(f"Formatting error: {str(e)}")
+         return "Error formatting results"
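format_results() can be smoke-tested without loading any model by feeding it dummy values; assuming this file is saved as app.py, importing it only builds the Gradio interface and does not launch anything or download checkpoints:

from app import format_results

dummy_sentiment = [[
    {"label": "negative", "score": 0.10},
    {"label": "neutral", "score": 0.25},
    {"label": "positive", "score": 0.65},
]]
report = format_results(
    "Revenue is growing steadily.",     # stands in for the generated analysis
    dummy_sentiment,                    # same shape as analyze_sentiment() output
    "Maintain current cost controls.",  # stands in for the recommendations
)
print(report)  # Markdown report with Analysis, Market Sentiment and Recommendations sections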
 
  # Create Gradio interface
  iface = gr.Interface(
  ... (unchanged lines 285-287 collapsed)
          gr.File(label="Balance Sheet (CSV)")
      ],
      outputs=gr.Markdown(),
+     title="Financial Statement Analyzer",
+     description="""Upload financial statements for AI-powered analysis:
+     - Strategic Analysis (TinyLlama)
+     - Sentiment Analysis (FinBERT)
+     - Strategic Recommendations (Falcon)
+
+     Note: Please ensure files are in CSV format.""",
+     flagging_mode="never"
  )

  if __name__ == "__main__":
      try:
+         iface.queue()
+         iface.launch(
+             share=False,
+             server_name="0.0.0.0",
+             server_port=7860
+         )
      except Exception as e:
+         logger.error(f"Launch error: {str(e)}")
          sys.exit(1)
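One follow-up on the launch block: the new version still calls sys.exit(1) in the except branch, but the import sys line was removed earlier in this commit, so that handler would itself fail with a NameError. A minimal fix, if the exit call is kept, is simply restoring the import at the top of the file:

import sys  # still needed by the launch error handler below

if __name__ == "__main__":
    try:
        iface.queue()
        iface.launch(share=False, server_name="0.0.0.0", server_port=7860)
    except Exception as e:
        logger.error(f"Launch error: {str(e)}")
        sys.exit(1)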