walaa2022 committed on
Commit
0ad8f2f
·
verified ·
1 Parent(s): cd3edfb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +181 -65
app.py CHANGED
@@ -1,24 +1,65 @@
1
  import gradio as gr
2
  import pandas as pd
3
  import json
4
- from transformers import AutoTokenizer, AutoModelForCausalLM
 
 
 
 
 
 
5
  import torch
 
 
6
  import re
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  class FinancialAnalyzer:
9
  def __init__(self):
10
  print("Initializing Analyzer...")
11
- self.initialize_model()
12
  print("Initialization complete!")
13
 
14
- def initialize_model(self):
15
- """Initialize TinyLlama model"""
16
  try:
17
- self.tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
18
- self.model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
19
- self.model.eval()
 
 
 
 
 
 
 
 
20
  except Exception as e:
21
- print(f"Error initializing model: {str(e)}")
22
  raise
23
 
24
  def clean_number(self, value):
@@ -71,7 +112,7 @@ class FinancialAnalyzer:
71
  except Exception as e:
72
  print(f"Error parsing financial data: {str(e)}")
73
  return {}
74
-
75
  def process_table(self, headers, rows):
76
  """Process table data into structured format"""
77
  try:
@@ -131,16 +172,12 @@ class FinancialAnalyzer:
131
  "Gross_Margin": (metrics["Profitability"]["Gross_Profit_2025"] / revenue_2025) * 100,
132
  "Operating_Margin": (metrics["Profitability"]["EBIT_2025"] / revenue_2025) * 100,
133
  "Net_Margin": (metrics["Profitability"]["Net_Earnings_2025"] / revenue_2025) * 100,
134
-
135
  "Current_Ratio": metrics["Balance_Sheet"]["Current_Assets_2025"] / metrics["Balance_Sheet"]["Current_Liabilities_2025"] if metrics["Balance_Sheet"]["Current_Liabilities_2025"] != 0 else 0,
136
  "Quick_Ratio": (metrics["Balance_Sheet"]["Current_Assets_2025"] - metrics["Balance_Sheet"]["Inventory_2025"]) / metrics["Balance_Sheet"]["Current_Liabilities_2025"] if metrics["Balance_Sheet"]["Current_Liabilities_2025"] != 0 else 0,
137
-
138
  "Asset_Turnover": revenue_2025 / metrics["Balance_Sheet"]["Total_Assets_2025"] if metrics["Balance_Sheet"]["Total_Assets_2025"] != 0 else 0,
139
  "Receivables_Turnover": revenue_2025 / metrics["Balance_Sheet"]["Accounts_Receivable_2025"] if metrics["Balance_Sheet"]["Accounts_Receivable_2025"] != 0 else 0,
140
-
141
  "Debt_to_Equity": metrics["Balance_Sheet"]["Total_Liabilities_2025"] / metrics["Balance_Sheet"]["Equity_2025"] if metrics["Balance_Sheet"]["Equity_2025"] != 0 else 0,
142
  "Interest_Coverage": metrics["Profitability"]["EBIT_2025"] / metrics["Cash_Flow"]["Interest_Expense_2025"] if metrics["Cash_Flow"]["Interest_Expense_2025"] != 0 else 0,
143
-
144
  "Revenue_Growth": ((metrics["Revenue"]["2025"] / metrics["Revenue"]["2024"]) - 1) * 100 if metrics["Revenue"]["2024"] != 0 else 0,
145
  "5Year_Revenue_CAGR": ((metrics["Revenue"]["2025"] / metrics["Revenue"]["2021"]) ** (1/4) - 1) * 100 if metrics["Revenue"]["2021"] != 0 else 0
146
  }
@@ -150,55 +187,74 @@ class FinancialAnalyzer:
150
  print(f"Error extracting metrics: {str(e)}")
151
  return {}
152
 
153
- def generate_prompt(self, metrics):
154
- """Create analysis prompt from metrics"""
155
  try:
156
- return f"""<human>
157
- Please provide a comprehensive financial analysis for 2025 with detailed insights on:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
- 1. Revenue and Growth:
160
- - Total Revenue: ${metrics['Revenue']['2025']:,.1f}M
161
- - YoY Growth Rate: {metrics['Ratios'].get('Revenue_Growth', 0):,.1f}%
 
 
162
  - 5-Year CAGR: {metrics['Ratios'].get('5Year_Revenue_CAGR', 0):,.1f}%
163
 
164
- 2. Profitability Analysis:
165
  - Gross Profit: ${metrics['Profitability']['Gross_Profit_2025']:,.1f}M
166
  - EBIT: ${metrics['Profitability']['EBIT_2025']:,.1f}M
167
  - Net Earnings: ${metrics['Profitability']['Net_Earnings_2025']:,.1f}M
168
- - Margin Analysis:
169
- * Gross Margin: {metrics['Ratios'].get('Gross_Margin', 0):,.1f}%
170
- * Operating Margin: {metrics['Ratios'].get('Operating_Margin', 0):,.1f}%
171
- * Net Margin: {metrics['Ratios'].get('Net_Margin', 0):,.1f}%
172
-
173
- 3. Balance Sheet Strength:
174
- - Total Assets: ${metrics['Balance_Sheet']['Total_Assets_2025']:,.1f}M
175
- - Total Liabilities: ${metrics['Balance_Sheet']['Total_Liabilities_2025']:,.1f}M
176
- - Shareholders' Equity: ${metrics['Balance_Sheet']['Equity_2025']:,.1f}M
177
-
178
- 4. Key Financial Ratios:
179
- - Liquidity:
180
- * Current Ratio: {metrics['Ratios'].get('Current_Ratio', 0):,.2f}
181
- * Quick Ratio: {metrics['Ratios'].get('Quick_Ratio', 0):,.2f}
182
- - Efficiency:
183
- * Asset Turnover: {metrics['Ratios'].get('Asset_Turnover', 0):,.2f}
184
- * Receivables Turnover: {metrics['Ratios'].get('Receivables_Turnover', 0):,.2f}
185
- - Solvency:
186
- * Debt-to-Equity: {metrics['Ratios'].get('Debt_to_Equity', 0):,.2f}
187
- * Interest Coverage: {metrics['Ratios'].get('Interest_Coverage', 0):,.2f}
188
-
189
- Please provide:
190
- 1. An assessment of overall financial health and performance trends
191
- 2. Key strengths and potential areas of concern
192
- 3. Analysis of operational efficiency and working capital management
193
- 4. Evaluation of capital structure and debt management
194
- 5. Specific recommendations for:
195
- - Improving operational efficiency
196
- - Optimizing capital structure
197
- - Enhancing shareholder value
198
- - Managing key risks identified
199
-
200
- Include quantitative support for your analysis and recommendations.
201
- </human>"""
202
  except Exception as e:
203
  print(f"Error generating prompt: {str(e)}")
204
  return ""
@@ -206,44 +262,103 @@ Include quantitative support for your analysis and recommendations.
206
  def generate_analysis(self, prompt):
207
  """Generate analysis using TinyLlama"""
208
  try:
209
- inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1500)
210
 
211
- outputs = self.model.generate(
212
  inputs["input_ids"],
213
- max_new_tokens=800,
214
  temperature=0.7,
215
  top_p=0.9,
216
  do_sample=True,
217
- pad_token_id=self.tokenizer.eos_token_id,
218
- no_repeat_ngram_size=3
 
 
219
  )
220
 
221
- analysis = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
222
- analysis = analysis.split("<human>")[-1].strip()
 
 
 
 
 
 
 
 
223
  return analysis
224
  except Exception as e:
225
  return f"Error generating analysis: {str(e)}"
226
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  def analyze_financials(self, balance_sheet_file, income_stmt_file):
228
  """Main analysis function"""
229
  try:
 
230
  if not (self.is_valid_markdown(balance_sheet_file) and self.is_valid_markdown(income_stmt_file)):
231
  return "Error: One or both files are invalid or not in Markdown format."
232
 
 
233
  with open(balance_sheet_file, 'r') as f:
234
  balance_sheet = f.read()
235
  with open(income_stmt_file, 'r') as f:
236
  income_stmt = f.read()
237
 
 
238
  income_data = self.parse_financial_data(income_stmt)
239
  balance_data = self.parse_financial_data(balance_sheet)
240
  metrics = self.extract_metrics(income_data, balance_data)
241
- prompt = self.generate_prompt(metrics)
 
 
 
 
 
242
  analysis = self.generate_analysis(prompt)
243
 
 
244
  results = {
245
  "Financial Analysis": {
246
  "Key Metrics": metrics,
 
247
  "AI Insights": analysis,
248
  "Analysis Period": "2021-2025",
249
  "Note": "All monetary values in millions ($M)"
@@ -265,8 +380,9 @@ def create_interface():
265
  gr.File(label="Income Statement (Markdown)", type="filepath")
266
  ],
267
  outputs=gr.Textbox(label="Analysis Results", lines=25),
268
- title="Financial Statement Analyzer",
269
- description="Upload financial statements in Markdown format for AI-powered analysis"
 
270
  )
271
 
272
  return iface
 
1
  import gradio as gr
2
  import pandas as pd
3
  import json
4
+ from transformers import (
5
+ AutoTokenizer,
6
+ AutoModelForCausalLM,
7
+ AutoModelForSequenceClassification,
8
+ TrainingArguments,
9
+ Trainer
10
+ )
11
  import torch
12
+ import numpy as np
13
+ from torch.utils.data import Dataset, DataLoader
14
  import re
15
 
16
class FinancialDataset(Dataset):
    """Map-style dataset that pairs raw texts with integer class labels.

    Items are tokenized lazily, on access, and returned as a dict holding
    ``input_ids``, ``attention_mask`` and ``labels`` tensors.

    Args:
        texts: Indexable collection of inputs; each item is passed through
            ``str()`` before tokenization.
        labels: Indexable collection of integer labels, aligned with ``texts``.
        tokenizer: Callable accepting ``truncation``, ``padding``,
            ``max_length`` and ``return_tensors`` keyword arguments and
            returning a mapping with ``input_ids`` and ``attention_mask``
            tensors (presumably a Hugging Face tokenizer; confirm with callers).
        max_length: Length every sequence is padded/truncated to.
    """

    def __init__(self, texts, labels, tokenizer, max_length=512):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of texts in the dataset."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize the text at ``idx`` and return it with its label."""
        encoded = self.tokenizer(
            str(self.texts[idx]),
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt',
        )
        return {
            # Remove only the leading batch axis. A bare squeeze() would also
            # collapse the sequence axis whenever it happens to have length 1.
            'input_ids': encoded['input_ids'].squeeze(0),
            'attention_mask': encoded['attention_mask'].squeeze(0),
            'labels': torch.tensor(self.labels[idx], dtype=torch.long),
        }
40
+
41
  class FinancialAnalyzer:
42
  def __init__(self):
43
  print("Initializing Analyzer...")
44
+ self.initialize_models()
45
  print("Initialization complete!")
46
 
47
def initialize_models(self):
    """Load TinyLlama and FinBERT, with their tokenizers, in inference mode.

    Sets ``llama_tokenizer``, ``llama_model``, ``finbert_tokenizer`` and
    ``finbert_model`` on the instance, in that order.

    Raises:
        Exception: Any loading failure is printed and then re-raised as is.
    """
    llama_checkpoint = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    finbert_checkpoint = "ProsusAI/finbert"

    try:
        # Generative model used to write the narrative analysis.
        self.llama_tokenizer = AutoTokenizer.from_pretrained(llama_checkpoint)
        llama = AutoModelForCausalLM.from_pretrained(llama_checkpoint)
        self.llama_model = llama
        llama.eval()

        # Sequence classifier used for sentiment scoring.
        self.finbert_tokenizer = AutoTokenizer.from_pretrained(finbert_checkpoint)
        finbert = AutoModelForSequenceClassification.from_pretrained(finbert_checkpoint)
        self.finbert_model = finbert
        finbert.eval()

        print("Models loaded successfully!")
    except Exception as err:
        print(f"Error initializing models: {err!s}")
        raise
64
 
65
  def clean_number(self, value):
 
112
  except Exception as e:
113
  print(f"Error parsing financial data: {str(e)}")
114
  return {}
115
+
116
  def process_table(self, headers, rows):
117
  """Process table data into structured format"""
118
  try:
 
172
  "Gross_Margin": (metrics["Profitability"]["Gross_Profit_2025"] / revenue_2025) * 100,
173
  "Operating_Margin": (metrics["Profitability"]["EBIT_2025"] / revenue_2025) * 100,
174
  "Net_Margin": (metrics["Profitability"]["Net_Earnings_2025"] / revenue_2025) * 100,
 
175
  "Current_Ratio": metrics["Balance_Sheet"]["Current_Assets_2025"] / metrics["Balance_Sheet"]["Current_Liabilities_2025"] if metrics["Balance_Sheet"]["Current_Liabilities_2025"] != 0 else 0,
176
  "Quick_Ratio": (metrics["Balance_Sheet"]["Current_Assets_2025"] - metrics["Balance_Sheet"]["Inventory_2025"]) / metrics["Balance_Sheet"]["Current_Liabilities_2025"] if metrics["Balance_Sheet"]["Current_Liabilities_2025"] != 0 else 0,
 
177
  "Asset_Turnover": revenue_2025 / metrics["Balance_Sheet"]["Total_Assets_2025"] if metrics["Balance_Sheet"]["Total_Assets_2025"] != 0 else 0,
178
  "Receivables_Turnover": revenue_2025 / metrics["Balance_Sheet"]["Accounts_Receivable_2025"] if metrics["Balance_Sheet"]["Accounts_Receivable_2025"] != 0 else 0,
 
179
  "Debt_to_Equity": metrics["Balance_Sheet"]["Total_Liabilities_2025"] / metrics["Balance_Sheet"]["Equity_2025"] if metrics["Balance_Sheet"]["Equity_2025"] != 0 else 0,
180
  "Interest_Coverage": metrics["Profitability"]["EBIT_2025"] / metrics["Cash_Flow"]["Interest_Expense_2025"] if metrics["Cash_Flow"]["Interest_Expense_2025"] != 0 else 0,
 
181
  "Revenue_Growth": ((metrics["Revenue"]["2025"] / metrics["Revenue"]["2024"]) - 1) * 100 if metrics["Revenue"]["2024"] != 0 else 0,
182
  "5Year_Revenue_CAGR": ((metrics["Revenue"]["2025"] / metrics["Revenue"]["2021"]) ** (1/4) - 1) * 100 if metrics["Revenue"]["2021"] != 0 else 0
183
  }
 
187
  print(f"Error extracting metrics: {str(e)}")
188
  return {}
189
 
190
def get_sentiment_analysis(self, metrics):
    """Score a short textual summary of the key ratios with FinBERT.

    Args:
        metrics: Mapping with a ``'Ratios'`` sub-mapping. Ratios that are
            absent are rendered as 0.

    Returns:
        dict mapping each lowercase label name to its softmax probability as
        a built-in ``float``. An empty dict is returned (and the error
        printed) if anything fails.
    """
    try:
        ratios = metrics['Ratios']
        financial_text = "\n".join([
            f"Revenue growth: {ratios.get('Revenue_Growth', 0):.2f}%",
            f"Profit margin: {ratios.get('Net_Margin', 0):.2f}%",
            f"Debt to equity: {ratios.get('Debt_to_Equity', 0):.2f}",
            f"Interest coverage: {ratios.get('Interest_Coverage', 0):.2f}",
            f"Current ratio: {ratios.get('Current_Ratio', 0):.2f}",
        ])

        inputs = self.finbert_tokenizer(
            financial_text, return_tensors="pt", padding=True, truncation=True
        )
        # Inference only: no need to build an autograd graph.
        with torch.no_grad():
            outputs = self.finbert_model(**inputs)
        probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)[0]

        # Take the label order from the model itself. ProsusAI/finbert maps
        # 0=positive, 1=negative, 2=neutral, so a hand-written
        # negative/neutral/positive list attaches every score to the wrong label.
        id2label = self.finbert_model.config.id2label

        # tolist() yields built-in floats, which (unlike NumPy float32 scalars)
        # can be passed to json.dumps.
        return {
            str(id2label[index]).lower(): float(score)
            for index, score in enumerate(probabilities.tolist())
        }
    except Exception as e:
        print(f"Error in sentiment analysis: {str(e)}")
        return {}
213
+
214
def generate_prompt(self, metrics, sentiment_dict):
    """Render the metrics and sentiment scores into an ``[INST] ... [/INST]`` prompt.

    Args:
        metrics: Nested mapping providing ``Revenue['2025']``, the 2025
            ``Profitability`` and ``Balance_Sheet`` figures, and a ``Ratios``
            mapping (ratios that are absent are shown as 0).
        sentiment_dict: Mapping with optional ``positive``, ``neutral`` and
            ``negative`` scores (scores that are absent are shown as 0).

    Returns:
        The prompt text, or ``""`` if building it fails for any reason (the
        error is printed).
    """
    try:
        # Small accessors keep each lookup at the point where its line is
        # rendered, so values are read in top-to-bottom prompt order.
        def ratio(name):
            return metrics['Ratios'].get(name, 0)

        def profit(name):
            return metrics['Profitability'][name]

        def balance(name):
            return metrics['Balance_Sheet'][name]

        def mood(name):
            return sentiment_dict.get(name, 0)

        prompt_lines = [
            "[INST] As a financial analyst, provide a comprehensive analysis of this company's performance.",
            "",
            "Financial Metrics (2025):",
            "------------------------",
            "1. Revenue & Growth:",
            f"- Revenue: ${metrics['Revenue']['2025']:,.1f}M",
            f"- Growth Rate: {ratio('Revenue_Growth'):,.1f}%",
            f"- 5-Year CAGR: {ratio('5Year_Revenue_CAGR'):,.1f}%",
            "",
            "2. Profitability:",
            f"- Gross Profit: ${profit('Gross_Profit_2025'):,.1f}M",
            f"- EBIT: ${profit('EBIT_2025'):,.1f}M",
            f"- Net Earnings: ${profit('Net_Earnings_2025'):,.1f}M",
            "- Margins:",
            f"* Gross: {ratio('Gross_Margin'):,.1f}%",
            f"* Operating: {ratio('Operating_Margin'):,.1f}%",
            f"* Net: {ratio('Net_Margin'):,.1f}%",
            "",
            "3. Financial Position:",
            f"- Assets: ${balance('Total_Assets_2025'):,.1f}M",
            f"- Liabilities: ${balance('Total_Liabilities_2025'):,.1f}M",
            f"- Equity: ${balance('Equity_2025'):,.1f}M",
            "",
            "4. Key Ratios:",
            f"- Liquidity: Current Ratio {ratio('Current_Ratio'):,.2f}x",
            f"- Efficiency: Asset Turnover {ratio('Asset_Turnover'):,.2f}x",
            f"- Solvency: Debt/Equity {ratio('Debt_to_Equity'):,.2f}x",
            f"- Coverage: Interest Coverage {ratio('Interest_Coverage'):,.2f}x",
            "",
            "Market Sentiment Indicators:",
            "---------------------------",
            f"- Positive: {mood('positive'):,.2f}",
            f"- Neutral: {mood('neutral'):,.2f}",
            f"- Negative: {mood('negative'):,.2f}",
            "",
            "Provide:",
            "1. Overall financial health assessment",
            "2. Key strengths and concerns",
            "3. Operational efficiency analysis",
            "4. Recommendations for improvement",
            "[/INST]",
        ]
        return "\n".join(prompt_lines)
    except Exception as err:
        print(f"Error generating prompt: {err!s}")
        return ""
 
262
  def generate_analysis(self, prompt):
263
  """Generate analysis using TinyLlama"""
264
  try:
265
+ inputs = self.llama_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
266
 
267
+ outputs = self.llama_model.generate(
268
  inputs["input_ids"],
269
+ max_new_tokens=1024,
270
  temperature=0.7,
271
  top_p=0.9,
272
  do_sample=True,
273
+ repetition_penalty=1.2,
274
+ no_repeat_ngram_size=3,
275
+ num_return_sequences=1,
276
+ pad_token_id=self.llama_tokenizer.eos_token_id
277
  )
278
 
279
+ analysis = self.llama_tokenizer.decode(outputs[0], skip_special_tokens=True)
280
+
281
+ # Extract response after instruction
282
+ if "[/INST]" in analysis:
283
+ analysis = analysis.split("[/INST]")[-1].strip()
284
+
285
+ # Validate the analysis
286
+ if len(analysis.split()) < 100:
287
+ return "Error: Generated analysis is too short. Please try again."
288
+
289
  return analysis
290
  except Exception as e:
291
  return f"Error generating analysis: {str(e)}"
292
 
293
def fine_tune_models(self, train_texts, train_labels, epochs=3):
    """Fine-tune the FinBERT classifier on labelled texts and save the result.

    The training data is one integer class label per text, which is a
    sequence-classification task. It is therefore applied to
    ``finbert_model``; a causal language model cannot be trained on scalar
    class labels.

    Args:
        train_texts: Indexable collection of training texts.
        train_labels: Integer class labels aligned with ``train_texts``.
            They are presumably expected to be valid FinBERT class ids;
            confirm against the model's ``config.id2label``.
        epochs: Number of training epochs.

    Returns:
        None. Progress and any error are printed; errors are not re-raised.
    """
    output_dir = "./financial_model_tuned"
    try:
        # Prepare dataset with the tokenizer that matches the trained model.
        train_dataset = FinancialDataset(
            train_texts, train_labels, self.finbert_tokenizer
        )

        # No evaluation schedule is configured, because no evaluation dataset
        # is supplied and step-wise evaluation without one makes the Trainer
        # fail.
        training_args = TrainingArguments(
            output_dir=output_dir,
            num_train_epochs=epochs,
            per_device_train_batch_size=4,
            logging_dir="./logs",
            logging_steps=10,
            save_steps=50,
            learning_rate=2e-5,
            weight_decay=0.01,
            warmup_steps=500,
        )

        trainer = Trainer(
            model=self.finbert_model,
            args=training_args,
            train_dataset=train_dataset,
        )

        # Fine-tune the model
        trainer.train()

        # Save the fine-tuned model together with its tokenizer.
        self.finbert_model.save_pretrained(output_dir)
        self.finbert_tokenizer.save_pretrained(output_dir)

        print("Fine-tuning completed successfully!")
    except Exception as e:
        print(f"Error in fine-tuning: {str(e)}")
    finally:
        # Training switches the model to train mode. Restore inference mode so
        # later sentiment scoring is not affected by dropout.
        model = getattr(self, "finbert_model", None)
        if model is not None:
            model.eval()
331
+
332
  def analyze_financials(self, balance_sheet_file, income_stmt_file):
333
  """Main analysis function"""
334
  try:
335
+ # Validate input files
336
  if not (self.is_valid_markdown(balance_sheet_file) and self.is_valid_markdown(income_stmt_file)):
337
  return "Error: One or both files are invalid or not in Markdown format."
338
 
339
+ # Read files
340
  with open(balance_sheet_file, 'r') as f:
341
  balance_sheet = f.read()
342
  with open(income_stmt_file, 'r') as f:
343
  income_stmt = f.read()
344
 
345
+ # Process financial data
346
  income_data = self.parse_financial_data(income_stmt)
347
  balance_data = self.parse_financial_data(balance_sheet)
348
  metrics = self.extract_metrics(income_data, balance_data)
349
+
350
+ # Get sentiment analysis
351
+ sentiment_dict = self.get_sentiment_analysis(metrics)
352
+
353
+ # Generate and get analysis
354
+ prompt = self.generate_prompt(metrics, sentiment_dict)
355
  analysis = self.generate_analysis(prompt)
356
 
357
+ # Prepare final results
358
  results = {
359
  "Financial Analysis": {
360
  "Key Metrics": metrics,
361
+ "Market Sentiment": sentiment_dict,
362
  "AI Insights": analysis,
363
  "Analysis Period": "2021-2025",
364
  "Note": "All monetary values in millions ($M)"
 
380
  gr.File(label="Income Statement (Markdown)", type="filepath")
381
  ],
382
  outputs=gr.Textbox(label="Analysis Results", lines=25),
383
+ title="AI Financial Statement Analyzer",
384
+ description="""Upload financial statements in Markdown format for AI-powered analysis.
385
+ The analysis combines LLM-based insights with sentiment analysis."""
386
  )
387
 
388
  return iface