walaa2022 committed on
Commit
c6b42a6
·
verified ·
1 Parent(s): ab0bea5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +133 -194
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  import pandas as pd
3
  import numpy as np
4
  import json
 
5
  from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification
6
  import torch
7
 
@@ -9,7 +10,6 @@ class FinancialAnalyzer:
9
  def __init__(self):
10
  print("Initializing Financial Analyzer...")
11
  self.initialize_models()
12
- self.initialize_benchmarks()
13
 
14
  def initialize_models(self):
15
  print("Loading models...")
@@ -20,212 +20,151 @@ class FinancialAnalyzer:
20
  self.finbert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
21
  print("Models loaded successfully!")
22
 
23
- def initialize_benchmarks(self):
24
- self.industry_benchmarks = {
25
- "Liquidity": {
26
- "Current Ratio": 2.0,
27
- "Quick Ratio": 1.0
28
- },
29
- "Profitability": {
30
- "Gross Margin": 40.0,
31
- "Operating Margin": 15.0,
32
- "Net Margin": 10.0
33
- },
34
- "Efficiency": {
35
- "Asset Turnover": 2.0,
36
- "Inventory Turnover": 6.0
37
- }
38
- }
39
-
40
- def clean_number(self, value):
41
- """Clean numerical values from files (all in thousands)"""
42
- if isinstance(value, str):
43
- # Remove currency symbols, commas, spaces and handle parentheses
44
- value = value.replace(',', '').replace('$', '').replace(' ', '')
45
- value = value.replace('(', '-').replace(')', '')
46
- try:
47
- return float(value)
48
- except:
49
- return 0.0
50
-
51
- def calculate_metrics(self, balance_sheet_df, income_stmt_df):
52
- """Calculate financial metrics (all values in thousands)"""
53
- metrics = {}
54
- years = [str(year) for year in range(2021, 2026)]
55
 
56
- for year in years:
57
- try:
58
- # Balance Sheet metrics
59
- total_current_assets = self.clean_number(balance_sheet_df.loc[balance_sheet_df['Total_Type'] == 'Total_Current_Assets', year].iloc[0])
60
- total_assets = self.clean_number(balance_sheet_df.loc[balance_sheet_df['Total_Type'] == 'Total_Assets', year].iloc[0])
61
- total_current_liabilities = self.clean_number(balance_sheet_df.loc[balance_sheet_df['Total_Type'] == 'Total_Current_Liabilities', year].iloc[0])
62
- total_liabilities = self.clean_number(balance_sheet_df.loc[balance_sheet_df['Total_Type'] == 'Total_Liabilities', year].iloc[0])
63
- total_equity = self.clean_number(balance_sheet_df.loc[balance_sheet_df['Total_Type'] == 'Total_Shareholders_Equity', year].iloc[0])
64
- inventory = self.clean_number(balance_sheet_df.loc[balance_sheet_df['Account'] == 'Inventory', year].iloc[0])
65
-
66
- # Income Statement metrics
67
- revenue = self.clean_number(income_stmt_df.loc[income_stmt_df.get('Revenue Items') == 'Total Net Revenue', year].iloc[0])
68
- gross_profit = self.clean_number(income_stmt_df.loc[income_stmt_df.get('Item') == 'Gross Profit', year].iloc[0])
69
- operating_expenses = self.clean_number(income_stmt_df.loc[income_stmt_df.get('Expense Category') == 'Total Operating Expenses', year].iloc[0])
70
- ebit = self.clean_number(income_stmt_df.loc[income_stmt_df.get('Item') == 'EBIT', year].iloc[0])
71
- net_earnings = self.clean_number(income_stmt_df.loc[income_stmt_df.get('Item') == 'Net Earnings', year].iloc[0])
72
-
73
- metrics[year] = {
74
- "Liquidity": {
75
- "Current Ratio": round(total_current_assets / total_current_liabilities, 2) if total_current_liabilities != 0 else 0,
76
- "Quick Ratio": round((total_current_assets - inventory) / total_current_liabilities, 2) if total_current_liabilities != 0 else 0
77
- },
78
- "Profitability": {
79
- "Gross Margin": round((gross_profit / revenue * 100), 2) if revenue != 0 else 0,
80
- "Operating Margin": round((ebit / revenue * 100), 2) if revenue != 0 else 0,
81
- "Net Margin": round((net_earnings / revenue * 100), 2) if revenue != 0 else 0,
82
- "ROE": round((net_earnings / total_equity * 100), 2) if total_equity != 0 else 0,
83
- "ROA": round((net_earnings / total_assets * 100), 2) if total_assets != 0 else 0
84
- },
85
- "Efficiency": {
86
- "Asset Turnover": round(revenue / total_assets, 2) if total_assets != 0 else 0,
87
- "Inventory Turnover": round(operating_expenses / inventory, 2) if inventory != 0 else 0
88
- },
89
- "Leverage": {
90
- "Debt to Equity": round(total_liabilities / total_equity, 2) if total_equity != 0 else 0,
91
- "Debt Ratio": round(total_liabilities / total_assets, 2) if total_assets != 0 else 0
92
- },
93
- "Growth": {
94
- "Revenue": None if year == '2021' else
95
- round(((revenue - self.clean_number(income_stmt_df.loc[income_stmt_df.get('Revenue Items') == 'Total Net Revenue', str(int(year)-1)].iloc[0])) /
96
- self.clean_number(income_stmt_df.loc[income_stmt_df.get('Revenue Items') == 'Total Net Revenue', str(int(year)-1)].iloc[0]) * 100), 2)
97
- }
98
- }
99
-
100
- # Add key absolute values (in thousands)
101
- metrics[year]["Key Values"] = {
102
- "Total Assets": total_assets,
103
- "Total Liabilities": total_liabilities,
104
- "Total Equity": total_equity,
105
- "Revenue": revenue,
106
- "Net Earnings": net_earnings
107
- }
108
-
109
- except Exception as e:
110
- print(f"Error calculating metrics for year {year}: {str(e)}")
111
- metrics[year] = "Error in calculation"
112
-
113
- return metrics
114
-
115
- def create_insights_prompt(self, metrics, balance_sheet, income_stmt):
116
- latest_year_metrics = metrics['2025']
117
- return f"""<human>Analyze these financial statements (all values in thousands) and provide detailed insights:
118
-
119
- Key Metrics for Latest Year (2025):
120
- - Current Ratio: {latest_year_metrics['Liquidity']['Current Ratio']}
121
- - Net Margin: {latest_year_metrics['Profitability']['Net Margin']}%
122
- - Revenue: {latest_year_metrics['Key Values']['Revenue']:,.0f}
123
- - Net Earnings: {latest_year_metrics['Key Values']['Net Earnings']:,.0f}
124
-
125
- Balance Sheet Trends:
126
- {balance_sheet[:800]}
127
-
128
- Income Statement Trends:
129
- {income_stmt[:800]}
130
-
131
- Provide specific analysis on:
132
- 1. Financial Health and Stability
133
- 2. Profitability and Efficiency
134
- 3. Growth Trends and Patterns
135
- 4. Risk Factors and Concerns
136
- 5. Strategic Recommendations
137
- 6. Future Outlook</human>"""
138
-
139
- def generate_ai_insights(self, prompt):
140
- inputs = self.tiny_tokenizer(prompt, return_tensors="pt", truncation=True)
141
- outputs = self.tiny_model.generate(
142
- inputs["input_ids"],
143
- max_length=1000,
144
- temperature=0.7,
145
- top_p=0.95,
146
- do_sample=True
147
- )
148
- return self.tiny_tokenizer.decode(outputs[0], skip_special_tokens=True)
149
-
150
- def analyze_sentiment(self, metrics):
151
- latest_metrics = json.dumps(metrics['2025'])
152
- inputs = self.finbert_tokenizer(latest_metrics, return_tensors="pt", truncation=True)
153
- outputs = self.finbert_model(**inputs)
154
- probs = torch.nn.functional.softmax(outputs.logits, dim=1)
155
- sentiment_labels = ['negative', 'neutral', 'positive']
156
 
157
- return {
158
- 'sentiment': sentiment_labels[probs.argmax().item()],
159
- 'confidence': f"{probs.max().item():.2f}"
160
- }
161
-
162
- def generate_roadmap(self, metrics):
163
- latest_metrics = metrics['2025']
164
- roadmap = {
165
- "Immediate Actions (0-6 months)": [],
166
- "Short-term Goals (6-12 months)": [],
167
- "Medium-term Strategy (1-2 years)": [],
168
- "Long-term Vision (3-5 years)": []
169
- }
170
-
171
- # Generate recommendations based on metrics comparison
172
- current_ratio = latest_metrics["Liquidity"]["Current Ratio"]
173
- net_margin = latest_metrics["Profitability"]["Net Margin"]
174
- asset_turnover = latest_metrics["Efficiency"]["Asset Turnover"]
175
-
176
- # Add specific recommendations based on metric analysis
177
- if current_ratio > self.industry_benchmarks["Liquidity"]["Current Ratio"] * 1.5:
178
- roadmap["Short-term Goals (6-12 months)"].append("Consider optimizing excess working capital")
179
- elif current_ratio < self.industry_benchmarks["Liquidity"]["Current Ratio"]:
180
- roadmap["Immediate Actions (0-6 months)"].append("Improve working capital management")
181
-
182
- if net_margin < self.industry_benchmarks["Profitability"]["Net Margin"]:
183
- roadmap["Immediate Actions (0-6 months)"].append("Review cost structure")
184
- roadmap["Short-term Goals (6-12 months)"].append("Implement margin improvement initiatives")
185
-
186
- if asset_turnover < self.industry_benchmarks["Efficiency"]["Asset Turnover"]:
187
- roadmap["Medium-term Strategy (1-2 years)"].append("Optimize asset utilization")
188
- roadmap["Long-term Vision (3-5 years)"].append("Consider strategic asset restructuring")
189
-
190
- return roadmap
 
 
 
 
 
 
 
 
191
 
192
  def analyze_financials(self, balance_sheet_file, income_stmt_file):
 
193
  try:
194
- # Read files
195
- balance_sheet_df = pd.read_csv(balance_sheet_file)
196
- income_stmt_df = pd.read_csv(income_stmt_file)
197
-
198
- # Also read raw content for context
199
  with open(balance_sheet_file, 'r') as f:
200
  balance_sheet_content = f.read()
201
  with open(income_stmt_file, 'r') as f:
202
  income_stmt_content = f.read()
203
 
204
- # Calculate metrics
205
- metrics = self.calculate_metrics(balance_sheet_df, income_stmt_df)
206
-
207
- # Generate insights
208
- insights_prompt = self.create_insights_prompt(metrics, balance_sheet_content, income_stmt_content)
209
- insights = self.generate_ai_insights(insights_prompt)
210
-
211
- # Generate sentiment and roadmap
212
- sentiment = self.analyze_sentiment(metrics)
213
- roadmap = self.generate_roadmap(metrics)
214
-
215
- # Compile analysis
216
- analysis = {
217
- "Financial Metrics": metrics,
218
- "AI Insights": insights,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  "Sentiment Analysis": sentiment,
220
- "Strategic Roadmap": roadmap,
221
  "Analysis Period": "2021-2025",
222
- "Note": "All values in thousands"
223
  }
224
 
225
- return json.dumps(analysis, indent=2)
226
 
227
  except Exception as e:
228
- return f"Error in analysis: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
229
 
230
  def create_interface():
231
  analyzer = FinancialAnalyzer()
@@ -233,12 +172,12 @@ def create_interface():
233
  iface = gr.Interface(
234
  fn=analyzer.analyze_financials,
235
  inputs=[
236
- gr.File(label="Balance Sheet (CSV)", type="filepath"),
237
- gr.File(label="Income Statement (CSV)", type="filepath")
238
  ],
239
  outputs=gr.Textbox(label="Analysis Results", lines=25),
240
  title="Financial Statement Analyzer",
241
- description="Upload financial statements for comprehensive analysis including AI insights, sentiment analysis, and strategic roadmap. (All values in thousands)"
242
  )
243
 
244
  return iface
 
2
  import pandas as pd
3
  import numpy as np
4
  import json
5
+ import re
6
  from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification
7
  import torch
8
 
 
10
  def __init__(self):
11
  print("Initializing Financial Analyzer...")
12
  self.initialize_models()
 
13
 
14
  def initialize_models(self):
15
  print("Loading models...")
 
20
  self.finbert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
21
  print("Models loaded successfully!")
22
 
23
def parse_markdown_table(self, markdown_content):
    """Parse the pipe-delimited rows of a markdown table into a DataFrame.

    Every line that contains ``|`` is treated as a table row and every other
    line is ignored. The first row supplies the column headers; the remaining
    rows become data. Cells are kept as whitespace-stripped strings (no
    numeric conversion is attempted).

    Args:
        markdown_content: Markdown text containing the rows of one table.

    Returns:
        A ``pandas.DataFrame`` of string cells. It is empty when the text
        contains no table row. Rows shorter than the widest row are padded
        with ``""``; cells beyond the header width get ``Unnamed_<index>``
        column names so that no value is dropped.
    """
    # A delimiter cell is a run of dashes with optional alignment colons
    # (e.g. "---", ":--", "--:"). Directly under the header a single dash is
    # accepted; anywhere else three are required so that a data row made of
    # "-" placeholders is not mistaken for a delimiter.
    header_rule = re.compile(r":?-+:?")
    stray_rule = re.compile(r":?-{3,}:?")

    headers = None
    rows = []
    for line in markdown_content.strip().split('\n'):
        if '|' not in line:
            continue

        # Outer pipes are optional in markdown, so remove at most one per
        # side rather than discarding the first and last split pieces, which
        # would throw away a real cell when a pipe is missing.
        text = line.strip()
        if text.startswith('|'):
            text = text[1:]
        if text.endswith('|'):
            text = text[:-1]
        cells = [cell.strip() for cell in text.split('|')]

        rule = header_rule if headers is not None and not rows else stray_rule
        if all(rule.fullmatch(cell) for cell in cells):
            continue

        if headers is None:
            headers = cells
        else:
            rows.append(cells)

    if headers is None:
        return pd.DataFrame()

    # pandas raises ValueError when row lengths disagree with the column
    # count, so normalise every row to one common width first.
    width = max([len(headers)] + [len(row) for row in rows])
    columns = headers + [f"Unnamed_{i}" for i in range(len(headers), width)]
    data = [row + [""] * (width - len(row)) for row in rows]
    return pd.DataFrame(data, columns=columns)
48
+
49
def extract_financial_data(self, markdown_content):
    """Convert a markdown statement into plain text grouped by section.

    ``#`` and ``*`` characters are removed from the whole document. Each
    remaining non-table line becomes the title of the section that follows
    it, and the table rows found under a title are parsed with
    ``parse_markdown_table`` and rendered with ``DataFrame.to_string()``.
    Rows that appear before any title are filed under ``"General"``.
    Sections without table rows are not emitted.

    Args:
        markdown_content: Raw markdown text of one financial statement.

    Returns:
        One ``"\\n<section>:"`` line followed by the rendered table for each
        section that has rows, joined with newlines; ``""`` when the text
        contains no table row.
    """
    # Strip heading and emphasis markers (this also removes them inside cells).
    clean_text = markdown_content.replace('#', '').replace('*', '')

    tables = {}
    current_section = "General"
    for line in clean_text.split('\n'):
        stripped = line.strip()
        if not stripped:
            continue
        if stripped.startswith('|'):
            # Test the stripped line so an indented table row is still
            # collected instead of being taken for a section title.
            tables.setdefault(current_section, []).append(stripped)
        elif stripped.strip('-=_ '):
            current_section = stripped
        # Otherwise the line is a horizontal rule or setext underline
        # ("---", "==="); it must not replace the real section title.

    structured_text = []
    for section, rows in tables.items():
        structured_text.append(f"\n{section}:")
        table = self.parse_markdown_table('\n'.join(rows))
        structured_text.append(table.to_string())

    return '\n'.join(structured_text)
75
 
76
  def analyze_financials(self, balance_sheet_file, income_stmt_file):
77
+ """Main analysis function"""
78
  try:
79
+ # Read markdown files
 
 
 
 
80
  with open(balance_sheet_file, 'r') as f:
81
  balance_sheet_content = f.read()
82
  with open(income_stmt_file, 'r') as f:
83
  income_stmt_content = f.read()
84
 
85
+ # Convert to structured text
86
+ structured_balance = self.extract_financial_data(balance_sheet_content)
87
+ structured_income = self.extract_financial_data(income_stmt_content)
88
+
89
+ # Create analysis prompt
90
+ prompt = f"""<human>Please analyze these financial statements and provide detailed insights:
91
+
92
+ Financial Statements Analysis (2021-2025)
93
+
94
+ Balance Sheet Summary:
95
+ {structured_balance}
96
+
97
+ Income Statement Summary:
98
+ {structured_income}
99
+
100
+ Please provide a detailed analysis including:
101
+ 1. Financial Health Assessment
102
+ - Liquidity position
103
+ - Capital structure
104
+ - Asset efficiency
105
+
106
+ 2. Profitability Analysis
107
+ - Revenue trends
108
+ - Cost management
109
+ - Profit margins
110
+
111
+ 3. Growth Analysis
112
+ - Year-over-year growth rates
113
+ - Market position
114
+ - Future growth potential
115
+
116
+ 4. Risk Assessment
117
+ - Operating risks
118
+ - Financial risks
119
+ - Strategic risks
120
+
121
+ 5. Recommendations
122
+ - Short-term actions
123
+ - Medium-term strategy
124
+ - Long-term planning
125
+
126
+ 6. Future Outlook
127
+ - Market conditions
128
+ - Company positioning
129
+ - Growth opportunities</human>"""
130
+
131
+ # Generate AI analysis
132
+ inputs = self.tiny_tokenizer(prompt, return_tensors="pt", truncation=True)
133
+ outputs = self.tiny_model.generate(
134
+ inputs["input_ids"],
135
+ max_length=2048,
136
+ temperature=0.7,
137
+ top_p=0.95,
138
+ do_sample=True
139
+ )
140
+ analysis = self.tiny_tokenizer.decode(outputs[0], skip_special_tokens=True)
141
+
142
+ # Generate sentiment
143
+ sentiment = self.analyze_sentiment(structured_balance + structured_income)
144
+
145
+ # Compile results
146
+ results = {
147
+ "Financial Analysis": analysis,
148
  "Sentiment Analysis": sentiment,
 
149
  "Analysis Period": "2021-2025",
150
+ "Note": "All values in millions ($M)"
151
  }
152
 
153
+ return json.dumps(results, indent=2)
154
 
155
  except Exception as e:
156
+ return f"Error in analysis: {str(e)}\n\nDetails: {type(e).__name__}"
157
+
158
def analyze_sentiment(self, text):
    """Classify the sentiment of ``text`` with the FinBERT model.

    The text is tokenized with ``self.finbert_tokenizer`` (truncated to 512
    tokens), scored by ``self.finbert_model``, and the highest-probability
    class is reported.

    Args:
        text: The text to classify.

    Returns:
        A dict with ``'sentiment'`` (lower-case class label) and
        ``'confidence'`` (the winning softmax probability formatted with
        two decimals, as a string).
    """
    inputs = self.finbert_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = self.finbert_model(**inputs)

    probs = torch.nn.functional.softmax(outputs.logits, dim=1)[0]
    best = int(probs.argmax())

    # Read the class names from the model configuration instead of assuming
    # an order: ProsusAI/finbert publishes positive/negative/neutral, which a
    # hard-coded negative/neutral/positive list would mislabel.
    id2label = getattr(getattr(self.finbert_model, "config", None), "id2label", None)
    if not id2label:
        id2label = {0: "positive", 1: "negative", 2: "neutral"}
    # Configs loaded from raw JSON may carry string keys.
    label = id2label.get(best, id2label.get(str(best), f"LABEL_{best}"))

    return {
        'sentiment': str(label).lower(),
        'confidence': f"{probs[best].item():.2f}"
    }
168
 
169
  def create_interface():
170
  analyzer = FinancialAnalyzer()
 
172
  iface = gr.Interface(
173
  fn=analyzer.analyze_financials,
174
  inputs=[
175
+ gr.File(label="Balance Sheet (Markdown)", type="filepath"),
176
+ gr.File(label="Income Statement (Markdown)", type="filepath")
177
  ],
178
  outputs=gr.Textbox(label="Analysis Results", lines=25),
179
  title="Financial Statement Analyzer",
180
+ description="Upload financial statements in Markdown format for comprehensive AI-powered analysis."
181
  )
182
 
183
  return iface