Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
|
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
4 |
import json
|
|
|
5 |
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification
|
6 |
import torch
|
7 |
|
@@ -9,7 +10,6 @@ class FinancialAnalyzer:
|
|
9 |
def __init__(self):
|
10 |
print("Initializing Financial Analyzer...")
|
11 |
self.initialize_models()
|
12 |
-
self.initialize_benchmarks()
|
13 |
|
14 |
def initialize_models(self):
|
15 |
print("Loading models...")
|
@@ -20,212 +20,151 @@ class FinancialAnalyzer:
|
|
20 |
self.finbert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
|
21 |
print("Models loaded successfully!")
|
22 |
|
23 |
-
def
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
"Quick Ratio": 1.0
|
28 |
-
},
|
29 |
-
"Profitability": {
|
30 |
-
"Gross Margin": 40.0,
|
31 |
-
"Operating Margin": 15.0,
|
32 |
-
"Net Margin": 10.0
|
33 |
-
},
|
34 |
-
"Efficiency": {
|
35 |
-
"Asset Turnover": 2.0,
|
36 |
-
"Inventory Turnover": 6.0
|
37 |
-
}
|
38 |
-
}
|
39 |
-
|
40 |
-
def clean_number(self, value):
|
41 |
-
"""Clean numerical values from files (all in thousands)"""
|
42 |
-
if isinstance(value, str):
|
43 |
-
# Remove currency symbols, commas, spaces and handle parentheses
|
44 |
-
value = value.replace(',', '').replace('$', '').replace(' ', '')
|
45 |
-
value = value.replace('(', '-').replace(')', '')
|
46 |
-
try:
|
47 |
-
return float(value)
|
48 |
-
except:
|
49 |
-
return 0.0
|
50 |
-
|
51 |
-
def calculate_metrics(self, balance_sheet_df, income_stmt_df):
|
52 |
-
"""Calculate financial metrics (all values in thousands)"""
|
53 |
-
metrics = {}
|
54 |
-
years = [str(year) for year in range(2021, 2026)]
|
55 |
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
total_assets = self.clean_number(balance_sheet_df.loc[balance_sheet_df['Total_Type'] == 'Total_Assets', year].iloc[0])
|
61 |
-
total_current_liabilities = self.clean_number(balance_sheet_df.loc[balance_sheet_df['Total_Type'] == 'Total_Current_Liabilities', year].iloc[0])
|
62 |
-
total_liabilities = self.clean_number(balance_sheet_df.loc[balance_sheet_df['Total_Type'] == 'Total_Liabilities', year].iloc[0])
|
63 |
-
total_equity = self.clean_number(balance_sheet_df.loc[balance_sheet_df['Total_Type'] == 'Total_Shareholders_Equity', year].iloc[0])
|
64 |
-
inventory = self.clean_number(balance_sheet_df.loc[balance_sheet_df['Account'] == 'Inventory', year].iloc[0])
|
65 |
-
|
66 |
-
# Income Statement metrics
|
67 |
-
revenue = self.clean_number(income_stmt_df.loc[income_stmt_df.get('Revenue Items') == 'Total Net Revenue', year].iloc[0])
|
68 |
-
gross_profit = self.clean_number(income_stmt_df.loc[income_stmt_df.get('Item') == 'Gross Profit', year].iloc[0])
|
69 |
-
operating_expenses = self.clean_number(income_stmt_df.loc[income_stmt_df.get('Expense Category') == 'Total Operating Expenses', year].iloc[0])
|
70 |
-
ebit = self.clean_number(income_stmt_df.loc[income_stmt_df.get('Item') == 'EBIT', year].iloc[0])
|
71 |
-
net_earnings = self.clean_number(income_stmt_df.loc[income_stmt_df.get('Item') == 'Net Earnings', year].iloc[0])
|
72 |
-
|
73 |
-
metrics[year] = {
|
74 |
-
"Liquidity": {
|
75 |
-
"Current Ratio": round(total_current_assets / total_current_liabilities, 2) if total_current_liabilities != 0 else 0,
|
76 |
-
"Quick Ratio": round((total_current_assets - inventory) / total_current_liabilities, 2) if total_current_liabilities != 0 else 0
|
77 |
-
},
|
78 |
-
"Profitability": {
|
79 |
-
"Gross Margin": round((gross_profit / revenue * 100), 2) if revenue != 0 else 0,
|
80 |
-
"Operating Margin": round((ebit / revenue * 100), 2) if revenue != 0 else 0,
|
81 |
-
"Net Margin": round((net_earnings / revenue * 100), 2) if revenue != 0 else 0,
|
82 |
-
"ROE": round((net_earnings / total_equity * 100), 2) if total_equity != 0 else 0,
|
83 |
-
"ROA": round((net_earnings / total_assets * 100), 2) if total_assets != 0 else 0
|
84 |
-
},
|
85 |
-
"Efficiency": {
|
86 |
-
"Asset Turnover": round(revenue / total_assets, 2) if total_assets != 0 else 0,
|
87 |
-
"Inventory Turnover": round(operating_expenses / inventory, 2) if inventory != 0 else 0
|
88 |
-
},
|
89 |
-
"Leverage": {
|
90 |
-
"Debt to Equity": round(total_liabilities / total_equity, 2) if total_equity != 0 else 0,
|
91 |
-
"Debt Ratio": round(total_liabilities / total_assets, 2) if total_assets != 0 else 0
|
92 |
-
},
|
93 |
-
"Growth": {
|
94 |
-
"Revenue": None if year == '2021' else
|
95 |
-
round(((revenue - self.clean_number(income_stmt_df.loc[income_stmt_df.get('Revenue Items') == 'Total Net Revenue', str(int(year)-1)].iloc[0])) /
|
96 |
-
self.clean_number(income_stmt_df.loc[income_stmt_df.get('Revenue Items') == 'Total Net Revenue', str(int(year)-1)].iloc[0]) * 100), 2)
|
97 |
-
}
|
98 |
-
}
|
99 |
-
|
100 |
-
# Add key absolute values (in thousands)
|
101 |
-
metrics[year]["Key Values"] = {
|
102 |
-
"Total Assets": total_assets,
|
103 |
-
"Total Liabilities": total_liabilities,
|
104 |
-
"Total Equity": total_equity,
|
105 |
-
"Revenue": revenue,
|
106 |
-
"Net Earnings": net_earnings
|
107 |
-
}
|
108 |
-
|
109 |
-
except Exception as e:
|
110 |
-
print(f"Error calculating metrics for year {year}: {str(e)}")
|
111 |
-
metrics[year] = "Error in calculation"
|
112 |
-
|
113 |
-
return metrics
|
114 |
-
|
115 |
-
def create_insights_prompt(self, metrics, balance_sheet, income_stmt):
|
116 |
-
latest_year_metrics = metrics['2025']
|
117 |
-
return f"""<human>Analyze these financial statements (all values in thousands) and provide detailed insights:
|
118 |
-
|
119 |
-
Key Metrics for Latest Year (2025):
|
120 |
-
- Current Ratio: {latest_year_metrics['Liquidity']['Current Ratio']}
|
121 |
-
- Net Margin: {latest_year_metrics['Profitability']['Net Margin']}%
|
122 |
-
- Revenue: {latest_year_metrics['Key Values']['Revenue']:,.0f}
|
123 |
-
- Net Earnings: {latest_year_metrics['Key Values']['Net Earnings']:,.0f}
|
124 |
-
|
125 |
-
Balance Sheet Trends:
|
126 |
-
{balance_sheet[:800]}
|
127 |
-
|
128 |
-
Income Statement Trends:
|
129 |
-
{income_stmt[:800]}
|
130 |
-
|
131 |
-
Provide specific analysis on:
|
132 |
-
1. Financial Health and Stability
|
133 |
-
2. Profitability and Efficiency
|
134 |
-
3. Growth Trends and Patterns
|
135 |
-
4. Risk Factors and Concerns
|
136 |
-
5. Strategic Recommendations
|
137 |
-
6. Future Outlook</human>"""
|
138 |
-
|
139 |
-
def generate_ai_insights(self, prompt):
|
140 |
-
inputs = self.tiny_tokenizer(prompt, return_tensors="pt", truncation=True)
|
141 |
-
outputs = self.tiny_model.generate(
|
142 |
-
inputs["input_ids"],
|
143 |
-
max_length=1000,
|
144 |
-
temperature=0.7,
|
145 |
-
top_p=0.95,
|
146 |
-
do_sample=True
|
147 |
-
)
|
148 |
-
return self.tiny_tokenizer.decode(outputs[0], skip_special_tokens=True)
|
149 |
-
|
150 |
-
def analyze_sentiment(self, metrics):
|
151 |
-
latest_metrics = json.dumps(metrics['2025'])
|
152 |
-
inputs = self.finbert_tokenizer(latest_metrics, return_tensors="pt", truncation=True)
|
153 |
-
outputs = self.finbert_model(**inputs)
|
154 |
-
probs = torch.nn.functional.softmax(outputs.logits, dim=1)
|
155 |
-
sentiment_labels = ['negative', 'neutral', 'positive']
|
156 |
|
157 |
-
|
158 |
-
'
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
|
192 |
def analyze_financials(self, balance_sheet_file, income_stmt_file):
|
|
|
193 |
try:
|
194 |
-
# Read files
|
195 |
-
balance_sheet_df = pd.read_csv(balance_sheet_file)
|
196 |
-
income_stmt_df = pd.read_csv(income_stmt_file)
|
197 |
-
|
198 |
-
# Also read raw content for context
|
199 |
with open(balance_sheet_file, 'r') as f:
|
200 |
balance_sheet_content = f.read()
|
201 |
with open(income_stmt_file, 'r') as f:
|
202 |
income_stmt_content = f.read()
|
203 |
|
204 |
-
#
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
"Sentiment Analysis": sentiment,
|
220 |
-
"Strategic Roadmap": roadmap,
|
221 |
"Analysis Period": "2021-2025",
|
222 |
-
"Note": "All values in
|
223 |
}
|
224 |
|
225 |
-
return json.dumps(
|
226 |
|
227 |
except Exception as e:
|
228 |
-
return f"Error in analysis: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
229 |
|
230 |
def create_interface():
|
231 |
analyzer = FinancialAnalyzer()
|
@@ -233,12 +172,12 @@ def create_interface():
|
|
233 |
iface = gr.Interface(
|
234 |
fn=analyzer.analyze_financials,
|
235 |
inputs=[
|
236 |
-
gr.File(label="Balance Sheet (
|
237 |
-
gr.File(label="Income Statement (
|
238 |
],
|
239 |
outputs=gr.Textbox(label="Analysis Results", lines=25),
|
240 |
title="Financial Statement Analyzer",
|
241 |
-
description="Upload financial statements for comprehensive
|
242 |
)
|
243 |
|
244 |
return iface
|
|
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
4 |
import json
|
5 |
+
import re
|
6 |
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification
|
7 |
import torch
|
8 |
|
|
|
10 |
def __init__(self):
    """Log a start-up banner to stdout, then load the models.

    Model loading is delegated to ``initialize_models``.
    """
    startup_banner = "Initializing Financial Analyzer..."
    print(startup_banner)
    self.initialize_models()
|
|
|
13 |
|
14 |
def initialize_models(self):
|
15 |
print("Loading models...")
|
|
|
20 |
self.finbert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
|
21 |
print("Models loaded successfully!")
|
22 |
|
23 |
+
def parse_markdown_table(self, markdown_content):
    """Parse the pipe-delimited rows of a markdown table into a DataFrame.

    Every line containing ``|`` is treated as a table row. The first row that
    is not a delimiter row supplies the column names; every later row becomes
    a data row. Cell values are kept as whitespace-stripped strings.

    Args:
        markdown_content: Markdown text containing the table lines.

    Returns:
        A ``pandas.DataFrame`` with one row per data line. It is empty when
        no table row is found. Rows shorter than the widest row are padded
        with ``''``; if a data row is wider than the header, the extra
        columns are named ``"Column N"`` so no cell is dropped.
    """
    headers = None
    rows = []

    for raw_line in markdown_content.strip().splitlines():
        if '|' not in raw_line:
            continue

        # Outer pipes are optional in markdown tables, so strip them
        # explicitly instead of assuming they are always present.
        inner = raw_line.strip()
        if inner.startswith('|'):
            inner = inner[1:]
        if inner.endswith('|'):
            inner = inner[:-1]
        cells = [cell.strip() for cell in inner.split('|')]

        # A delimiter row consists solely of dashes with optional alignment
        # colons (`---`, `:---`, `---:`), with or without padding spaces.
        if all(cell and '-' in cell and set(cell) <= {'-', ':'} for cell in cells):
            continue

        if headers is None:
            headers = cells
        else:
            rows.append(cells)

    if headers is None:
        return pd.DataFrame()

    # Normalise ragged rows so the DataFrame constructor cannot fail on a
    # column-count mismatch.
    width = max(len(row) for row in [headers, *rows])
    columns = headers + [f"Column {i + 1}" for i in range(len(headers), width)]
    padded_rows = [row + [''] * (width - len(row)) for row in rows]
    return pd.DataFrame(padded_rows, columns=columns)
|
48 |
+
|
49 |
+
def extract_financial_data(self, markdown_content):
    """Convert markdown content to a structured plain-text format.

    ``#`` and ``*`` characters are removed from the whole document. Each
    remaining non-empty line that does not start with ``|`` becomes the
    current section title; lines starting with ``|`` are collected as table
    rows under that title (``"General"`` until a title is seen). Each
    collected table is parsed with ``parse_markdown_table`` and rendered
    with ``DataFrame.to_string``.

    Args:
        markdown_content: Raw markdown text of one financial statement.

    Returns:
        A string with one ``"\\n<section>:"`` heading per section that
        contains a table, each followed by the rendered table. Sections
        without table rows are omitted.
    """
    # Remove markdown heading / emphasis markers.
    clean_text = markdown_content.replace('#', '').replace('*', '')

    tables = {}
    current_section = "General"
    for raw_line in clean_text.splitlines():
        # Classify on the stripped line: an indented table row must still be
        # recognised as a row rather than mistaken for a section title.
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith('|'):
            tables.setdefault(current_section, []).append(line)
        else:
            current_section = line

    structured_text = []
    for section, table_rows in tables.items():
        structured_text.append(f"\n{section}:")
        table = self.parse_markdown_table('\n'.join(table_rows))
        structured_text.append(table.to_string())

    return '\n'.join(structured_text)
|
75 |
|
76 |
def analyze_financials(self, balance_sheet_file, income_stmt_file):
    """Run the full analysis on two uploaded markdown statements.

    Reads both files, converts their tables to structured text, asks the
    text-generation model for a written analysis, scores sentiment on the
    structured text and returns everything as a JSON string.

    Args:
        balance_sheet_file: Path to the balance-sheet markdown file.
        income_stmt_file: Path to the income-statement markdown file.

    Returns:
        A JSON string (``indent=2``) with the keys ``"Financial Analysis"``,
        ``"Sentiment Analysis"``, ``"Analysis Period"`` and ``"Note"``. On
        any failure, a plain string starting with ``"Error in analysis:"``
        is returned instead; exceptions are not propagated, because this
        function is the UI callback.
    """
    # Fail with an actionable message when an upload is missing instead of
    # letting open(None) surface as an opaque TypeError.
    if not balance_sheet_file or not income_stmt_file:
        return "Error in analysis: both a balance sheet and an income statement file are required"

    try:
        # Read markdown files. utf-8-sig also accepts plain UTF-8 and strips
        # a leading BOM, which would otherwise hide the first table row.
        with open(balance_sheet_file, 'r', encoding='utf-8-sig') as f:
            balance_sheet_content = f.read()
        with open(income_stmt_file, 'r', encoding='utf-8-sig') as f:
            income_stmt_content = f.read()

        # Convert to structured text
        structured_balance = self.extract_financial_data(balance_sheet_content)
        structured_income = self.extract_financial_data(income_stmt_content)

        # Create analysis prompt
        prompt = f"""<human>Please analyze these financial statements and provide detailed insights:

Financial Statements Analysis (2021-2025)

Balance Sheet Summary:
{structured_balance}

Income Statement Summary:
{structured_income}

Please provide a detailed analysis including:
1. Financial Health Assessment
- Liquidity position
- Capital structure
- Asset efficiency

2. Profitability Analysis
- Revenue trends
- Cost management
- Profit margins

3. Growth Analysis
- Year-over-year growth rates
- Market position
- Future growth potential

4. Risk Assessment
- Operating risks
- Financial risks
- Strategic risks

5. Recommendations
- Short-term actions
- Medium-term strategy
- Long-term planning

6. Future Outlook
- Market conditions
- Company positioning
- Growth opportunities</human>"""

        # `max_length` in generate() counts prompt tokens too, so a prompt
        # that fills the whole budget leaves no room for an answer. Reserve
        # part of the budget for generated tokens.
        total_token_budget = 2048
        reserved_for_answer = 512
        context_limit = min(
            getattr(self.tiny_tokenizer, "model_max_length", total_token_budget),
            total_token_budget,
        )
        prompt_limit = max(context_limit - reserved_for_answer, 1)
        # NOTE(review): truncation cuts the tail of the prompt, which is
        # where the instructions are; very large statements may therefore
        # lose them. Confirm whether the statements should be shortened
        # before the prompt is assembled.
        inputs = self.tiny_tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=prompt_limit,
        )
        prompt_length = inputs["input_ids"].shape[1]

        # Generate AI analysis
        outputs = self.tiny_model.generate(
            inputs["input_ids"],
            attention_mask=inputs.get("attention_mask"),
            max_length=total_token_budget,
            temperature=0.7,
            top_p=0.95,
            do_sample=True
        )
        # Drop the echoed prompt so only newly generated text is reported.
        # Assumes tiny_model is a decoder-only causal LM whose output starts
        # with the prompt tokens (the file imports AutoModelForCausalLM);
        # confirm against initialize_models.
        analysis = self.tiny_tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        ).strip()

        # Generate sentiment
        sentiment = self.analyze_sentiment(structured_balance + structured_income)

        # Compile results
        results = {
            "Financial Analysis": analysis,
            "Sentiment Analysis": sentiment,
            "Analysis Period": "2021-2025",
            "Note": "All values in millions ($M)"
        }

        return json.dumps(results, indent=2)

    except Exception as e:
        # Top-level UI boundary: report the failure as text rather than
        # raising into the interface.
        return f"Error in analysis: {str(e)}\n\nDetails: {type(e).__name__}"
|
157 |
+
|
158 |
+
def analyze_sentiment(self, text):
    """Classify the sentiment of ``text`` with the FinBERT classifier.

    The text is tokenized (truncated to 512 tokens), run through
    ``self.finbert_model``, and the highest-probability class is reported.

    Args:
        text: Text to classify.

    Returns:
        A dict with ``'sentiment'`` (the predicted label) and
        ``'confidence'`` (the winning softmax probability formatted with
        two decimals, as a string).
    """
    inputs = self.finbert_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = self.finbert_model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=1)

    best_index = int(probs[0].argmax().item())

    # Read the class names from the checkpoint's own config instead of
    # assuming an order; a hard-coded list silently mislabels predictions
    # when the checkpoint orders its classes differently.
    # NOTE(review): ProsusAI/finbert is believed to publish
    # positive/negative/neutral, not negative/neutral/positive; verify
    # against the model's config.json.
    config = getattr(self.finbert_model, "config", None)
    id2label = getattr(config, "id2label", None) or {}
    label = id2label.get(best_index, id2label.get(str(best_index)))
    if label is None:
        # No mapping available: fall back to the previously assumed order.
        label = ['negative', 'neutral', 'positive'][best_index]

    return {
        'sentiment': label,
        'confidence': f"{probs[0][best_index].item():.2f}"
    }
|
168 |
|
169 |
def create_interface():
|
170 |
analyzer = FinancialAnalyzer()
|
|
|
172 |
iface = gr.Interface(
|
173 |
fn=analyzer.analyze_financials,
|
174 |
inputs=[
|
175 |
+
gr.File(label="Balance Sheet (Markdown)", type="filepath"),
|
176 |
+
gr.File(label="Income Statement (Markdown)", type="filepath")
|
177 |
],
|
178 |
outputs=gr.Textbox(label="Analysis Results", lines=25),
|
179 |
title="Financial Statement Analyzer",
|
180 |
+
description="Upload financial statements in Markdown format for comprehensive AI-powered analysis."
|
181 |
)
|
182 |
|
183 |
return iface
|