Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -23,11 +23,32 @@ def clear_gpu_memory():
|
|
23 |
torch.cuda.empty_cache()
|
24 |
gc.collect()
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
class FinancialAnalyzer:
|
27 |
"""Simplified Financial Analyzer using small models"""
|
28 |
|
29 |
def __init__(self):
|
30 |
-
# Initialize with two small models
|
31 |
self.sentiment_model = None
|
32 |
self.analysis_model = None
|
33 |
self.load_models()
|
@@ -39,7 +60,8 @@ class FinancialAnalyzer:
|
|
39 |
self.sentiment_model = pipeline(
|
40 |
"text-classification",
|
41 |
model="ProsusAI/finbert",
|
42 |
-
torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
|
|
|
43 |
)
|
44 |
|
45 |
# Load small model for analysis and recommendations
|
@@ -60,27 +82,41 @@ class FinancialAnalyzer:
|
|
60 |
if file_obj is None:
|
61 |
raise ValueError("No file provided")
|
62 |
|
63 |
-
|
|
|
64 |
|
65 |
if df.empty:
|
66 |
raise ValueError("Empty CSV file")
|
67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
# Get numeric columns
|
69 |
numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
|
70 |
if len(numeric_cols) == 0:
|
71 |
raise ValueError("No numeric columns found in CSV")
|
72 |
-
|
73 |
-
# Calculate basic KPIs
|
74 |
-
summary = df[numeric_cols].describe()
|
75 |
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
'
|
|
|
|
|
|
|
81 |
}
|
82 |
|
83 |
-
return
|
84 |
|
85 |
except Exception as e:
|
86 |
logger.error(f"Error processing CSV: {str(e)}")
|
@@ -89,49 +125,87 @@ class FinancialAnalyzer:
|
|
89 |
def analyze_financials(self, income_summary, balance_summary):
|
90 |
"""Generate financial analysis and recommendations"""
|
91 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
financial_context = f"""
|
93 |
-
Income Statement
|
|
|
|
|
|
|
|
|
|
|
94 |
{income_summary[0].to_string()}
|
95 |
|
96 |
-
|
97 |
-
{
|
|
|
|
|
98 |
|
99 |
-
Balance
|
100 |
{balance_summary[0].to_string()}
|
101 |
-
|
102 |
-
Key Balance Sheet Indicators:
|
103 |
-
{balance_summary[1]}
|
104 |
"""
|
105 |
|
106 |
# Generate sentiment analysis
|
107 |
-
sentiment = self.sentiment_model(
|
|
|
|
|
|
|
|
|
108 |
|
109 |
# Generate business analysis
|
110 |
-
analysis_prompt = f"""[INST]
|
111 |
-
1. Current Business Status
|
112 |
-
2. Key Business Insights
|
113 |
-
3. Strategic Recommendations and Roadmap
|
114 |
|
115 |
-
Financial Context:
|
116 |
{financial_context}
|
117 |
|
118 |
Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
|
119 |
|
120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
[/INST]"""
|
122 |
|
123 |
response = self.analysis_model(
|
124 |
analysis_prompt,
|
125 |
-
max_length=
|
126 |
-
|
127 |
-
num_return_sequences=1
|
|
|
|
|
128 |
)
|
129 |
|
130 |
return self.format_response(response[0]['generated_text'], sentiment)
|
131 |
|
132 |
except Exception as e:
|
133 |
logger.error(f"Error in analysis: {str(e)}")
|
134 |
-
return "Error generating analysis"
|
135 |
|
136 |
def format_response(self, analysis_text, sentiment):
|
137 |
"""Format the analysis response into structured sections"""
|
@@ -149,7 +223,7 @@ class FinancialAnalyzer:
|
|
149 |
for section in sections:
|
150 |
if "Business Status" in section:
|
151 |
current_section = status
|
152 |
-
elif "Key
|
153 |
current_section = insights
|
154 |
elif "Strategic Recommendations" in section:
|
155 |
current_section = recommendations
|
@@ -160,7 +234,7 @@ class FinancialAnalyzer:
|
|
160 |
output = [
|
161 |
"# Financial Analysis Report\n\n",
|
162 |
f"## Overall Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n",
|
163 |
-
"##
|
164 |
"".join(f"- {item}\n" for item in status if item),
|
165 |
"\n## Key Business Insights\n",
|
166 |
"".join(f"- {item}\n" for item in insights if item),
|
@@ -177,52 +251,38 @@ class FinancialAnalyzer:
|
|
177 |
def analyze_statements(income_statement, balance_sheet):
|
178 |
"""Main function to analyze financial statements"""
|
179 |
try:
|
180 |
-
|
181 |
-
if income_statement is None or balance_sheet is None:
|
182 |
return "Please upload both Income Statement and Balance Sheet CSV files."
|
183 |
|
184 |
-
#
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
|
|
|
|
|
|
189 |
|
190 |
-
#
|
191 |
analyzer = FinancialAnalyzer()
|
|
|
|
|
192 |
|
193 |
-
# Process statements with better error handling
|
194 |
-
try:
|
195 |
-
income_summary = analyzer.process_csv(income_statement)
|
196 |
-
logger.info("Successfully processed Income Statement")
|
197 |
-
except Exception as e:
|
198 |
-
return f"Error processing Income Statement: {str(e)}\nPlease ensure it's a valid CSV file with numeric data."
|
199 |
-
|
200 |
-
try:
|
201 |
-
balance_summary = analyzer.process_csv(balance_sheet)
|
202 |
-
logger.info("Successfully processed Balance Sheet")
|
203 |
-
except Exception as e:
|
204 |
-
return f"Error processing Balance Sheet: {str(e)}\nPlease ensure it's a valid CSV file with numeric data."
|
205 |
-
|
206 |
-
# Generate analysis
|
207 |
-
logger.info("Generating analysis...")
|
208 |
result = analyzer.analyze_financials(income_summary, balance_summary)
|
209 |
-
|
210 |
clear_gpu_memory()
|
211 |
return result
|
212 |
|
213 |
except Exception as e:
|
214 |
logger.error(f"Analysis error: {str(e)}")
|
215 |
-
return f"""Analysis Error:
|
216 |
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
3. Files follow standard financial statement format"""
|
223 |
-
|
224 |
-
# Create Gradio interface with improved file handling
|
225 |
|
|
|
226 |
iface = gr.Interface(
|
227 |
fn=analyze_statements,
|
228 |
inputs=[
|
@@ -242,8 +302,10 @@ iface = gr.Interface(
|
|
242 |
description="""## Financial Analysis Tool
|
243 |
|
244 |
How to use:
|
245 |
-
1.
|
246 |
-
|
|
|
|
|
247 |
3. Wait for the analysis to complete
|
248 |
|
249 |
The tool will provide:
|
@@ -253,20 +315,19 @@ The tool will provide:
|
|
253 |
|
254 |
Requirements:
|
255 |
- Files must be in CSV format
|
256 |
-
- Must contain numeric data
|
257 |
-
- Standard financial statement format
|
258 |
flagging_mode="never"
|
259 |
)
|
260 |
|
261 |
-
# Launch the interface
|
262 |
if __name__ == "__main__":
|
263 |
try:
|
264 |
-
iface.queue()
|
265 |
iface.launch(
|
266 |
share=False,
|
267 |
server_name="0.0.0.0",
|
268 |
-
server_port=7860
|
269 |
-
show_api=False # Disable API tab for security
|
270 |
)
|
271 |
except Exception as e:
|
272 |
logger.error(f"Launch error: {str(e)}")
|
|
|
23 |
torch.cuda.empty_cache()
|
24 |
gc.collect()
|
25 |
|
26 |
+
def validate_financial_csv(file_obj, file_type):
    """Check that a CSV upload carries at least one header expected for its type.

    Args:
        file_obj: Anything ``pd.read_csv`` accepts (a path or a file-like object).
        file_type: ``'income_statement'`` or ``'balance_sheet'``.

    Returns:
        A ``(is_valid, message)`` tuple. Parsing problems are reported through
        the tuple instead of being raised.
    """
    # Expected columns based on file type
    expected_columns = {
        'income_statement': ['Revenue', 'Expenses', 'Profit'],
        'balance_sheet': ['Assets', 'Liabilities', 'Equity'],
    }

    required_columns = expected_columns.get(file_type)
    if required_columns is None:
        # An unknown type used to fall through to a confusing
        # "Expected at least one of: set()" message.
        return False, (
            f"Unknown file type: {file_type!r}. "
            f"Expected one of: {', '.join(expected_columns)}"
        )

    try:
        df = pd.read_csv(file_obj, skipinitialspace=True)

        # Strip header whitespace so " Revenue" validates the same way it is
        # later cleaned up when the file is processed.
        found_columns = {str(col).strip() for col in df.columns}

        # Check for minimum required columns
        if not any(col in found_columns for col in required_columns):
            return False, (
                "Missing required columns. Expected at least one of: "
                f"{', '.join(required_columns)}"
            )

        return True, "Valid CSV file"
    except Exception as e:
        return False, f"Invalid CSV file: {str(e)}"
    finally:
        # Reading a file-like object leaves it at EOF; rewind it so the caller
        # can parse the same upload again after validation.
        rewind = getattr(file_obj, "seek", None)
        if callable(rewind):
            try:
                rewind(0)
            except (OSError, ValueError):
                # Closed or non-seekable stream: nothing more we can do here.
                pass
|
47 |
+
|
48 |
class FinancialAnalyzer:
|
49 |
"""Simplified Financial Analyzer using small models"""
|
50 |
|
51 |
def __init__(self):
|
|
|
52 |
self.sentiment_model = None
|
53 |
self.analysis_model = None
|
54 |
self.load_models()
|
|
|
60 |
self.sentiment_model = pipeline(
|
61 |
"text-classification",
|
62 |
model="ProsusAI/finbert",
|
63 |
+
torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
|
64 |
+
truncation=True
|
65 |
)
|
66 |
|
67 |
# Load small model for analysis and recommendations
|
|
|
82 |
if file_obj is None:
|
83 |
raise ValueError("No file provided")
|
84 |
|
85 |
+
# Read CSV with better column handling
|
86 |
+
df = pd.read_csv(file_obj, skipinitialspace=True)
|
87 |
|
88 |
if df.empty:
|
89 |
raise ValueError("Empty CSV file")
|
90 |
|
91 |
+
# Clean column names
|
92 |
+
df.columns = df.columns.str.strip()
|
93 |
+
|
94 |
+
# Log the columns found
|
95 |
+
logger.info(f"Found columns: {df.columns.tolist()}")
|
96 |
+
|
97 |
+
# Remove any unnamed columns
|
98 |
+
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
|
99 |
+
|
100 |
+
# Convert columns to numeric where possible
|
101 |
+
for col in df.columns:
|
102 |
+
df[col] = pd.to_numeric(df[col].str.replace('[$,()]', '', regex=True), errors='ignore')
|
103 |
+
|
104 |
# Get numeric columns
|
105 |
numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
|
106 |
if len(numeric_cols) == 0:
|
107 |
raise ValueError("No numeric columns found in CSV")
|
|
|
|
|
|
|
108 |
|
109 |
+
logger.info(f"Numeric columns: {numeric_cols.tolist()}")
|
110 |
+
|
111 |
+
# Calculate meaningful KPIs
|
112 |
+
kpis = {
|
113 |
+
'total_revenue': df[numeric_cols].sum().sum(),
|
114 |
+
'average_values': df[numeric_cols].mean(),
|
115 |
+
'year_over_year_growth': df[numeric_cols].pct_change().mean() * 100,
|
116 |
+
'key_metrics': df[numeric_cols].describe()
|
117 |
}
|
118 |
|
119 |
+
return df[numeric_cols].describe(), kpis
|
120 |
|
121 |
except Exception as e:
|
122 |
logger.error(f"Error processing CSV: {str(e)}")
|
|
|
125 |
def analyze_financials(self, income_summary, balance_summary):
    """Generate financial analysis and recommendations.

    Args:
        income_summary: ``(describe_table, kpis)`` pair for the income
            statement, in the shape returned by ``process_csv``.
        balance_summary: Same pair for the balance sheet.

    Returns:
        The report produced by ``format_response``, or an
        ``"Error generating analysis: ..."`` string if any step fails.
    """
    try:
        # Extract meaningful metrics
        income_kpis = income_summary[1]
        income_metrics = {
            'Total Revenue': income_kpis['total_revenue'],
            'Average Values': income_kpis['average_values'].mean(),
            'Growth Rate': income_kpis['year_over_year_growth'].mean(),
        }

        # The balance-sheet KPIs reuse the same dictionary keys, so the
        # 'total_revenue' entry is what gets reported as total assets.
        balance_kpis = balance_summary[1]
        balance_metrics = {
            'Total Assets': balance_kpis['total_revenue'],
            'Average Values': balance_kpis['average_values'].mean(),
            'Growth Rate': balance_kpis['year_over_year_growth'].mean(),
        }

        financial_context = f"""
        Income Statement Analysis:
        - Total Revenue: ${income_metrics['Total Revenue']:,.2f}
        - Average Revenue: ${income_metrics['Average Values']:,.2f}
        - Growth Rate: {income_metrics['Growth Rate']:.2f}%

        Detailed Income Metrics:
        {income_summary[0].to_string()}

        Balance Sheet Analysis:
        - Total Assets: ${balance_metrics['Total Assets']:,.2f}
        - Average Assets: ${balance_metrics['Average Values']:,.2f}
        - Growth Rate: {balance_metrics['Growth Rate']:.2f}%

        Detailed Balance Metrics:
        {balance_summary[0].to_string()}
        """

        # Generate sentiment analysis; the pipeline returns a list, so take
        # the single result for the single input.
        sentiment = self.sentiment_model(
            financial_context,
            truncation=True,
            max_length=512
        )[0]

        # Generate business analysis
        analysis_prompt = f"""[INST] As a financial analyst, provide a detailed analysis based on these financial metrics:

        {financial_context}

        Sentiment: {sentiment['label']} ({sentiment['score']:.2%})

        Please provide:

        1. Business Status:
        - Financial health assessment
        - Growth trajectory
        - Key performance indicators analysis

        2. Key Insights:
        - Revenue trends
        - Asset utilization
        - Financial efficiency metrics
        - Areas of concern or opportunity

        3. Strategic Recommendations:
        - Specific action items based on the metrics
        - Growth opportunities
        - Risk mitigation strategies
        - Timeline-based roadmap

        Be specific and data-driven in your analysis.
        [/INST]"""

        response = self.analysis_model(
            analysis_prompt,
            max_length=1500,
            do_sample=False,
            num_return_sequences=1,
            truncation=True,
            pad_token_id=self.analysis_model.tokenizer.eos_token_id
        )

        generated_text = response[0]['generated_text']
        # NOTE(review): text-generation pipelines echo the prompt ahead of the
        # completion by default (confirm against load_models). The prompt
        # itself contains the section headings, so drop everything up to the
        # closing instruction tag; otherwise the instruction bullets would be
        # parsed as findings. When the tag is absent the text is left as-is.
        _, tag, completion = generated_text.partition("[/INST]")
        if tag:
            generated_text = completion

        return self.format_response(generated_text, sentiment)

    except Exception as e:
        # logger.exception keeps the traceback alongside the message.
        logger.exception(f"Error in analysis: {str(e)}")
        return f"Error generating analysis: {str(e)}"
|
209 |
|
210 |
def format_response(self, analysis_text, sentiment):
|
211 |
"""Format the analysis response into structured sections"""
|
|
|
223 |
for section in sections:
|
224 |
if "Business Status" in section:
|
225 |
current_section = status
|
226 |
+
elif "Key Insights" in section:
|
227 |
current_section = insights
|
228 |
elif "Strategic Recommendations" in section:
|
229 |
current_section = recommendations
|
|
|
234 |
output = [
|
235 |
"# Financial Analysis Report\n\n",
|
236 |
f"## Overall Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n",
|
237 |
+
"## Business Status\n",
|
238 |
"".join(f"- {item}\n" for item in status if item),
|
239 |
"\n## Key Business Insights\n",
|
240 |
"".join(f"- {item}\n" for item in insights if item),
|
|
|
251 |
def analyze_statements(income_statement, balance_sheet):
    """Main function to analyze financial statements.

    Args:
        income_statement: Uploaded income-statement CSV (falsy when missing).
        balance_sheet: Uploaded balance-sheet CSV (falsy when missing).

    Returns:
        The analysis report on success, otherwise a user-facing message that
        says which input was missing or invalid, or what error occurred.
    """
    try:
        if not income_statement or not balance_sheet:
            return "Please upload both Income Statement and Balance Sheet CSV files."

        # Validate files
        income_valid, income_msg = validate_financial_csv(income_statement, 'income_statement')
        if not income_valid:
            return f"Invalid Income Statement: {income_msg}"

        balance_valid, balance_msg = validate_financial_csv(balance_sheet, 'balance_sheet')
        if not balance_valid:
            return f"Invalid Balance Sheet: {balance_msg}"

        # Process if valid. Everything from model loading onwards may allocate
        # accelerator memory, so the cleanup runs whether or not it succeeds.
        try:
            analyzer = FinancialAnalyzer()
            income_summary = analyzer.process_csv(income_statement)
            balance_summary = analyzer.process_csv(balance_sheet)
            return analyzer.analyze_financials(income_summary, balance_summary)
        finally:
            clear_gpu_memory()

    except Exception as e:
        logger.exception(f"Analysis error: {str(e)}")
        return (
            f"Analysis Error: {str(e)}\n"
            "\n"
            "Please ensure your CSV files:\n"
            "1. Have proper headers (Revenue, Expenses, Profit for Income Statement)\n"
            "2. Contain numeric data\n"
            "3. Follow standard financial statement format\n"
            "4. Are not corrupted"
        )
|
|
|
|
|
|
|
284 |
|
285 |
+
# Create Gradio interface
|
286 |
iface = gr.Interface(
|
287 |
fn=analyze_statements,
|
288 |
inputs=[
|
|
|
302 |
description="""## Financial Analysis Tool
|
303 |
|
304 |
How to use:
|
305 |
+
1. Prepare your CSV files with proper headers:
|
306 |
+
- Income Statement: Revenue, Expenses, Profit
|
307 |
+
- Balance Sheet: Assets, Liabilities, Equity
|
308 |
+
2. Upload both files using the buttons below
|
309 |
3. Wait for the analysis to complete
|
310 |
|
311 |
The tool will provide:
|
|
|
315 |
|
316 |
Requirements:
|
317 |
- Files must be in CSV format
|
318 |
+
- Must contain numeric data
|
319 |
+
- Standard financial statement format required""",
|
320 |
flagging_mode="never"
|
321 |
)
|
322 |
|
323 |
+
# Launch the interface
|
324 |
if __name__ == "__main__":
    # Launch the interface: listen on every network interface at port 7860
    # and do not create a public share link.
    launch_options = {
        "share": False,
        "server_name": "0.0.0.0",
        "server_port": 7860,
    }
    try:
        # Enable request queuing before the server starts.
        iface.queue()
        iface.launch(**launch_options)
    except Exception as e:
        logger.error(f"Launch error: {str(e)}")
|