walaa2022 committed on
Commit
195e9d5
·
verified ·
1 Parent(s): f4bbd39

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +134 -73
app.py CHANGED
@@ -23,11 +23,32 @@ def clear_gpu_memory():
23
  torch.cuda.empty_cache()
24
  gc.collect()
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  class FinancialAnalyzer:
27
  """Simplified Financial Analyzer using small models"""
28
 
29
  def __init__(self):
30
- # Initialize with two small models
31
  self.sentiment_model = None
32
  self.analysis_model = None
33
  self.load_models()
@@ -39,7 +60,8 @@ class FinancialAnalyzer:
39
  self.sentiment_model = pipeline(
40
  "text-classification",
41
  model="ProsusAI/finbert",
42
- torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
 
43
  )
44
 
45
  # Load small model for analysis and recommendations
@@ -60,27 +82,41 @@ class FinancialAnalyzer:
60
  if file_obj is None:
61
  raise ValueError("No file provided")
62
 
63
- df = pd.read_csv(file_obj)
 
64
 
65
  if df.empty:
66
  raise ValueError("Empty CSV file")
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  # Get numeric columns
69
  numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
70
  if len(numeric_cols) == 0:
71
  raise ValueError("No numeric columns found in CSV")
72
-
73
- # Calculate basic KPIs
74
- summary = df[numeric_cols].describe()
75
 
76
- # Extract key metrics
77
- metrics = {
78
- 'total': df[numeric_cols].sum(),
79
- 'average': df[numeric_cols].mean(),
80
- 'growth': df[numeric_cols].pct_change().mean() * 100
 
 
 
81
  }
82
 
83
- return summary, metrics
84
 
85
  except Exception as e:
86
  logger.error(f"Error processing CSV: {str(e)}")
@@ -89,49 +125,87 @@ class FinancialAnalyzer:
89
  def analyze_financials(self, income_summary, balance_summary):
90
  """Generate financial analysis and recommendations"""
91
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  financial_context = f"""
93
- Income Statement Metrics:
 
 
 
 
 
94
  {income_summary[0].to_string()}
95
 
96
- Key Income Indicators:
97
- {income_summary[1]}
 
 
98
 
99
- Balance Sheet Metrics:
100
  {balance_summary[0].to_string()}
101
-
102
- Key Balance Sheet Indicators:
103
- {balance_summary[1]}
104
  """
105
 
106
  # Generate sentiment analysis
107
- sentiment = self.sentiment_model(financial_context)[0]
 
 
 
 
108
 
109
  # Generate business analysis
110
- analysis_prompt = f"""[INST] Based on the following financial data, provide:
111
- 1. Current Business Status
112
- 2. Key Business Insights
113
- 3. Strategic Recommendations and Roadmap
114
 
115
- Financial Context:
116
  {financial_context}
117
 
118
  Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
119
 
120
- Provide a concise but detailed analysis for each section.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  [/INST]"""
122
 
123
  response = self.analysis_model(
124
  analysis_prompt,
125
- max_length=1000,
126
- temperature=0.7,
127
- num_return_sequences=1
 
 
128
  )
129
 
130
  return self.format_response(response[0]['generated_text'], sentiment)
131
 
132
  except Exception as e:
133
  logger.error(f"Error in analysis: {str(e)}")
134
- return "Error generating analysis"
135
 
136
  def format_response(self, analysis_text, sentiment):
137
  """Format the analysis response into structured sections"""
@@ -149,7 +223,7 @@ class FinancialAnalyzer:
149
  for section in sections:
150
  if "Business Status" in section:
151
  current_section = status
152
- elif "Key Business Insights" in section:
153
  current_section = insights
154
  elif "Strategic Recommendations" in section:
155
  current_section = recommendations
@@ -160,7 +234,7 @@ class FinancialAnalyzer:
160
  output = [
161
  "# Financial Analysis Report\n\n",
162
  f"## Overall Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n",
163
- "## Current Business Status\n",
164
  "".join(f"- {item}\n" for item in status if item),
165
  "\n## Key Business Insights\n",
166
  "".join(f"- {item}\n" for item in insights if item),
@@ -177,52 +251,38 @@ class FinancialAnalyzer:
177
  def analyze_statements(income_statement, balance_sheet):
178
  """Main function to analyze financial statements"""
179
  try:
180
- # Check if files are uploaded
181
- if income_statement is None or balance_sheet is None:
182
  return "Please upload both Income Statement and Balance Sheet CSV files."
183
 
184
- # Get file names
185
- income_filename = income_statement.name if hasattr(income_statement, 'name') else 'Income Statement'
186
- balance_filename = balance_sheet.name if hasattr(balance_sheet, 'name') else 'Balance Sheet'
187
-
188
- logger.info(f"Processing {income_filename} and {balance_filename}")
 
 
 
189
 
190
- # Initialize analyzer
191
  analyzer = FinancialAnalyzer()
 
 
192
 
193
- # Process statements with better error handling
194
- try:
195
- income_summary = analyzer.process_csv(income_statement)
196
- logger.info("Successfully processed Income Statement")
197
- except Exception as e:
198
- return f"Error processing Income Statement: {str(e)}\nPlease ensure it's a valid CSV file with numeric data."
199
-
200
- try:
201
- balance_summary = analyzer.process_csv(balance_sheet)
202
- logger.info("Successfully processed Balance Sheet")
203
- except Exception as e:
204
- return f"Error processing Balance Sheet: {str(e)}\nPlease ensure it's a valid CSV file with numeric data."
205
-
206
- # Generate analysis
207
- logger.info("Generating analysis...")
208
  result = analyzer.analyze_financials(income_summary, balance_summary)
209
-
210
  clear_gpu_memory()
211
  return result
212
 
213
  except Exception as e:
214
  logger.error(f"Analysis error: {str(e)}")
215
- return f"""Analysis Error:
216
 
217
- {str(e)}
218
-
219
- Please verify:
220
- 1. Files are in CSV format
221
- 2. Files contain numeric data columns
222
- 3. Files follow standard financial statement format"""
223
-
224
- # Create Gradio interface with improved file handling
225
 
 
226
  iface = gr.Interface(
227
  fn=analyze_statements,
228
  inputs=[
@@ -242,8 +302,10 @@ iface = gr.Interface(
242
  description="""## Financial Analysis Tool
243
 
244
  How to use:
245
- 1. Click 'Upload Income Statement' to select your income statement CSV file
246
- 2. Click 'Upload Balance Sheet' to select your balance sheet CSV file
 
 
247
  3. Wait for the analysis to complete
248
 
249
  The tool will provide:
@@ -253,20 +315,19 @@ The tool will provide:
253
 
254
  Requirements:
255
  - Files must be in CSV format
256
- - Must contain numeric data columns
257
- - Standard financial statement format preferred""",
258
  flagging_mode="never"
259
  )
260
 
261
- # Launch the interface with better error handling
262
  if __name__ == "__main__":
263
  try:
264
- iface.queue() # Enable queuing for better file handling
265
  iface.launch(
266
  share=False,
267
  server_name="0.0.0.0",
268
- server_port=7860,
269
- show_api=False # Disable API tab for security
270
  )
271
  except Exception as e:
272
  logger.error(f"Launch error: {str(e)}")
 
23
  torch.cuda.empty_cache()
24
  gc.collect()
25
 
26
def validate_financial_csv(file_obj, file_type):
    """Check that a CSV parses and carries at least one expected header.

    Header names are compared after stripping surrounding whitespace and
    ignoring case, so a header such as `` revenue `` satisfies ``Revenue``.

    Args:
        file_obj: Anything ``pd.read_csv`` accepts (path or file-like object).
            A seekable file-like object is restored to the position it had
            on entry, so the caller can parse the same handle again.
        file_type: ``'income_statement'`` or ``'balance_sheet'``.

    Returns:
        A ``(is_valid, message)`` tuple. This function never raises; parse
        failures are reported as ``(False, "Invalid CSV file: ...")``.
    """
    expected_columns = {
        'income_statement': ['Revenue', 'Expenses', 'Profit'],
        'balance_sheet': ['Assets', 'Liabilities', 'Equity'],
    }

    # Remember where a file-like input started: validation must not leave
    # the stream exhausted for whoever reads it next.
    start_pos = None
    if hasattr(file_obj, 'seek') and hasattr(file_obj, 'tell'):
        try:
            start_pos = file_obj.tell()
        except (OSError, ValueError):
            start_pos = None  # closed or non-seekable stream

    try:
        required_columns = expected_columns.get(file_type)
        if required_columns is None:
            # Previously this fell through to "Expected at least one of: set()".
            return False, f"Unknown file type: {file_type!r}"

        df = pd.read_csv(file_obj)

        # Normalise headers; str() guards against non-string (numeric) labels.
        found_columns = {str(col).strip().casefold() for col in df.columns}

        if not any(col.casefold() in found_columns for col in required_columns):
            # Join the list (not a set) so the message order is stable.
            return False, (
                "Missing required columns. Expected at least one of: "
                + ", ".join(required_columns)
            )

        return True, "Valid CSV file"
    except Exception as e:
        return False, f"Invalid CSV file: {str(e)}"
    finally:
        if start_pos is not None:
            try:
                file_obj.seek(start_pos)
            except (OSError, ValueError):
                # Best effort only: the stream was closed or cannot seek.
                pass
47
+
48
  class FinancialAnalyzer:
49
  """Simplified Financial Analyzer using small models"""
50
 
51
  def __init__(self):
 
52
  self.sentiment_model = None
53
  self.analysis_model = None
54
  self.load_models()
 
60
  self.sentiment_model = pipeline(
61
  "text-classification",
62
  model="ProsusAI/finbert",
63
+ torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
64
+ truncation=True
65
  )
66
 
67
  # Load small model for analysis and recommendations
 
82
  if file_obj is None:
83
  raise ValueError("No file provided")
84
 
85
+ # Read CSV with better column handling
86
+ df = pd.read_csv(file_obj, skipinitialspace=True)
87
 
88
  if df.empty:
89
  raise ValueError("Empty CSV file")
90
 
91
+ # Clean column names
92
+ df.columns = df.columns.str.strip()
93
+
94
+ # Log the columns found
95
+ logger.info(f"Found columns: {df.columns.tolist()}")
96
+
97
+ # Remove any unnamed columns
98
+ df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
99
+
100
+ # Convert columns to numeric where possible
101
+ for col in df.columns:
102
+ df[col] = pd.to_numeric(df[col].str.replace('[$,()]', '', regex=True), errors='ignore')
103
+
104
  # Get numeric columns
105
  numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
106
  if len(numeric_cols) == 0:
107
  raise ValueError("No numeric columns found in CSV")
 
 
 
108
 
109
+ logger.info(f"Numeric columns: {numeric_cols.tolist()}")
110
+
111
+ # Calculate meaningful KPIs
112
+ kpis = {
113
+ 'total_revenue': df[numeric_cols].sum().sum(),
114
+ 'average_values': df[numeric_cols].mean(),
115
+ 'year_over_year_growth': df[numeric_cols].pct_change().mean() * 100,
116
+ 'key_metrics': df[numeric_cols].describe()
117
  }
118
 
119
+ return df[numeric_cols].describe(), kpis
120
 
121
  except Exception as e:
122
  logger.error(f"Error processing CSV: {str(e)}")
 
125
  def analyze_financials(self, income_summary, balance_summary):
126
  """Generate financial analysis and recommendations"""
127
  try:
128
+ # Extract meaningful metrics
129
+ income_metrics = {
130
+ 'Total Revenue': income_summary[1]['total_revenue'],
131
+ 'Average Values': income_summary[1]['average_values'].mean(),
132
+ 'Growth Rate': income_summary[1]['year_over_year_growth'].mean()
133
+ }
134
+
135
+ balance_metrics = {
136
+ 'Total Assets': balance_summary[1]['total_revenue'],
137
+ 'Average Values': balance_summary[1]['average_values'].mean(),
138
+ 'Growth Rate': balance_summary[1]['year_over_year_growth'].mean()
139
+ }
140
+
141
  financial_context = f"""
142
+ Income Statement Analysis:
143
+ - Total Revenue: ${income_metrics['Total Revenue']:,.2f}
144
+ - Average Revenue: ${income_metrics['Average Values']:,.2f}
145
+ - Growth Rate: {income_metrics['Growth Rate']:.2f}%
146
+
147
+ Detailed Income Metrics:
148
  {income_summary[0].to_string()}
149
 
150
+ Balance Sheet Analysis:
151
+ - Total Assets: ${balance_metrics['Total Assets']:,.2f}
152
+ - Average Assets: ${balance_metrics['Average Values']:,.2f}
153
+ - Growth Rate: {balance_metrics['Growth Rate']:.2f}%
154
 
155
+ Detailed Balance Metrics:
156
  {balance_summary[0].to_string()}
 
 
 
157
  """
158
 
159
  # Generate sentiment analysis
160
+ sentiment = self.sentiment_model(
161
+ financial_context,
162
+ truncation=True,
163
+ max_length=512
164
+ )[0]
165
 
166
  # Generate business analysis
167
+ analysis_prompt = f"""[INST] As a financial analyst, provide a detailed analysis based on these financial metrics:
 
 
 
168
 
 
169
  {financial_context}
170
 
171
  Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
172
 
173
+ Please provide:
174
+
175
+ 1. Business Status:
176
+ - Financial health assessment
177
+ - Growth trajectory
178
+ - Key performance indicators analysis
179
+
180
+ 2. Key Insights:
181
+ - Revenue trends
182
+ - Asset utilization
183
+ - Financial efficiency metrics
184
+ - Areas of concern or opportunity
185
+
186
+ 3. Strategic Recommendations:
187
+ - Specific action items based on the metrics
188
+ - Growth opportunities
189
+ - Risk mitigation strategies
190
+ - Timeline-based roadmap
191
+
192
+ Be specific and data-driven in your analysis.
193
  [/INST]"""
194
 
195
  response = self.analysis_model(
196
  analysis_prompt,
197
+ max_length=1500,
198
+ do_sample=False,
199
+ num_return_sequences=1,
200
+ truncation=True,
201
+ pad_token_id=self.analysis_model.tokenizer.eos_token_id
202
  )
203
 
204
  return self.format_response(response[0]['generated_text'], sentiment)
205
 
206
  except Exception as e:
207
  logger.error(f"Error in analysis: {str(e)}")
208
+ return f"Error generating analysis: {str(e)}"
209
 
210
  def format_response(self, analysis_text, sentiment):
211
  """Format the analysis response into structured sections"""
 
223
  for section in sections:
224
  if "Business Status" in section:
225
  current_section = status
226
+ elif "Key Insights" in section:
227
  current_section = insights
228
  elif "Strategic Recommendations" in section:
229
  current_section = recommendations
 
234
  output = [
235
  "# Financial Analysis Report\n\n",
236
  f"## Overall Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n",
237
+ "## Business Status\n",
238
  "".join(f"- {item}\n" for item in status if item),
239
  "\n## Key Business Insights\n",
240
  "".join(f"- {item}\n" for item in insights if item),
 
251
  def analyze_statements(income_statement, balance_sheet):
252
  """Main function to analyze financial statements"""
253
  try:
254
+ if not income_statement or not balance_sheet:
 
255
  return "Please upload both Income Statement and Balance Sheet CSV files."
256
 
257
+ # Validate files
258
+ income_valid, income_msg = validate_financial_csv(income_statement, 'income_statement')
259
+ if not income_valid:
260
+ return f"Invalid Income Statement: {income_msg}"
261
+
262
+ balance_valid, balance_msg = validate_financial_csv(balance_sheet, 'balance_sheet')
263
+ if not balance_valid:
264
+ return f"Invalid Balance Sheet: {balance_msg}"
265
 
266
+ # Process if valid
267
  analyzer = FinancialAnalyzer()
268
+ income_summary = analyzer.process_csv(income_statement)
269
+ balance_summary = analyzer.process_csv(balance_sheet)
270
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
  result = analyzer.analyze_financials(income_summary, balance_summary)
 
272
  clear_gpu_memory()
273
  return result
274
 
275
  except Exception as e:
276
  logger.error(f"Analysis error: {str(e)}")
277
+ return f"""Analysis Error: {str(e)}
278
 
279
+ Please ensure your CSV files:
280
+ 1. Have proper headers (Revenue, Expenses, Profit for Income Statement)
281
+ 2. Contain numeric data
282
+ 3. Follow standard financial statement format
283
+ 4. Are not corrupted"""
 
 
 
284
 
285
+ # Create Gradio interface
286
  iface = gr.Interface(
287
  fn=analyze_statements,
288
  inputs=[
 
302
  description="""## Financial Analysis Tool
303
 
304
  How to use:
305
+ 1. Prepare your CSV files with proper headers:
306
+ - Income Statement: Revenue, Expenses, Profit
307
+ - Balance Sheet: Assets, Liabilities, Equity
308
+ 2. Upload both files using the buttons below
309
  3. Wait for the analysis to complete
310
 
311
  The tool will provide:
 
315
 
316
  Requirements:
317
  - Files must be in CSV format
318
+ - Must contain numeric data
319
+ - Standard financial statement format required""",
320
  flagging_mode="never"
321
  )
322
 
323
+ # Launch the interface
324
  if __name__ == "__main__":
325
  try:
326
+ iface.queue()
327
  iface.launch(
328
  share=False,
329
  server_name="0.0.0.0",
330
+ server_port=7860
 
331
  )
332
  except Exception as e:
333
  logger.error(f"Launch error: {str(e)}")