walaa2022 commited on
Commit
17c0709
·
verified ·
1 Parent(s): 83dace5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -91
app.py CHANGED
@@ -3,7 +3,6 @@ import gradio as gr
3
  import pandas as pd
4
  import torch
5
  import logging
6
- import gc
7
  from transformers import pipeline
8
 
9
  # Setup logging
@@ -13,15 +12,9 @@ logging.basicConfig(
13
  )
14
  logger = logging.getLogger(__name__)
15
 
16
- # Device configuration
17
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
18
  logger.info(f"Using device: {DEVICE}")
19
 
20
- def clear_gpu_memory():
21
- if DEVICE == "cuda":
22
- torch.cuda.empty_cache()
23
- gc.collect()
24
-
25
  class FinancialAnalyzer:
26
  def __init__(self):
27
  self.analysis_model = None
@@ -30,12 +23,14 @@ class FinancialAnalyzer:
30
 
31
  def load_models(self):
32
  try:
 
33
  self.analysis_model = pipeline(
34
  "text-generation",
35
  model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
36
  torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
37
  )
38
 
 
39
  self.sentiment_model = pipeline(
40
  "text-classification",
41
  model="ProsusAI/finbert",
@@ -47,115 +42,105 @@ class FinancialAnalyzer:
47
  logger.error(f"Error loading models: {str(e)}")
48
  raise
49
 
50
- def format_number(self, number):
 
51
  try:
52
- if isinstance(number, str):
53
- number = float(number.replace(',', '').replace('$', '').strip())
54
- return f"${number:,.0f}"
55
- except:
56
- return str(number)
57
 
58
- def process_dataframe(self, df, statement_type):
59
- try:
60
- df_cleaned = df.copy()
61
- # Clean column names
62
- df_cleaned.columns = df_cleaned.columns.str.strip()
63
-
64
- # Clean numeric values
65
- numeric_cols = df_cleaned.select_dtypes(include=['float64', 'int64']).columns
66
- for col in numeric_cols:
67
- df_cleaned[col] = pd.to_numeric(df_cleaned[col].astype(str).str.replace('[$,()]', '', regex=True), errors='coerce')
 
 
 
 
 
 
 
68
 
69
- return df_cleaned
70
  except Exception as e:
71
- logger.error(f"Error processing {statement_type}: {str(e)}")
72
  raise
73
 
74
- def analyze_financials(self, income_df, balance_df):
75
  try:
76
- # Process dataframes
77
- income_clean = self.process_dataframe(income_df, "income_statement")
78
- balance_clean = self.process_dataframe(balance_df, "balance_sheet")
79
 
80
- # Create analysis context
81
- context = self.create_analysis_context(income_clean, balance_clean)
82
 
83
- # Generate sentiment
 
 
 
 
 
 
 
84
  sentiment = self.sentiment_model(
85
- context[:512],
86
  truncation=True
87
  )[0]
88
 
89
- # Generate analysis
90
- analysis_prompt = f"""[INST] Analyze these financial metrics:
91
 
92
- {context}
93
 
94
  Market Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
95
 
96
- Provide concise analysis of:
97
- 1. Financial Health
98
- 2. Key Insights
99
- 3. Strategic Recommendations
100
  [/INST]"""
101
 
102
- response = self.analysis_model(
103
- analysis_prompt,
104
  max_new_tokens=500,
105
  temperature=0.7,
106
  num_return_sequences=1,
107
  truncation=True
108
  )
109
 
110
- return self.format_response(response[0]['generated_text'], sentiment)
111
 
112
  except Exception as e:
113
  logger.error(f"Analysis error: {str(e)}")
114
  return f"Error in analysis: {str(e)}"
115
 
116
- def create_analysis_context(self, income_df, balance_df):
117
- try:
118
- # Extract latest year metrics
119
- latest_metrics = {
120
- 'Revenue': income_df.loc[income_df['year'] == 'Total Net Revenue', '2025'].iloc[0],
121
- 'Net_Income': income_df.loc[income_df['year'] == 'Net Income', '2025'].iloc[0],
122
- 'Assets': balance_df.loc[balance_df['year'] == 'Total Assets', '2025'].iloc[0],
123
- 'Liabilities': balance_df.loc[balance_df['year'] == 'Total Liabilities', '2025'].iloc[0],
124
- 'Equity': balance_df.loc[balance_df['year'] == "Shareholder's Equity", '2025'].iloc[0]
125
- }
126
-
127
- return f"""Financial Metrics (2025):
128
- Revenue: {self.format_number(latest_metrics['Revenue'])}
129
- Net Income: {self.format_number(latest_metrics['Net_Income'])}
130
- Total Assets: {self.format_number(latest_metrics['Assets'])}
131
- Total Liabilities: {self.format_number(latest_metrics['Liabilities'])}
132
- Shareholder's Equity: {self.format_number(latest_metrics['Equity'])}"""
133
-
134
- except Exception as e:
135
- logger.error(f"Error creating context: {str(e)}")
136
- raise
137
-
138
- def format_response(self, analysis_text, sentiment):
139
  try:
140
  sections = [
141
  "# Financial Analysis Report\n\n",
142
- f"## Market Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n"
 
 
 
 
143
  ]
144
 
145
- current_section = None
146
  for line in analysis_text.split('\n'):
147
  line = line.strip()
148
  if not line:
149
  continue
150
 
151
- if "Financial Health" in line:
152
- sections.append("## Financial Health\n")
153
- elif "Key Insights" in line:
154
- sections.append("\n## Key Insights\n")
155
- elif "Strategic Recommendations" in line:
156
- sections.append("\n## Strategic Recommendations\n")
157
  elif line:
158
- sections.append(f"- {line}\n")
 
 
159
 
160
  return "".join(sections)
161
  except Exception as e:
@@ -167,13 +152,22 @@ def analyze_statements(income_statement, balance_sheet):
167
  if not income_statement or not balance_sheet:
168
  return "Please upload both financial statements."
169
 
 
 
170
  income_df = pd.read_csv(income_statement)
171
  balance_df = pd.read_csv(balance_sheet)
172
 
 
 
 
 
 
173
  analyzer = FinancialAnalyzer()
174
- result = analyzer.analyze_financials(income_df, balance_df)
175
 
176
- clear_gpu_memory()
 
 
177
  return result
178
 
179
  except Exception as e:
@@ -181,31 +175,26 @@ def analyze_statements(income_statement, balance_sheet):
181
  return f"""Analysis Error: {str(e)}
182
 
183
  Please check:
184
- 1. CSV format is correct
185
- 2. Required financial data is present
186
  3. Files are not corrupted"""
187
 
188
  # Create Gradio interface
189
  iface = gr.Interface(
190
  fn=analyze_statements,
191
  inputs=[
192
- gr.File(
193
- label="Income Statement",
194
- file_types=[".csv"]
195
- ),
196
- gr.File(
197
- label="Balance Sheet",
198
- file_types=[".csv"]
199
- )
200
  ],
201
  outputs=gr.Markdown(),
202
- title="Financial Statement Analyzer",
203
- description="Upload financial statements for AI analysis",
 
204
  theme="default",
205
- allow_flagging="never"
206
  )
207
 
208
- # Launch with basic configuration
209
  if __name__ == "__main__":
210
  iface.launch(
211
  server_name="0.0.0.0",
 
3
  import pandas as pd
4
  import torch
5
  import logging
 
6
  from transformers import pipeline
7
 
8
  # Setup logging
 
12
  )
13
  logger = logging.getLogger(__name__)
14
 
 
15
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
16
  logger.info(f"Using device: {DEVICE}")
17
 
 
 
 
 
 
18
  class FinancialAnalyzer:
19
  def __init__(self):
20
  self.analysis_model = None
 
23
 
24
  def load_models(self):
25
  try:
26
+ logger.info("Loading TinyLlama model...")
27
  self.analysis_model = pipeline(
28
  "text-generation",
29
  model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
30
  torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
31
  )
32
 
33
+ logger.info("Loading FinBERT model...")
34
  self.sentiment_model = pipeline(
35
  "text-classification",
36
  model="ProsusAI/finbert",
 
42
  logger.error(f"Error loading models: {str(e)}")
43
  raise
44
 
45
+ def extract_and_analyze(self, statement_text, statement_type):
46
+ """Extract information from financial statement text"""
47
  try:
48
+ prompt = f"""[INST] As a financial analyst, analyze this {statement_type}:
 
 
 
 
49
 
50
+ {statement_text}
51
+
52
+ Extract and summarize:
53
+ 1. Key financial numbers for 2025
54
+ 2. Notable trends
55
+ 3. Important metrics
56
+
57
+ Focus on the most recent year (2025) and key financial indicators.
58
+ [/INST]"""
59
+
60
+ response = self.analysis_model(
61
+ prompt,
62
+ max_new_tokens=300,
63
+ temperature=0.3,
64
+ num_return_sequences=1,
65
+ truncation=True
66
+ )
67
 
68
+ return response[0]['generated_text']
69
  except Exception as e:
70
+ logger.error(f"Error extracting data from {statement_type}: {str(e)}")
71
  raise
72
 
73
+ def analyze_financials(self, income_text, balance_text):
74
  try:
75
+ # First, extract key information from each statement
76
+ logger.info("Analyzing Income Statement...")
77
+ income_analysis = self.extract_and_analyze(income_text, "Income Statement")
78
 
79
+ logger.info("Analyzing Balance Sheet...")
80
+ balance_analysis = self.extract_and_analyze(balance_text, "Balance Sheet")
81
 
82
+ # Combine the analyses
83
+ combined_analysis = f"""Income Statement Analysis:
84
+ {income_analysis}
85
+
86
+ Balance Sheet Analysis:
87
+ {balance_analysis}"""
88
+
89
+ # Get sentiment
90
  sentiment = self.sentiment_model(
91
+ combined_analysis[:512],
92
  truncation=True
93
  )[0]
94
 
95
+ # Generate final analysis
96
+ final_prompt = f"""[INST] Based on this financial analysis:
97
 
98
+ {combined_analysis}
99
 
100
  Market Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
101
 
102
+ Provide a concise analysis with:
103
+ 1. Overall Financial Health (2-3 key points)
104
+ 2. Main Business Insights (2-3 insights)
105
+ 3. Key Recommendations (2-3 recommendations)
106
  [/INST]"""
107
 
108
+ final_response = self.analysis_model(
109
+ final_prompt,
110
  max_new_tokens=500,
111
  temperature=0.7,
112
  num_return_sequences=1,
113
  truncation=True
114
  )
115
 
116
+ return self.format_response(final_response[0]['generated_text'], sentiment, combined_analysis)
117
 
118
  except Exception as e:
119
  logger.error(f"Analysis error: {str(e)}")
120
  return f"Error in analysis: {str(e)}"
121
 
122
+ def format_response(self, analysis_text, sentiment, raw_analysis):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  try:
124
  sections = [
125
  "# Financial Analysis Report\n\n",
126
+ f"## Market Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n",
127
+ "## Extracted Financial Data\n```\n",
128
+ raw_analysis,
129
+ "\n```\n\n",
130
+ "## Analysis\n\n"
131
  ]
132
 
 
133
  for line in analysis_text.split('\n'):
134
  line = line.strip()
135
  if not line:
136
  continue
137
 
138
+ if any(header in line for header in ["Financial Health", "Business Insights", "Recommendations"]):
139
+ sections.append(f"\n### {line}\n")
 
 
 
 
140
  elif line:
141
+ if not line.startswith('-'):
142
+ line = f"- {line}"
143
+ sections.append(f"{line}\n")
144
 
145
  return "".join(sections)
146
  except Exception as e:
 
152
  if not income_statement or not balance_sheet:
153
  return "Please upload both financial statements."
154
 
155
+ logger.info("Reading financial statements...")
156
+ # Read files as text
157
  income_df = pd.read_csv(income_statement)
158
  balance_df = pd.read_csv(balance_sheet)
159
 
160
+ # Convert to string while preserving format
161
+ income_text = income_df.to_string(index=False)
162
+ balance_text = balance_df.to_string(index=False)
163
+
164
+ logger.info("Initializing analysis...")
165
  analyzer = FinancialAnalyzer()
166
+ result = analyzer.analyze_financials(income_text, balance_text)
167
 
168
+ if DEVICE == "cuda":
169
+ torch.cuda.empty_cache()
170
+
171
  return result
172
 
173
  except Exception as e:
 
175
  return f"""Analysis Error: {str(e)}
176
 
177
  Please check:
178
+ 1. Files are readable CSV files
179
+ 2. Files contain financial data
180
  3. Files are not corrupted"""
181
 
182
  # Create Gradio interface
183
  iface = gr.Interface(
184
  fn=analyze_statements,
185
  inputs=[
186
+ gr.File(label="Income Statement (CSV)", file_types=[".csv"]),
187
+ gr.File(label="Balance Sheet (CSV)", file_types=[".csv"])
 
 
 
 
 
 
188
  ],
189
  outputs=gr.Markdown(),
190
+ title="AI Financial Statement Analyzer",
191
+ description="""Upload your financial statements for AI analysis.
192
+ The model will extract and analyze key financial information automatically.""",
193
  theme="default",
194
+ flagging_mode="never"
195
  )
196
 
197
+ # Launch
198
  if __name__ == "__main__":
199
  iface.launch(
200
  server_name="0.0.0.0",