walaa2022 committed on
Commit
98d6352
·
verified ·
1 Parent(s): bc8be1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +153 -76
app.py CHANGED
@@ -17,7 +17,6 @@ logger = logging.getLogger(__name__)
17
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
18
  logger.info(f"Using device: {DEVICE}")
19
 
20
- # Clear GPU memory utility
21
  def clear_gpu_memory():
22
  """Utility function to clear GPU memory"""
23
  if DEVICE == "cuda":
@@ -25,112 +24,172 @@ def clear_gpu_memory():
25
  gc.collect()
26
 
27
  class FinancialAnalyzer:
28
- """Financial analysis using Tiny Llama and Falcon models"""
29
 
30
  def __init__(self):
31
  self.analysis_model = None
32
  self.sentiment_model = None
33
- self.falcon_model = None
34
  self.load_models()
35
 
36
  def load_models(self):
37
  """Load models for analysis and sentiment"""
38
  try:
39
- # Load Tiny Llama for generating financial analysis and insights
 
40
  self.analysis_model = pipeline(
41
  "text-generation",
42
- model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", # Tiny Llama model for analysis
43
  torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
44
  )
45
 
46
- # Load FinBERT for sentiment analysis
 
47
  self.sentiment_model = pipeline(
48
  "text-classification",
49
- model="yiyanghkust/finbert-tone", # FinBERT model for sentiment analysis
50
  torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
51
  )
52
 
53
- # Load Falcon model for generating roadmap and recommendations
54
- self.falcon_model = pipeline(
55
- "text-generation",
56
- model="tiiuae/falcon-7b", # Falcon model for recommendations and roadmap
57
- torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
58
- )
59
-
60
- logger.info("Tiny Llama, FinBERT, and Falcon models loaded successfully")
61
  except Exception as e:
62
  logger.error(f"Error loading models: {str(e)}")
63
  raise
64
 
65
- def analyze_financials(self, income_data: pd.DataFrame, balance_data: pd.DataFrame) -> str:
66
- """Generate financial analysis using Tiny Llama and analyze sentiment using FinBERT"""
67
  try:
68
- # Combine the data for AI to process (can adjust prompt as needed)
69
- combined_data = f"Income Statement Data:\n{income_data.to_string()}\n\nBalance Sheet Data:\n{balance_data.to_string()}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
- # Generate status and insights using Tiny Llama
72
- status_prompt = f"Please analyze the following financial data and provide status, insights, and metrics:\n\n{combined_data}"
73
  response = self.analysis_model(
74
- status_prompt,
75
  max_length=1500,
 
76
  num_return_sequences=1,
77
- do_sample=True,
78
- temperature=0.7
79
  )
80
- insights_result = response[0]['generated_text'].strip()
81
 
82
- # Get sentiment analysis from FinBERT
83
- sentiment = self.sentiment_model(insights_result[:512])[0] # Limit input to first 512 tokens
84
- sentiment_label = sentiment['label']
85
- sentiment_score = sentiment['score']
86
 
87
- # Generate recommendations and roadmap using Falcon
88
- roadmap_prompt = f"Based on the following financial insights, create a strategic roadmap and recommendations for the company:\n\n{insights_result}"
89
- roadmap_response = self.falcon_model(
90
- roadmap_prompt,
91
- max_length=1500,
92
- num_return_sequences=1,
93
- do_sample=True,
94
- temperature=0.7
95
- )
96
- roadmap_result = roadmap_response[0]['generated_text'].strip()
97
-
98
- # Return a comprehensive report
99
- result = f"""# Financial Analysis Report
100
- ### Sentiment Analysis: {sentiment_label} ({sentiment_score:.1%})
101
- ### Financial Status and Insights:
102
- {insights_result}
103
- ### Recommendations and Roadmap:
104
- {roadmap_result}
105
- """
106
- return result
107
-
108
  except Exception as e:
109
- logger.error(f"Analysis error: {str(e)}")
110
- return f"Analysis Error: {str(e)}"
111
 
112
- # Function to read CSV and convert to DataFrame
113
- def read_csv(file_path: str) -> pd.DataFrame:
114
- """Read CSV and return a DataFrame"""
115
- return pd.read_csv(file_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
  def analyze_statements(income_statement, balance_sheet):
118
  """Main function to analyze financial statements"""
119
  try:
120
  if not income_statement or not balance_sheet:
121
  return "Please upload both Income Statement and Balance Sheet CSV files."
122
-
123
- # Read files as DataFrames (no need to clean manually)
124
- income_data = read_csv(income_statement.name)
125
- balance_data = read_csv(balance_sheet.name)
126
-
127
- # Create analyzer and process data
128
  analyzer = FinancialAnalyzer()
 
 
 
 
 
 
 
 
 
 
129
  result = analyzer.analyze_financials(income_data, balance_data)
130
 
131
- # Clear memory
132
  clear_gpu_memory()
133
-
134
  return result
135
 
136
  except Exception as e:
@@ -138,29 +197,47 @@ def analyze_statements(income_statement, balance_sheet):
138
  return f"""Analysis Error: {str(e)}
139
 
140
  Please ensure your CSV files:
141
- 1. Have clear year columns
142
- 2. Contain recognizable financial metrics
143
- 3. Use consistent number formatting"""
144
 
145
  # Create Gradio interface
146
  iface = gr.Interface(
147
  fn=analyze_statements,
148
- inputs=[gr.File(label="Upload Income Statement (CSV)", file_types=[".csv"]),
149
- gr.File(label="Upload Balance Sheet (CSV)", file_types=[".csv"])],
 
 
 
 
 
 
 
 
150
  outputs=gr.Markdown(),
151
- title="Generative Financial Statement Analyzer with Tiny Llama, FinBERT, and Falcon",
152
- description="""## Financial Analysis Tool (AI-powered)
 
153
  Upload your financial statements to get:
154
- - Status & Insights
155
- - Key Metrics & Ratios
156
- - Trend Analysis
157
- - Strategic Recommendations & Roadmap""",
 
 
 
 
158
  )
159
 
160
  # Launch the interface
161
  if __name__ == "__main__":
162
  try:
163
- iface.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
164
  except Exception as e:
165
  logger.error(f"Launch error: {str(e)}")
166
  sys.exit(1)
 
17
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
18
  logger.info(f"Using device: {DEVICE}")
19
 
 
20
  def clear_gpu_memory():
21
  """Utility function to clear GPU memory"""
22
  if DEVICE == "cuda":
 
24
  gc.collect()
25
 
26
  class FinancialAnalyzer:
27
+ """Financial analysis using Tiny Llama and FinBERT models"""
28
 
29
  def __init__(self):
30
  self.analysis_model = None
31
  self.sentiment_model = None
 
32
  self.load_models()
33
 
34
  def load_models(self):
35
  """Load models for analysis and sentiment"""
36
  try:
37
+ # Load Tiny Llama for analysis
38
+ logger.info("Loading TinyLlama model...")
39
  self.analysis_model = pipeline(
40
  "text-generation",
41
+ model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
42
  torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
43
  )
44
 
45
+ # Load FinBERT for sentiment
46
+ logger.info("Loading FinBERT model...")
47
  self.sentiment_model = pipeline(
48
  "text-classification",
49
+ model="ProsusAI/finbert",
50
  torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
51
  )
52
 
53
+ logger.info("Models loaded successfully")
 
 
 
 
 
 
 
54
  except Exception as e:
55
  logger.error(f"Error loading models: {str(e)}")
56
  raise
57
 
58
+ def process_csv(self, file_obj):
59
+ """Process CSV file and extract financial data"""
60
  try:
61
+ if file_obj is None:
62
+ raise ValueError("No file provided")
63
+
64
+ # Read CSV with better error handling
65
+ df = pd.read_csv(file_obj, skipinitialspace=True)
66
+
67
+ if df.empty:
68
+ raise ValueError("Empty CSV file")
69
+
70
+ # Clean column names
71
+ df.columns = df.columns.str.strip()
72
+
73
+ # Remove unnamed columns
74
+ df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
75
+
76
+ # Convert to numeric where possible
77
+ for col in df.columns:
78
+ df[col] = pd.to_numeric(df[col].str.replace('[$,()]', '', regex=True), errors='ignore')
79
+
80
+ # Get numeric columns
81
+ numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
82
+ if len(numeric_cols) == 0:
83
+ raise ValueError("No numeric columns found in CSV")
84
+
85
+ return df[numeric_cols].describe()
86
+
87
+ except Exception as e:
88
+ logger.error(f"Error processing CSV: {str(e)}")
89
+ raise
90
+
91
+ def analyze_financials(self, income_data, balance_data):
92
+ """Generate financial analysis and recommendations"""
93
+ try:
94
+ financial_context = f"""
95
+ Income Statement Analysis:
96
+ {income_data.to_string()}
97
+
98
+ Balance Sheet Analysis:
99
+ {balance_data.to_string()}
100
+ """
101
+
102
+ # Generate sentiment analysis
103
+ sentiment = self.sentiment_model(
104
+ financial_context,
105
+ truncation=True,
106
+ max_length=512
107
+ )[0]
108
+
109
+ # Generate analysis
110
+ analysis_prompt = f"""[INST] As a financial analyst, analyze these financial statements:
111
+
112
+ {financial_context}
113
+
114
+ Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
115
+
116
+ Provide:
117
+ 1. Business Status and Health Assessment
118
+ 2. Key Financial Insights and Metrics
119
+ 3. Strategic Recommendations and Action Plan
120
+
121
+ Be specific and data-driven in your analysis.
122
+ [/INST]"""
123
 
 
 
124
  response = self.analysis_model(
125
+ analysis_prompt,
126
  max_length=1500,
127
+ do_sample=False,
128
  num_return_sequences=1,
129
+ truncation=True
 
130
  )
 
131
 
132
+ return self.format_response(response[0]['generated_text'], sentiment)
 
 
 
133
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  except Exception as e:
135
+ logger.error(f"Error in analysis: {str(e)}")
136
+ return f"Error generating analysis: {str(e)}"
137
 
138
+ def format_response(self, analysis_text, sentiment):
139
+ """Format the analysis response"""
140
+ try:
141
+ sections = analysis_text.split('\n\n')
142
+
143
+ output = [
144
+ "# Financial Analysis Report\n\n",
145
+ f"## Overall Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n"
146
+ ]
147
+
148
+ current_section = None
149
+ for section in sections:
150
+ section = section.strip()
151
+ if not section:
152
+ continue
153
+
154
+ if "Business Status" in section:
155
+ output.append("## Business Status\n")
156
+ current_section = "status"
157
+ elif "Key Financial Insights" in section:
158
+ output.append("\n## Key Insights\n")
159
+ current_section = "insights"
160
+ elif "Strategic Recommendations" in section:
161
+ output.append("\n## Recommendations\n")
162
+ current_section = "recommendations"
163
+ elif current_section:
164
+ output.append(f"- {section}\n")
165
+
166
+ return "".join(output)
167
+
168
+ except Exception as e:
169
+ logger.error(f"Error formatting response: {str(e)}")
170
+ return "Error formatting analysis results"
171
 
172
  def analyze_statements(income_statement, balance_sheet):
173
  """Main function to analyze financial statements"""
174
  try:
175
  if not income_statement or not balance_sheet:
176
  return "Please upload both Income Statement and Balance Sheet CSV files."
177
+
178
+ # Initialize analyzer
 
 
 
 
179
  analyzer = FinancialAnalyzer()
180
+
181
+ # Process statements
182
+ logger.info("Processing income statement...")
183
+ income_data = analyzer.process_csv(income_statement)
184
+
185
+ logger.info("Processing balance sheet...")
186
+ balance_data = analyzer.process_csv(balance_sheet)
187
+
188
+ # Generate analysis
189
+ logger.info("Generating analysis...")
190
  result = analyzer.analyze_financials(income_data, balance_data)
191
 
 
192
  clear_gpu_memory()
 
193
  return result
194
 
195
  except Exception as e:
 
197
  return f"""Analysis Error: {str(e)}
198
 
199
  Please ensure your CSV files:
200
+ 1. Contain numeric financial data
201
+ 2. Have proper column headers
202
+ 3. Are not corrupted"""
203
 
204
  # Create Gradio interface
205
  iface = gr.Interface(
206
  fn=analyze_statements,
207
+ inputs=[
208
+ gr.File(
209
+ label="Upload Income Statement (CSV)",
210
+ file_types=[".csv"]
211
+ ),
212
+ gr.File(
213
+ label="Upload Balance Sheet (CSV)",
214
+ file_types=[".csv"]
215
+ )
216
+ ],
217
  outputs=gr.Markdown(),
218
+ title="AI Financial Statement Analyzer",
219
+ description="""## Financial Analysis Tool
220
+
221
  Upload your financial statements to get:
222
+ - Business Status Assessment
223
+ - Key Financial Insights
224
+ - Strategic Recommendations
225
+
226
+ Requirements:
227
+ - CSV files with numeric data
228
+ - Standard financial statement format""",
229
+ flagging_mode="never"
230
  )
231
 
232
  # Launch the interface
233
  if __name__ == "__main__":
234
  try:
235
+ iface.queue()
236
+ iface.launch(
237
+ share=False,
238
+ server_name="0.0.0.0",
239
+ server_port=7860
240
+ )
241
  except Exception as e:
242
  logger.error(f"Launch error: {str(e)}")
243
  sys.exit(1)