walaa2022 committed on
Commit
30c4182
·
verified ·
1 Parent(s): f95f954

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -171
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import os
2
  import gradio as gr
3
  import pandas as pd
4
  import torch
@@ -17,200 +17,103 @@ logger = logging.getLogger(__name__)
17
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
18
  logger.info(f"Using device: {DEVICE}")
19
 
 
def clear_gpu_memory():
    """Release memory that is no longer needed after an analysis run.

    Python garbage is collected first so that tensors kept alive only by
    unreachable objects are actually freed; only then is PyTorch's CUDA
    caching allocator asked to hand its unused blocks back to the driver.
    Emptying the cache before collecting would leave those blocks cached.
    """
    gc.collect()
    if DEVICE == "cuda":
        torch.cuda.empty_cache()
25
 
class FinancialDataExtractor:
    """Extract and clean financial data from statement tables."""

    def __init__(self):
        self.logger = logger

    def clean_number(self, value):
        """Convert one financial-statement cell to a float.

        Missing values, empty strings and a lone ``-`` become ``0.0``.
        Currency symbols, thousands separators and double quotes are
        stripped, and accounting-style parentheses mark a negative number.

        Args:
            value: Raw cell content (number, string or missing value).

        Returns:
            The parsed float, or ``0.0`` when the cell cannot be parsed.
        """
        try:
            if pd.isna(value) or value == '' or value == '-':
                return 0.0
            if isinstance(value, (int, float)):
                return float(value)

            # Remove currency symbols, spaces, commas
            cleaned = str(value).replace('$', '').replace(',', '').replace('"', '').strip()
            # Handle parentheses for negative numbers
            if '(' in cleaned and ')' in cleaned:
                cleaned = '-' + cleaned.replace('(', '').replace(')', '')
            return float(cleaned)
        except (TypeError, ValueError, OverflowError):
            # Only conversion failures are treated as "no value"; anything
            # else (e.g. KeyboardInterrupt) must not be swallowed here.
            return 0.0

    def extract_data(self, df: pd.DataFrame) -> tuple:
        """Clean a statement table and locate its year columns.

        Works on a copy, so the caller's DataFrame is left untouched.

        Args:
            df: Statement table whose year columns are labelled with digits
                only (for example ``"2022"``).

        Returns:
            A ``(cleaned_df, year_cols)`` tuple: the cleaned copy and the
            list of its digit-only column labels.

        Raises:
            ValueError: If no digit-only column label is found.
        """
        cleaned = df.copy()

        # Clean column names; str() keeps this working for non-string labels
        cleaned.columns = [str(col).strip() for col in cleaned.columns]

        # Get year columns
        year_cols = [col for col in cleaned.columns if col.isdigit()]

        if not year_cols:
            raise ValueError("No year columns found in data")

        # Clean numeric data
        for col in year_cols:
            cleaned[col] = cleaned[col].apply(self.clean_number)

        return cleaned, year_cols
65
-
class FinancialAnalyzer:
    """Financial analysis using small models.

    Ratios are computed from cleaned income-statement and balance-sheet
    tables; a sentiment pipeline scores a textual summary of them and a
    text-generation pipeline writes the narrative part of the report.
    """

    def __init__(self):
        self.extractor = FinancialDataExtractor()
        self.sentiment_model = None
        self.analysis_model = None
        self.load_models()

    def load_models(self):
        """Load the required models.

        Raises:
            Exception: Any error raised while building a pipeline is logged
                and re-raised.
        """
        # Half precision is only requested when CUDA is available.
        dtype = torch.float16 if DEVICE == "cuda" else torch.float32
        # pipeline() keeps the model on the CPU unless a device is given, so
        # the detected device has to be passed explicitly (-1 means CPU).
        device = 0 if DEVICE == "cuda" else -1
        try:
            # Load FinBERT for sentiment analysis
            self.sentiment_model = pipeline(
                "text-classification",
                model="ProsusAI/finbert",
                torch_dtype=dtype,
                device=device,
                truncation=True
            )

            # Load small model for analysis
            self.analysis_model = pipeline(
                "text-generation",
                model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                torch_dtype=dtype,
                device=device
            )

            logger.info("Models loaded successfully")
        except Exception as e:
            logger.error(f"Error loading models: {str(e)}")
            raise

    @staticmethod
    def _ratio(numerator, denominator):
        """Return numerator / denominator, or NaN when the denominator is zero."""
        if denominator == 0:
            return float("nan")
        return numerator / denominator

    @staticmethod
    def _find_value(df, pattern, year, last=False):
        """Return the ``year`` value of the first (or last) row whose 'Period' matches ``pattern``."""
        rows = df[df['Period'].str.contains(pattern, na=False, case=False)]
        if rows.empty:
            # Same exception type .iloc raised before, but naming the row.
            raise IndexError(f"No row matching '{pattern}' found in 'Period' column")
        return rows[year].iloc[-1 if last else 0]

    def calculate_metrics(self, income_df: pd.DataFrame, balance_df: pd.DataFrame, year_cols: list) -> dict:
        """Calculate financial metrics for every year column.

        Args:
            income_df: Cleaned income statement with a 'Period' label column.
            balance_df: Cleaned balance sheet with a 'Period' label column.
            year_cols: Column labels to compute metrics for.

        Returns:
            ``{year: {'Profitability': ..., 'Liquidity': ..., 'Growth': ...}}``.
            A ratio whose denominator is zero is reported as NaN.

        Raises:
            IndexError: If a required statement row cannot be found.
        """
        metrics = {}

        for year in year_cols:
            # Income Statement metrics
            income = {
                'Revenue': self._find_value(income_df, 'Total Net Revenue|Revenue', year),
                'COGS': self._find_value(income_df, 'Cost of Goods Sold', year),
                'Operating_Expenses': self._find_value(income_df, 'Total Expenses', year),
                'EBIT': self._find_value(income_df, 'Earnings Before Interest & Taxes', year),
                'Net_Income': self._find_value(income_df, 'Net Income|Net Earnings', year, last=True),
            }

            # Balance Sheet metrics
            balance = {
                'Total_Assets': self._find_value(balance_df, 'Total Assets', year),
                'Current_Assets': self._find_value(balance_df, 'Total current assets', year),
                'Total_Liabilities': self._find_value(balance_df, 'Total Liabilities', year),
                'Current_Liabilities': self._find_value(balance_df, 'Total current liabilities', year),
                'Equity': self._find_value(balance_df, "Shareholder's Equity", year, last=True),
            }

            # Calculate ratios
            metrics[year] = {
                'Profitability': {
                    'Gross_Margin': self._ratio(income['Revenue'] - income['COGS'], income['Revenue']) * 100,
                    'Operating_Margin': self._ratio(income['EBIT'], income['Revenue']) * 100,
                    'Net_Margin': self._ratio(income['Net_Income'], income['Revenue']) * 100,
                    'ROE': self._ratio(income['Net_Income'], balance['Equity']) * 100,
                    'ROA': self._ratio(income['Net_Income'], balance['Total_Assets']) * 100,
                },
                'Liquidity': {
                    'Current_Ratio': self._ratio(balance['Current_Assets'], balance['Current_Liabilities']),
                    'Working_Capital': balance['Current_Assets'] - balance['Current_Liabilities'],
                },
                'Growth': {
                    'Revenue': income['Revenue'],
                    'Net_Income': income['Net_Income'],
                    'Total_Assets': balance['Total_Assets'],
                },
            }

        return metrics

    def analyze_financials(self, income_df: pd.DataFrame, balance_df: pd.DataFrame) -> str:
        """Generate the Markdown financial analysis report.

        Args:
            income_df: Raw income statement table.
            balance_df: Raw balance sheet table.

        Returns:
            The report as a Markdown string.

        Raises:
            Exception: Any failure is logged and re-raised.
        """
        try:
            # Extract and clean data
            income_df, year_cols = self.extractor.extract_data(income_df)
            balance_df, _ = self.extractor.extract_data(balance_df)

            # Calculate metrics
            metrics = self.calculate_metrics(income_df, balance_df, year_cols)

            # Get latest and earliest years
            latest_year = max(year_cols)
            earliest_year = min(year_cols)
            latest = metrics[latest_year]
            earliest = metrics[earliest_year]

            # Calculate growth
            revenue_growth = (self._ratio(latest['Growth']['Revenue'], earliest['Growth']['Revenue']) - 1) * 100
            profit_growth = (self._ratio(latest['Growth']['Net_Income'], earliest['Growth']['Net_Income']) - 1) * 100

            margin_trend = (
                'improving'
                if latest['Profitability']['Net_Margin'] > earliest['Profitability']['Net_Margin']
                else 'declining'
            )

            # Generate analysis context
            context = "\n".join([
                f"Financial Analysis ({earliest_year}-{latest_year}):",
                "",
                "Performance Metrics:",
                f"- Revenue Growth: {revenue_growth:.1f}%",
                f"- Profit Growth: {profit_growth:.1f}%",
                f"- Current Gross Margin: {latest['Profitability']['Gross_Margin']:.1f}%",
                f"- Current Net Margin: {latest['Profitability']['Net_Margin']:.1f}%",
                f"- ROE: {latest['Profitability']['ROE']:.1f}%",
                f"- Current Ratio: {latest['Liquidity']['Current_Ratio']:.2f}",
                "",
                "Trends:",
                f"- Revenue has grown from ${earliest['Growth']['Revenue']:,.0f} to ${latest['Growth']['Revenue']:,.0f}",
                f"- Net Income has changed from ${earliest['Growth']['Net_Income']:,.0f} to ${latest['Growth']['Net_Income']:,.0f}",
                f"- Profitability margins show {margin_trend} trend",
            ])

            # Get sentiment (only the first 512 characters are scored)
            sentiment = self.sentiment_model(context[:512])[0]

            # Generate detailed analysis
            prompt = f"[INST] As a financial analyst, provide a detailed analysis of this company:\n\n{context}\n\nInclude:\n1. Financial health assessment\n2. Key performance insights\n3. Strategic recommendations [/INST]"
            analysis = self.analysis_model(
                prompt,
                max_length=1500,
                num_return_sequences=1,
                do_sample=True,
                temperature=0.7,
                # Without this the pipeline returns prompt + completion and
                # the instruction text ends up inside the report.
                return_full_text=False
            )[0]['generated_text']

            # Format output
            output = "\n".join([
                "# Financial Analysis Report",
                "",
                f"## Overall Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.1%})",
                "",
                f"## Key Performance Indicators ({latest_year})",
                f"- Gross Margin: {latest['Profitability']['Gross_Margin']:.1f}%",
                f"- Operating Margin: {latest['Profitability']['Operating_Margin']:.1f}%",
                f"- Net Margin: {latest['Profitability']['Net_Margin']:.1f}%",
                f"- ROE: {latest['Profitability']['ROE']:.1f}%",
                f"- Current Ratio: {latest['Liquidity']['Current_Ratio']:.2f}",
                "",
                f"## Performance Trends ({earliest_year}-{latest_year})",
                f"- Revenue Growth: {revenue_growth:.1f}%",
                f"- Profit Growth: {profit_growth:.1f}%",
                f"- Working Capital: ${latest['Liquidity']['Working_Capital']:,.0f}",
                "",
                "## Analysis",
                f"{analysis}",
            ])

            return output

        except Exception as e:
            logger.error(f"Analysis error: {str(e)}")
            raise
 
 
 
 
 
 
214
 
215
  def analyze_statements(income_statement, balance_sheet):
216
  """Main function to analyze financial statements"""
@@ -218,13 +121,16 @@ def analyze_statements(income_statement, balance_sheet):
218
  if not income_statement or not balance_sheet:
219
  return "Please upload both Income Statement and Balance Sheet CSV files."
220
 
221
- # Read files
222
- income_df = pd.read_csv(income_statement.name)
223
- balance_df = pd.read_csv(balance_sheet.name)
224
 
225
- # Create analyzer and process
 
 
 
226
  analyzer = FinancialAnalyzer()
227
- result = analyzer.analyze_financials(income_df, balance_df)
228
 
229
  # Clear memory
230
  clear_gpu_memory()
@@ -243,20 +149,16 @@ def analyze_statements(income_statement, balance_sheet):
# Build the Gradio UI: two CSV uploads in, one Markdown report out.
iface = gr.Interface(
    fn=analyze_statements,
    inputs=[
        gr.File(label="Upload Income Statement (CSV)", file_types=[".csv"]),
        gr.File(label="Upload Balance Sheet (CSV)", file_types=[".csv"]),
    ],
    outputs=gr.Markdown(),
    title="Financial Statement Analyzer",
    # Markdown shown under the title; assembled line by line.
    description="\n".join([
        "## Financial Analysis Tool",
        "",
        "Upload your financial statements to get:",
        "- Performance Analysis",
        "- Key Metrics & Ratios",
        "- Trend Analysis",
        "- Strategic Recommendations",
    ]),
    examples=None,
)
261
 
262
  # Launch the interface
@@ -265,4 +167,4 @@ if __name__ == "__main__":
265
  iface.launch(server_name="0.0.0.0", server_port=7860)
266
  except Exception as e:
267
  logger.error(f"Launch error: {str(e)}")
268
- sys.exit(1)
 
1
+ import os
2
  import gradio as gr
3
  import pandas as pd
4
  import torch
 
17
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
18
  logger.info(f"Using device: {DEVICE}")
19
 
# Clear GPU memory utility
def clear_gpu_memory():
    """Release memory that is no longer needed after an analysis run.

    Python garbage is collected first so that tensors kept alive only by
    unreachable objects are actually freed; only then is PyTorch's CUDA
    caching allocator asked to hand its unused blocks back to the driver.
    Emptying the cache before collecting would leave those blocks cached.
    """
    gc.collect()
    if DEVICE == "cuda":
        torch.cuda.empty_cache()
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
class FinancialAnalyzer:
    """Financial analysis using Tiny Llama, FinBERT, and Falcon models.

    Tiny Llama writes the status/insights text, FinBERT scores its
    sentiment, and Falcon turns the insights into recommendations.
    """

    def __init__(self):
        self.analysis_model = None
        self.sentiment_model = None
        self.falcon_model = None
        self.load_models()

    def load_models(self):
        """Load models for analysis and sentiment.

        Raises:
            Exception: Any error raised while building a pipeline is logged
                and re-raised.
        """
        # Half precision is only requested when CUDA is available.
        dtype = torch.float16 if DEVICE == "cuda" else torch.float32
        # pipeline() keeps the model on the CPU unless a device is given, so
        # the detected device has to be passed explicitly (-1 means CPU).
        device = 0 if DEVICE == "cuda" else -1
        try:
            # Load Tiny Llama for generating financial analysis and insights
            self.analysis_model = pipeline(
                "text-generation",
                model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # Tiny Llama model for analysis
                torch_dtype=dtype,
                device=device
            )

            # Load FinBERT for sentiment analysis
            self.sentiment_model = pipeline(
                "text-classification",
                model="yiyanghkust/finbert-tone",  # FinBERT model for sentiment analysis
                torch_dtype=dtype,
                device=device
            )

            # Load Falcon model for generating roadmap and recommendations
            self.falcon_model = pipeline(
                "text-generation",
                model="tiiuae/falcon-7b",  # Falcon model for recommendations and roadmap
                torch_dtype=dtype,
                device=device
            )

            logger.info("Tiny Llama, FinBERT, and Falcon models loaded successfully")
        except Exception as e:
            logger.error(f"Error loading models: {str(e)}")
            raise

    def analyze_financials(self, csv_data: str) -> str:
        """Generate financial analysis using Tiny Llama and analyze sentiment using FinBERT.

        Args:
            csv_data: Statement data rendered as plain text; it is embedded
                verbatim in the first prompt.

        Returns:
            A Markdown report, or an ``"Analysis Error: ..."`` string when
            any step fails (the error is logged, not raised).
        """
        try:
            # Generate status and insights using Tiny Llama
            status_prompt = f"Please analyze the following financial data and provide status, insights, and metrics:\n\n{csv_data}"
            # NOTE(review): max_length also counts the prompt tokens, so a large
            # CSV leaves little or no room for generated text - confirm sizing.
            response = self.analysis_model(
                status_prompt,
                max_length=1500,
                num_return_sequences=1,
                do_sample=True,
                temperature=0.7,
                # Without this the pipeline returns prompt + completion and
                # the whole CSV would be repeated inside the report.
                return_full_text=False
            )
            insights_result = response[0]['generated_text'].strip()

            # Get sentiment analysis from FinBERT. The slice limits the input to
            # 512 characters (not tokens); truncation guards the model's own limit.
            sentiment = self.sentiment_model(insights_result[:512], truncation=True)[0]
            sentiment_label = sentiment['label']
            sentiment_score = sentiment['score']

            # Generate recommendations and roadmap using Falcon
            roadmap_prompt = f"Based on the following financial insights, create a strategic roadmap and recommendations for the company:\n\n{insights_result}"
            roadmap_response = self.falcon_model(
                roadmap_prompt,
                max_length=1500,
                num_return_sequences=1,
                do_sample=True,
                temperature=0.7,
                # Keep only Falcon's continuation, not the echoed prompt.
                return_full_text=False
            )
            roadmap_result = roadmap_response[0]['generated_text'].strip()

            # Return a comprehensive report (the trailing "" keeps the final newline)
            result = "\n".join([
                "# Financial Analysis Report",
                "",
                f"### Sentiment Analysis: {sentiment_label} ({sentiment_score:.1%})",
                "",
                "### Financial Status and Insights:",
                f"{insights_result}",
                "",
                "### Recommendations and Roadmap:",
                f"{roadmap_result}",
                "",
            ])
            return result

        except Exception as e:
            logger.error(f"Analysis error: {str(e)}")
            return f"Analysis Error: {str(e)}"
111
+
# Helper that turns an uploaded CSV file into prompt-ready text
def csv_to_text(file_path: str) -> str:
    """Read the CSV at ``file_path`` and render it as plain text.

    The table is printed without its row index so that only the file's own
    columns appear in the model input.
    """
    table = pd.read_csv(file_path)
    rendered = table.to_string(index=False)
    return rendered
117
 
118
  def analyze_statements(income_statement, balance_sheet):
119
  """Main function to analyze financial statements"""
 
121
  if not income_statement or not balance_sheet:
122
  return "Please upload both Income Statement and Balance Sheet CSV files."
123
 
124
+ # Read files as raw text (no need to clean manually)
125
+ income_data = csv_to_text(income_statement.name)
126
+ balance_data = csv_to_text(balance_sheet.name)
127
 
128
+ # Combine the data for AI to process (can adjust prompt as needed)
129
+ combined_data = f"Income Statement Data:\n{income_data}\n\nBalance Sheet Data:\n{balance_data}"
130
+
131
+ # Create analyzer and process data
132
  analyzer = FinancialAnalyzer()
133
+ result = analyzer.analyze_financials(combined_data)
134
 
135
  # Clear memory
136
  clear_gpu_memory()
 
# Build the Gradio UI: two CSV uploads in, one Markdown report out.
iface = gr.Interface(
    fn=analyze_statements,
    inputs=[
        gr.File(label="Upload Income Statement (CSV)", file_types=[".csv"]),
        gr.File(label="Upload Balance Sheet (CSV)", file_types=[".csv"]),
    ],
    outputs=gr.Markdown(),
    title="Generative Financial Statement Analyzer with Tiny Llama, FinBERT, and Falcon",
    # Markdown shown under the title; assembled line by line.
    description="\n".join([
        "## Financial Analysis Tool (AI-powered)",
        "Upload your financial statements to get:",
        "- Status & Insights",
        "- Key Metrics & Ratios",
        "- Trend Analysis",
        "- Strategic Recommendations & Roadmap",
    ]),
)
163
 
164
  # Launch the interface
 
167
  iface.launch(server_name="0.0.0.0", server_port=7860)
168
  except Exception as e:
169
  logger.error(f"Launch error: {str(e)}")
170
+ sys.exit(1)