walaa2022 committed on
Commit
a772146
·
verified ·
1 Parent(s): 98d6352

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +104 -87
app.py CHANGED
@@ -6,119 +6,137 @@ import logging
6
  import gc
7
  from transformers import pipeline
8
 
9
- # Setup logging
10
  logging.basicConfig(
11
  level=logging.INFO,
12
  format='%(asctime)s - %(levelname)s - %(message)s'
13
  )
14
  logger = logging.getLogger(__name__)
15
 
16
- # Device configuration
17
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
18
  logger.info(f"Using device: {DEVICE}")
19
 
20
  def clear_gpu_memory():
21
- """Utility function to clear GPU memory"""
22
  if DEVICE == "cuda":
23
  torch.cuda.empty_cache()
24
  gc.collect()
25
 
26
  class FinancialAnalyzer:
27
- """Financial analysis using Tiny Llama and FinBERT models"""
28
-
29
  def __init__(self):
 
30
  self.analysis_model = None
31
  self.sentiment_model = None
32
  self.load_models()
33
 
34
  def load_models(self):
35
- """Load models for analysis and sentiment"""
36
  try:
37
- # Load Tiny Llama for analysis
38
- logger.info("Loading TinyLlama model...")
 
 
 
 
 
 
 
 
39
  self.analysis_model = pipeline(
40
  "text-generation",
41
  model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
42
  torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
43
  )
44
 
45
- # Load FinBERT for sentiment
46
- logger.info("Loading FinBERT model...")
47
  self.sentiment_model = pipeline(
48
  "text-classification",
49
  model="ProsusAI/finbert",
50
  torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
51
  )
52
 
53
- logger.info("Models loaded successfully")
54
  except Exception as e:
55
  logger.error(f"Error loading models: {str(e)}")
56
  raise
57
 
58
- def process_csv(self, file_obj):
59
- """Process CSV file and extract financial data"""
60
  try:
61
- if file_obj is None:
62
- raise ValueError("No file provided")
 
63
 
64
- # Read CSV with better error handling
65
- df = pd.read_csv(file_obj, skipinitialspace=True)
66
-
67
- if df.empty:
68
- raise ValueError("Empty CSV file")
69
 
70
- # Clean column names
71
- df.columns = df.columns.str.strip()
72
-
73
- # Remove unnamed columns
74
- df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
75
-
76
- # Convert to numeric where possible
77
- for col in df.columns:
78
- df[col] = pd.to_numeric(df[col].str.replace('[$,()]', '', regex=True), errors='ignore')
79
-
80
- # Get numeric columns
81
- numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
82
- if len(numeric_cols) == 0:
83
- raise ValueError("No numeric columns found in CSV")
84
-
85
- return df[numeric_cols].describe()
 
 
 
 
86
 
87
- except Exception as e:
88
- logger.error(f"Error processing CSV: {str(e)}")
89
- raise
90
 
91
  def analyze_financials(self, income_data, balance_data):
92
- """Generate financial analysis and recommendations"""
93
  try:
 
94
  financial_context = f"""
95
  Income Statement Analysis:
96
- {income_data.to_string()}
97
 
98
  Balance Sheet Analysis:
99
- {balance_data.to_string()}
100
  """
101
 
102
- # Generate sentiment analysis
103
  sentiment = self.sentiment_model(
104
  financial_context,
105
  truncation=True,
106
  max_length=512
107
  )[0]
108
-
109
- # Generate analysis
110
- analysis_prompt = f"""[INST] As a financial analyst, analyze these financial statements:
111
 
112
  {financial_context}
113
 
114
- Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
- Provide:
117
- 1. Business Status and Health Assessment
118
- 2. Key Financial Insights and Metrics
119
- 3. Strategic Recommendations and Action Plan
120
 
121
- Be specific and data-driven in your analysis.
122
  [/INST]"""
123
 
124
  response = self.analysis_model(
@@ -129,40 +147,42 @@ class FinancialAnalyzer:
129
  truncation=True
130
  )
131
 
132
- return self.format_response(response[0]['generated_text'], sentiment)
133
 
134
  except Exception as e:
135
  logger.error(f"Error in analysis: {str(e)}")
136
  return f"Error generating analysis: {str(e)}"
137
 
138
- def format_response(self, analysis_text, sentiment):
139
  """Format the analysis response"""
140
  try:
141
- sections = analysis_text.split('\n\n')
142
-
143
  output = [
144
  "# Financial Analysis Report\n\n",
145
- f"## Overall Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n"
 
 
 
146
  ]
147
-
 
148
  current_section = None
 
149
  for section in sections:
150
  section = section.strip()
151
  if not section:
152
  continue
153
-
154
  if "Business Status" in section:
155
  output.append("## Business Status\n")
156
- current_section = "status"
157
- elif "Key Financial Insights" in section:
158
- output.append("\n## Key Insights\n")
159
- current_section = "insights"
160
- elif "Strategic Recommendations" in section:
161
- output.append("\n## Recommendations\n")
162
- current_section = "recommendations"
163
- elif current_section:
164
- output.append(f"- {section}\n")
165
-
166
  return "".join(output)
167
 
168
  except Exception as e:
@@ -175,18 +195,17 @@ def analyze_statements(income_statement, balance_sheet):
175
  if not income_statement or not balance_sheet:
176
  return "Please upload both Income Statement and Balance Sheet CSV files."
177
 
178
- # Initialize analyzer
179
  analyzer = FinancialAnalyzer()
180
 
181
- # Process statements
182
- logger.info("Processing income statement...")
183
- income_data = analyzer.process_csv(income_statement)
184
 
185
- logger.info("Processing balance sheet...")
186
- balance_data = analyzer.process_csv(balance_sheet)
187
 
188
  # Generate analysis
189
- logger.info("Generating analysis...")
190
  result = analyzer.analyze_financials(income_data, balance_data)
191
 
192
  clear_gpu_memory()
@@ -196,10 +215,10 @@ def analyze_statements(income_statement, balance_sheet):
196
  logger.error(f"Analysis error: {str(e)}")
197
  return f"""Analysis Error: {str(e)}
198
 
199
- Please ensure your CSV files:
200
- 1. Contain numeric financial data
201
- 2. Have proper column headers
202
- 3. Are not corrupted"""
203
 
204
  # Create Gradio interface
205
  iface = gr.Interface(
@@ -218,14 +237,12 @@ iface = gr.Interface(
218
  title="AI Financial Statement Analyzer",
219
  description="""## Financial Analysis Tool
220
 
221
- Upload your financial statements to get:
222
- - Business Status Assessment
223
- - Key Financial Insights
224
- - Strategic Recommendations
225
 
226
- Requirements:
227
- - CSV files with numeric data
228
- - Standard financial statement format""",
229
  flagging_mode="never"
230
  )
231
 
 
6
  import gc
7
  from transformers import pipeline
8
 
 
9
  logging.basicConfig(
10
  level=logging.INFO,
11
  format='%(asctime)s - %(levelname)s - %(message)s'
12
  )
13
  logger = logging.getLogger(__name__)
14
 
 
15
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
16
  logger.info(f"Using device: {DEVICE}")
17
 
18
  def clear_gpu_memory():
 
19
  if DEVICE == "cuda":
20
  torch.cuda.empty_cache()
21
  gc.collect()
22
 
23
  class FinancialAnalyzer:
 
 
24
  def __init__(self):
25
+ self.data_model = None
26
  self.analysis_model = None
27
  self.sentiment_model = None
28
  self.load_models()
29
 
30
  def load_models(self):
31
+ """Load models for data extraction and analysis"""
32
  try:
33
+ # Model for understanding and extracting data from CSV
34
+ logger.info("Loading data extraction model...")
35
+ self.data_model = pipeline(
36
+ "text-generation",
37
+ model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
38
+ torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
39
+ )
40
+
41
+ # Model for financial analysis
42
+ logger.info("Loading analysis model...")
43
  self.analysis_model = pipeline(
44
  "text-generation",
45
  model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
46
  torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
47
  )
48
 
49
+ # Model for sentiment analysis
50
+ logger.info("Loading sentiment model...")
51
  self.sentiment_model = pipeline(
52
  "text-classification",
53
  model="ProsusAI/finbert",
54
  torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
55
  )
56
 
57
+ logger.info("All models loaded successfully")
58
  except Exception as e:
59
  logger.error(f"Error loading models: {str(e)}")
60
  raise
61
 
62
+ def extract_financial_data(self, file_obj, statement_type):
63
+ """Use generative AI to understand and extract data from CSV"""
64
  try:
65
+ # Read raw CSV content
66
+ df = pd.read_csv(file_obj)
67
+ raw_data = df.to_string()
68
 
69
+ # Create prompt for data extraction
70
+ extraction_prompt = f"""[INST] As a financial expert, analyze this raw {statement_type} data:
 
 
 
71
 
72
+ {raw_data}
73
+
74
+ Extract and summarize the following:
75
+ 1. Key financial numbers (revenue, profit, assets, liabilities etc.)
76
+ 2. Time periods covered
77
+ 3. Important trends and patterns
78
+ 4. Any significant financial metrics
79
+
80
+ Present the extracted data in a clear, structured format.
81
+ Focus on the most important financial information regardless of how the data is formatted.
82
+ [/INST]"""
83
+
84
+ # Generate structured extraction
85
+ response = self.data_model(
86
+ extraction_prompt,
87
+ max_length=1000,
88
+ do_sample=False,
89
+ num_return_sequences=1,
90
+ truncation=True
91
+ )
92
 
93
+ logger.info(f"Data extracted from {statement_type}")
94
+ return response[0]['generated_text']
 
95
 
96
  def analyze_financials(self, income_data, balance_data):
97
+ """Generate financial analysis based on extracted data"""
98
  try:
99
+ # Combine extracted data
100
  financial_context = f"""
101
  Income Statement Analysis:
102
+ {income_data}
103
 
104
  Balance Sheet Analysis:
105
+ {balance_data}
106
  """
107
 
108
+ # Get sentiment
109
  sentiment = self.sentiment_model(
110
  financial_context,
111
  truncation=True,
112
  max_length=512
113
  )[0]
114
+
115
+ # Generate comprehensive analysis
116
+ analysis_prompt = f"""[INST] As a senior financial analyst, review this financial data:
117
 
118
  {financial_context}
119
 
120
+ Market Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
121
+
122
+ Provide a detailed analysis including:
123
+
124
+ 1. Business Status
125
+ - Overall financial health
126
+ - Performance assessment
127
+ - Key metrics analysis
128
+
129
+ 2. Strategic Insights
130
+ - Market position
131
+ - Competitive advantages
132
+ - Areas of concern
133
 
134
+ 3. Recommendations & Roadmap
135
+ - Strategic initiatives
136
+ - Improvement opportunities
137
+ - Action timeline
138
 
139
+ Base your analysis on the extracted financial data and provide specific insights.
140
  [/INST]"""
141
 
142
  response = self.analysis_model(
 
147
  truncation=True
148
  )
149
 
150
+ return self.format_response(response[0]['generated_text'], sentiment, financial_context)
151
 
152
  except Exception as e:
153
  logger.error(f"Error in analysis: {str(e)}")
154
  return f"Error generating analysis: {str(e)}"
155
 
156
+ def format_response(self, analysis_text, sentiment, context):
157
  """Format the analysis response"""
158
  try:
 
 
159
  output = [
160
  "# Financial Analysis Report\n\n",
161
+ f"## Market Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n",
162
+ "## Extracted Financial Data\n```\n",
163
+ context,
164
+ "\n```\n\n"
165
  ]
166
+
167
+ sections = analysis_text.split('\n\n')
168
  current_section = None
169
+
170
  for section in sections:
171
  section = section.strip()
172
  if not section:
173
  continue
174
+
175
  if "Business Status" in section:
176
  output.append("## Business Status\n")
177
+ elif "Strategic Insights" in section:
178
+ output.append("\n## Strategic Insights\n")
179
+ elif "Recommendations" in section:
180
+ output.append("\n## Recommendations & Roadmap\n")
181
+ else:
182
+ if not section.startswith('-'):
183
+ section = f"- {section}"
184
+ output.append(f"{section}\n")
185
+
 
186
  return "".join(output)
187
 
188
  except Exception as e:
 
195
  if not income_statement or not balance_sheet:
196
  return "Please upload both Income Statement and Balance Sheet CSV files."
197
 
 
198
  analyzer = FinancialAnalyzer()
199
 
200
+ # Extract data from CSVs using generative AI
201
+ logger.info("Extracting data from Income Statement...")
202
+ income_data = analyzer.extract_financial_data(income_statement, "Income Statement")
203
 
204
+ logger.info("Extracting data from Balance Sheet...")
205
+ balance_data = analyzer.extract_financial_data(balance_sheet, "Balance Sheet")
206
 
207
  # Generate analysis
208
+ logger.info("Generating comprehensive analysis...")
209
  result = analyzer.analyze_financials(income_data, balance_data)
210
 
211
  clear_gpu_memory()
 
215
  logger.error(f"Analysis error: {str(e)}")
216
  return f"""Analysis Error: {str(e)}
217
 
218
+ Please verify:
219
+ 1. Files are in CSV format
220
+ 2. Files contain financial data
221
+ 3. Files are not corrupted"""
222
 
223
  # Create Gradio interface
224
  iface = gr.Interface(
 
237
  title="AI Financial Statement Analyzer",
238
  description="""## Financial Analysis Tool
239
 
240
+ Upload your financial statements (any CSV format) and let AI:
241
+ - Extract and understand the financial data
242
+ - Provide comprehensive analysis
243
+ - Generate strategic recommendations
244
 
245
+ No specific format required - AI will interpret your data!""",
 
 
246
  flagging_mode="never"
247
  )
248