Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -5,13 +5,17 @@ import torch
|
|
5 |
import logging
|
6 |
import gc
|
7 |
from transformers import pipeline
|
|
|
|
|
8 |
|
|
|
9 |
logging.basicConfig(
|
10 |
level=logging.INFO,
|
11 |
format='%(asctime)s - %(levelname)s - %(message)s'
|
12 |
)
|
13 |
logger = logging.getLogger(__name__)
|
14 |
|
|
|
15 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
16 |
logger.info(f"Using device: {DEVICE}")
|
17 |
|
@@ -20,123 +24,71 @@ def clear_gpu_memory():
|
|
20 |
torch.cuda.empty_cache()
|
21 |
gc.collect()
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
class FinancialAnalyzer:
|
24 |
def __init__(self):
|
25 |
-
self.data_model = None
|
26 |
self.analysis_model = None
|
27 |
self.sentiment_model = None
|
28 |
self.load_models()
|
29 |
|
30 |
def load_models(self):
|
31 |
-
"""Load models for data extraction and analysis"""
|
32 |
try:
|
33 |
-
#
|
34 |
-
logger.info("Loading data extraction model...")
|
35 |
-
self.data_model = pipeline(
|
36 |
-
"text-generation",
|
37 |
-
model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
38 |
-
torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
|
39 |
-
)
|
40 |
-
|
41 |
-
# Model for financial analysis
|
42 |
-
logger.info("Loading analysis model...")
|
43 |
self.analysis_model = pipeline(
|
44 |
"text-generation",
|
45 |
model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
46 |
torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
|
47 |
)
|
48 |
|
49 |
-
#
|
50 |
-
logger.info("Loading sentiment model...")
|
51 |
self.sentiment_model = pipeline(
|
52 |
"text-classification",
|
53 |
model="ProsusAI/finbert",
|
54 |
torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
|
55 |
)
|
56 |
|
57 |
-
logger.info("
|
58 |
except Exception as e:
|
59 |
logger.error(f"Error loading models: {str(e)}")
|
60 |
raise
|
61 |
|
62 |
-
def
|
63 |
-
"""Use generative AI to understand and extract data from CSV"""
|
64 |
-
try:
|
65 |
-
# Read raw CSV content
|
66 |
-
df = pd.read_csv(file_obj)
|
67 |
-
raw_data = df.to_string()
|
68 |
-
|
69 |
-
# Create prompt for data extraction
|
70 |
-
extraction_prompt = f"""[INST] As a financial expert, analyze this raw {statement_type} data:
|
71 |
-
|
72 |
-
{raw_data}
|
73 |
-
|
74 |
-
Extract and summarize the following:
|
75 |
-
1. Key financial numbers (revenue, profit, assets, liabilities etc.)
|
76 |
-
2. Time periods covered
|
77 |
-
3. Important trends and patterns
|
78 |
-
4. Any significant financial metrics
|
79 |
-
|
80 |
-
Present the extracted data in a clear, structured format.
|
81 |
-
Focus on the most important financial information regardless of how the data is formatted.
|
82 |
-
[/INST]"""
|
83 |
-
|
84 |
-
# Generate structured extraction
|
85 |
-
response = self.data_model(
|
86 |
-
extraction_prompt,
|
87 |
-
max_length=1000,
|
88 |
-
do_sample=False,
|
89 |
-
num_return_sequences=1,
|
90 |
-
truncation=True
|
91 |
-
)
|
92 |
-
|
93 |
-
logger.info(f"Data extracted from {statement_type}")
|
94 |
-
return response[0]['generated_text']
|
95 |
-
|
96 |
-
def analyze_financials(self, income_data, balance_data):
|
97 |
-
"""Generate financial analysis based on extracted data"""
|
98 |
try:
|
99 |
-
#
|
100 |
-
financial_context = f"""
|
101 |
-
Income Statement Analysis:
|
102 |
-
{income_data}
|
103 |
-
|
104 |
-
Balance Sheet Analysis:
|
105 |
-
{balance_data}
|
106 |
-
"""
|
107 |
-
|
108 |
-
# Get sentiment
|
109 |
sentiment = self.sentiment_model(
|
110 |
-
|
111 |
truncation=True,
|
112 |
max_length=512
|
113 |
)[0]
|
|
|
|
|
|
|
114 |
|
115 |
-
|
116 |
-
analysis_prompt = f"""[INST] As a senior financial analyst, review this financial data:
|
117 |
-
|
118 |
-
{financial_context}
|
119 |
-
|
120 |
-
Market Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
|
121 |
-
|
122 |
-
Provide a detailed analysis including:
|
123 |
-
|
124 |
-
1. Business Status
|
125 |
-
- Overall financial health
|
126 |
-
- Performance assessment
|
127 |
-
- Key metrics analysis
|
128 |
|
129 |
-
|
130 |
-
- Market position
|
131 |
-
- Competitive advantages
|
132 |
-
- Areas of concern
|
133 |
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
|
139 |
-
|
140 |
[/INST]"""
|
141 |
|
142 |
response = self.analysis_model(
|
@@ -147,23 +99,21 @@ class FinancialAnalyzer:
|
|
147 |
truncation=True
|
148 |
)
|
149 |
|
150 |
-
return self.format_response(response[0]['generated_text'], sentiment,
|
151 |
-
|
152 |
except Exception as e:
|
153 |
logger.error(f"Error in analysis: {str(e)}")
|
154 |
return f"Error generating analysis: {str(e)}"
|
155 |
|
156 |
def format_response(self, analysis_text, sentiment, context):
|
157 |
-
"""Format the analysis response"""
|
158 |
try:
|
159 |
output = [
|
160 |
"# Financial Analysis Report\n\n",
|
161 |
-
f"##
|
162 |
-
"##
|
163 |
context,
|
164 |
"\n```\n\n"
|
165 |
]
|
166 |
-
|
167 |
sections = analysis_text.split('\n\n')
|
168 |
current_section = None
|
169 |
|
@@ -174,43 +124,69 @@ class FinancialAnalyzer:
|
|
174 |
|
175 |
if "Business Status" in section:
|
176 |
output.append("## Business Status\n")
|
177 |
-
elif "
|
178 |
-
output.append("\n##
|
179 |
-
elif "Recommendations" in section:
|
180 |
-
output.append("\n## Recommendations
|
181 |
else:
|
182 |
if not section.startswith('-'):
|
183 |
section = f"- {section}"
|
184 |
output.append(f"{section}\n")
|
185 |
-
|
186 |
-
return "".join(output)
|
187 |
|
|
|
188 |
except Exception as e:
|
189 |
logger.error(f"Error formatting response: {str(e)}")
|
190 |
return "Error formatting analysis results"
|
191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
def analyze_statements(income_statement, balance_sheet):
|
193 |
-
"""Main function to analyze financial statements"""
|
194 |
try:
|
195 |
if not income_statement or not balance_sheet:
|
196 |
return "Please upload both Income Statement and Balance Sheet CSV files."
|
197 |
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
214 |
except Exception as e:
|
215 |
logger.error(f"Analysis error: {str(e)}")
|
216 |
return f"""Analysis Error: {str(e)}
|
@@ -237,12 +213,14 @@ iface = gr.Interface(
|
|
237 |
title="AI Financial Statement Analyzer",
|
238 |
description="""## Financial Analysis Tool
|
239 |
|
240 |
-
Upload your financial statements
|
241 |
-
-
|
242 |
-
-
|
243 |
-
-
|
244 |
|
245 |
-
|
|
|
|
|
246 |
flagging_mode="never"
|
247 |
)
|
248 |
|
|
|
5 |
import logging
|
6 |
import gc
|
7 |
from transformers import pipeline
|
8 |
+
import json
|
9 |
+
import csv
|
10 |
|
11 |
+
# Setup logging
|
12 |
logging.basicConfig(
|
13 |
level=logging.INFO,
|
14 |
format='%(asctime)s - %(levelname)s - %(message)s'
|
15 |
)
|
16 |
logger = logging.getLogger(__name__)
|
17 |
|
18 |
+
# Device configuration
|
19 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
20 |
logger.info(f"Using device: {DEVICE}")
|
21 |
|
|
|
24 |
torch.cuda.empty_cache()
|
25 |
gc.collect()
|
26 |
|
27 |
+
def clean_financial_value(value):
    """Convert a raw financial cell into a float, falling back to 0.0.

    Strings are normalised before parsing: surrounding whitespace, double
    quotes, embedded spaces, thousands separators and common currency
    symbols are removed, and accounting-style parentheses such as
    ``(1,234)`` are read as a negative amount.

    Args:
        value: A string or a real number (including NumPy scalars, which
            register with ``numbers.Real``).

    Returns:
        The parsed float, or 0.0 when the value is not a real number, cannot
        be parsed, or is NaN.
    """
    import math
    import numbers

    if isinstance(value, str):
        text = value.strip()
        for junk in ('"', ' ', ',', '$', '€', '£', '¥'):
            text = text.replace(junk, '')
        # Accounting notation: a parenthesised amount is negative.
        if '(' in text and ')' in text:
            text = '-' + text.replace('(', '').replace(')', '')
        try:
            number = float(text)
        except ValueError:
            return 0.0
    elif isinstance(value, numbers.Real):
        # numbers.Real (rather than (int, float)) also accepts NumPy integer
        # scalars, which are not subclasses of the built-in int.
        try:
            number = float(value)
        except OverflowError as e:
            # Only reachable for integers too large for a float.
            logger.error(f"Error cleaning value: {str(e)}")
            return 0.0
    else:
        return 0.0

    # NaN (an empty cell, or the literal text "nan") would poison any later
    # arithmetic, so treat it like every other unusable value.
    return 0.0 if math.isnan(number) else number
|
42 |
+
|
43 |
class FinancialAnalyzer:
|
44 |
def __init__(self):
    """Create the analyzer and load both pipelines immediately."""
    # Placeholders so the attributes exist even if loading fails part-way.
    self.analysis_model = self.sentiment_model = None
    self.load_models()
|
48 |
|
49 |
def load_models(self):
    """Load the text-generation and sentiment pipelines on ``DEVICE``.

    Populates ``self.analysis_model`` (TinyLlama chat model) and
    ``self.sentiment_model`` (FinBERT classifier).

    Raises:
        Exception: Whatever ``pipeline`` raises is logged and re-raised so
            the caller sees the original failure.
    """
    # Half precision is only requested when running on CUDA.
    dtype = torch.float16 if DEVICE == "cuda" else torch.float32

    try:
        # Load analysis model. ``device`` is passed explicitly so placement
        # follows the DEVICE that was detected and logged at import time,
        # instead of depending on the library's default.
        self.analysis_model = pipeline(
            "text-generation",
            model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            torch_dtype=dtype,
            device=DEVICE,
        )

        # Load sentiment model
        self.sentiment_model = pipeline(
            "text-classification",
            model="ProsusAI/finbert",
            torch_dtype=dtype,
            device=DEVICE,
        )

        logger.info("Models loaded successfully")
    except Exception as e:
        logger.error(f"Error loading models: {str(e)}")
        raise
|
69 |
|
70 |
+
def analyze_financials(self, context):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
try:
|
72 |
+
# Generate sentiment analysis
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
sentiment = self.sentiment_model(
|
74 |
+
context,
|
75 |
truncation=True,
|
76 |
max_length=512
|
77 |
)[0]
|
78 |
+
|
79 |
+
# Generate analysis
|
80 |
+
analysis_prompt = f"""[INST] As a financial analyst, analyze this data:
|
81 |
|
82 |
+
{context}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
+
Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
|
|
|
|
|
|
|
85 |
|
86 |
+
Provide:
|
87 |
+
1. Business Status and Health Assessment
|
88 |
+
2. Key Financial Insights and Metrics
|
89 |
+
3. Strategic Recommendations and Action Plan
|
90 |
|
91 |
+
Be specific and data-driven in your analysis.
|
92 |
[/INST]"""
|
93 |
|
94 |
response = self.analysis_model(
|
|
|
99 |
truncation=True
|
100 |
)
|
101 |
|
102 |
+
return self.format_response(response[0]['generated_text'], sentiment, context)
|
|
|
103 |
except Exception as e:
|
104 |
logger.error(f"Error in analysis: {str(e)}")
|
105 |
return f"Error generating analysis: {str(e)}"
|
106 |
|
107 |
def format_response(self, analysis_text, sentiment, context):
|
|
|
108 |
try:
|
109 |
output = [
|
110 |
"# Financial Analysis Report\n\n",
|
111 |
+
f"## Overall Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n",
|
112 |
+
"## Financial Data\n```\n",
|
113 |
context,
|
114 |
"\n```\n\n"
|
115 |
]
|
116 |
+
|
117 |
sections = analysis_text.split('\n\n')
|
118 |
current_section = None
|
119 |
|
|
|
124 |
|
125 |
if "Business Status" in section:
|
126 |
output.append("## Business Status\n")
|
127 |
+
elif "Financial Insights" in section:
|
128 |
+
output.append("\n## Key Insights\n")
|
129 |
+
elif "Strategic Recommendations" in section:
|
130 |
+
output.append("\n## Recommendations\n")
|
131 |
else:
|
132 |
if not section.startswith('-'):
|
133 |
section = f"- {section}"
|
134 |
output.append(f"{section}\n")
|
|
|
|
|
135 |
|
136 |
+
return "".join(output)
|
137 |
except Exception as e:
|
138 |
logger.error(f"Error formatting response: {str(e)}")
|
139 |
return "Error formatting analysis results"
|
140 |
|
141 |
+
def save_organized_data(structured_data, filename):
    """Write ``structured_data`` to ``filename`` as indented JSON.

    Args:
        structured_data: Any JSON-serialisable object.
        filename: Destination path; an existing file is overwritten.

    Returns:
        True when the file was written, False when serialisation or the
        write failed (the error is logged, never raised).
    """
    try:
        # Serialise first: opening with 'w' truncates the target, so a
        # failure inside json.dump would leave a partial or empty file.
        payload = json.dumps(structured_data, indent=4)
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(payload)
        return True
    except Exception as e:
        # Deliberately broad: saving is best-effort and callers rely on the
        # boolean result rather than on exceptions.
        logger.error(f"Error saving data: {str(e)}")
        return False
|
149 |
+
|
150 |
def analyze_statements(income_statement, balance_sheet):
|
|
|
151 |
try:
|
152 |
if not income_statement or not balance_sheet:
|
153 |
return "Please upload both Income Statement and Balance Sheet CSV files."
|
154 |
|
155 |
+
# Read and organize data
|
156 |
+
try:
|
157 |
+
income_df = pd.read_csv(income_statement)
|
158 |
+
balance_df = pd.read_csv(balance_sheet)
|
159 |
+
|
160 |
+
# Clean and structure data
|
161 |
+
financial_data = {
|
162 |
+
"income_statement": income_df.to_dict(orient='records'),
|
163 |
+
"balance_sheet": balance_df.to_dict(orient='records')
|
164 |
+
}
|
165 |
+
|
166 |
+
# Save structured data
|
167 |
+
save_organized_data(financial_data, "organized_financial_data.json")
|
168 |
+
|
169 |
+
# Create analysis context
|
170 |
+
context = f"""Financial Data Summary:
|
171 |
+
|
172 |
+
Income Statement:
|
173 |
+
{income_df.to_string()}
|
174 |
+
|
175 |
+
Balance Sheet:
|
176 |
+
{balance_df.to_string()}
|
177 |
+
"""
|
178 |
+
|
179 |
+
# Initialize analyzer and generate analysis
|
180 |
+
analyzer = FinancialAnalyzer()
|
181 |
+
result = analyzer.analyze_financials(context)
|
182 |
+
|
183 |
+
clear_gpu_memory()
|
184 |
+
return result
|
185 |
+
|
186 |
+
except Exception as e:
|
187 |
+
logger.error(f"Error processing files: {str(e)}")
|
188 |
+
raise
|
189 |
+
|
190 |
except Exception as e:
|
191 |
logger.error(f"Analysis error: {str(e)}")
|
192 |
return f"""Analysis Error: {str(e)}
|
|
|
213 |
title="AI Financial Statement Analyzer",
|
214 |
description="""## Financial Analysis Tool
|
215 |
|
216 |
+
Upload your financial statements to get:
|
217 |
+
- Business Status Assessment
|
218 |
+
- Key Financial Insights
|
219 |
+
- Strategic Recommendations
|
220 |
|
221 |
+
Requirements:
|
222 |
+
- CSV files with financial data
|
223 |
+
- Standard financial statement format""",
|
224 |
flagging_mode="never"
|
225 |
)
|
226 |
|