Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,119 +6,137 @@ import logging
|
|
6 |
import gc
|
7 |
from transformers import pipeline
|
8 |
|
9 |
-
# Setup logging
|
10 |
# Emit INFO-and-above records through the root logger with a timestamped format.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Use the GPU when PyTorch reports one is available; otherwise stay on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
logger.info(f"Using device: {DEVICE}")
|
19 |
|
20 |
def clear_gpu_memory():
    """Utility function to clear GPU memory.

    Runs Python's garbage collector, then, when ``DEVICE`` is ``"cuda"``,
    asks PyTorch to release its unoccupied cached GPU memory.
    """
    # Collect first: empty_cache() only releases blocks that are no longer
    # occupied, so tensors kept alive by reference cycles must be destroyed
    # before the cache is emptied.
    gc.collect()
    if DEVICE == "cuda":
        torch.cuda.empty_cache()
|
25 |
|
26 |
class FinancialAnalyzer:
|
27 |
-
"""Financial analysis using Tiny Llama and FinBERT models"""
|
28 |
-
|
29 |
def __init__(self):
    """Start with both model slots empty, then call ``load_models``."""
    self.analysis_model = self.sentiment_model = None
    self.load_models()
|
33 |
|
34 |
def load_models(self):
|
35 |
-
"""Load models for
|
36 |
try:
|
37 |
-
#
|
38 |
-
logger.info("Loading
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
self.analysis_model = pipeline(
|
40 |
"text-generation",
|
41 |
model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
42 |
torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
|
43 |
)
|
44 |
|
45 |
-
#
|
46 |
-
logger.info("Loading
|
47 |
self.sentiment_model = pipeline(
|
48 |
"text-classification",
|
49 |
model="ProsusAI/finbert",
|
50 |
torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
|
51 |
)
|
52 |
|
53 |
-
logger.info("
|
54 |
except Exception as e:
|
55 |
logger.error(f"Error loading models: {str(e)}")
|
56 |
raise
|
57 |
|
58 |
-
def
|
59 |
-
"""
|
60 |
try:
|
61 |
-
|
62 |
-
|
|
|
63 |
|
64 |
-
#
|
65 |
-
|
66 |
-
|
67 |
-
if df.empty:
|
68 |
-
raise ValueError("Empty CSV file")
|
69 |
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
|
|
|
|
|
|
|
|
86 |
|
87 |
-
|
88 |
-
|
89 |
-
raise
|
90 |
|
91 |
def analyze_financials(self, income_data, balance_data):
|
92 |
-
"""Generate financial analysis
|
93 |
try:
|
|
|
94 |
financial_context = f"""
|
95 |
Income Statement Analysis:
|
96 |
-
{income_data
|
97 |
|
98 |
Balance Sheet Analysis:
|
99 |
-
{balance_data
|
100 |
"""
|
101 |
|
102 |
-
#
|
103 |
sentiment = self.sentiment_model(
|
104 |
financial_context,
|
105 |
truncation=True,
|
106 |
max_length=512
|
107 |
)[0]
|
108 |
-
|
109 |
-
# Generate analysis
|
110 |
-
analysis_prompt = f"""[INST] As a financial analyst,
|
111 |
|
112 |
{financial_context}
|
113 |
|
114 |
-
Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
|
121 |
-
|
122 |
[/INST]"""
|
123 |
|
124 |
response = self.analysis_model(
|
@@ -129,40 +147,42 @@ class FinancialAnalyzer:
|
|
129 |
truncation=True
|
130 |
)
|
131 |
|
132 |
-
return self.format_response(response[0]['generated_text'], sentiment)
|
133 |
|
134 |
except Exception as e:
|
135 |
logger.error(f"Error in analysis: {str(e)}")
|
136 |
return f"Error generating analysis: {str(e)}"
|
137 |
|
138 |
-
def format_response(self, analysis_text, sentiment):
|
139 |
"""Format the analysis response"""
|
140 |
try:
|
141 |
-
sections = analysis_text.split('\n\n')
|
142 |
-
|
143 |
output = [
|
144 |
"# Financial Analysis Report\n\n",
|
145 |
-
f"##
|
|
|
|
|
|
|
146 |
]
|
147 |
-
|
|
|
148 |
current_section = None
|
|
|
149 |
for section in sections:
|
150 |
section = section.strip()
|
151 |
if not section:
|
152 |
continue
|
153 |
-
|
154 |
if "Business Status" in section:
|
155 |
output.append("## Business Status\n")
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
return "".join(output)
|
167 |
|
168 |
except Exception as e:
|
@@ -175,18 +195,17 @@ def analyze_statements(income_statement, balance_sheet):
|
|
175 |
if not income_statement or not balance_sheet:
|
176 |
return "Please upload both Income Statement and Balance Sheet CSV files."
|
177 |
|
178 |
-
# Initialize analyzer
|
179 |
analyzer = FinancialAnalyzer()
|
180 |
|
181 |
-
#
|
182 |
-
logger.info("
|
183 |
-
income_data = analyzer.
|
184 |
|
185 |
-
logger.info("
|
186 |
-
balance_data = analyzer.
|
187 |
|
188 |
# Generate analysis
|
189 |
-
logger.info("Generating analysis...")
|
190 |
result = analyzer.analyze_financials(income_data, balance_data)
|
191 |
|
192 |
clear_gpu_memory()
|
@@ -196,10 +215,10 @@ def analyze_statements(income_statement, balance_sheet):
|
|
196 |
logger.error(f"Analysis error: {str(e)}")
|
197 |
return f"""Analysis Error: {str(e)}
|
198 |
|
199 |
-
Please
|
200 |
-
1.
|
201 |
-
2.
|
202 |
-
3.
|
203 |
|
204 |
# Create Gradio interface
|
205 |
iface = gr.Interface(
|
@@ -218,14 +237,12 @@ iface = gr.Interface(
|
|
218 |
title="AI Financial Statement Analyzer",
|
219 |
description="""## Financial Analysis Tool
|
220 |
|
221 |
-
Upload your financial statements
|
222 |
-
-
|
223 |
-
-
|
224 |
-
-
|
225 |
|
226 |
-
|
227 |
-
- CSV files with numeric data
|
228 |
-
- Standard financial statement format""",
|
229 |
flagging_mode="never"
|
230 |
)
|
231 |
|
|
|
6 |
import gc
|
7 |
from transformers import pipeline
|
8 |
|
|
|
9 |
# Emit INFO-and-above records through the root logger with a timestamped format.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Use the GPU when PyTorch reports one is available; otherwise stay on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
logger.info(f"Using device: {DEVICE}")
|
17 |
|
18 |
def clear_gpu_memory():
    """Release memory that is no longer referenced.

    Runs Python's garbage collector, then, when ``DEVICE`` is ``"cuda"``,
    asks PyTorch to release its unoccupied cached GPU memory.
    """
    # Collect first: empty_cache() only releases blocks that are no longer
    # occupied, so tensors kept alive by reference cycles must be destroyed
    # before the cache is emptied.
    gc.collect()
    if DEVICE == "cuda":
        torch.cuda.empty_cache()
|
22 |
|
23 |
class FinancialAnalyzer:
|
|
|
|
|
24 |
def __init__(self):
    """Start with all three model slots empty, then call ``load_models``."""
    self.data_model = self.analysis_model = self.sentiment_model = None
    self.load_models()
|
29 |
|
30 |
def load_models(self):
    """Load models for data extraction and analysis.

    Populates ``self.data_model`` and ``self.analysis_model`` with one shared
    TinyLlama text-generation pipeline and ``self.sentiment_model`` with a
    FinBERT text-classification pipeline.

    Raises:
        Exception: Any error raised while building a pipeline is logged and
            re-raised unchanged.
    """
    try:
        # Half precision is only selected when running on CUDA.
        dtype = torch.float16 if DEVICE == "cuda" else torch.float32

        # Model for understanding and extracting data from CSV
        logger.info("Loading data extraction model...")
        self.data_model = pipeline(
            "text-generation",
            model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            torch_dtype=dtype,
            # Place the model on the device the dtype was chosen for.
            device=DEVICE,
        )

        # Model for financial analysis: extraction and analysis use the very
        # same checkpoint and task, so reuse the pipeline instead of loading
        # a second copy of the weights. Generation options are passed per
        # call, so sharing one instance does not couple the two uses.
        self.analysis_model = self.data_model

        # Model for sentiment analysis
        logger.info("Loading sentiment model...")
        self.sentiment_model = pipeline(
            "text-classification",
            model="ProsusAI/finbert",
            torch_dtype=dtype,
            device=DEVICE,
        )

        logger.info("All models loaded successfully")
    except Exception as e:
        logger.error(f"Error loading models: {str(e)}")
        raise
|
61 |
|
62 |
+
def extract_financial_data(self, file_obj, statement_type):
    """Use generative AI to understand and extract data from CSV.

    Args:
        file_obj: Whatever ``pd.read_csv`` is given as its input; the exact
            type supplied by the caller is not visible here.
        statement_type: Label such as "Income Statement" that is inserted
            into the prompt and the log messages.

    Returns:
        The text generated by ``self.data_model`` for the extraction prompt,
        without the prompt itself.

    Raises:
        ValueError: If the CSV parses to an empty table.
        Exception: Any other error from reading the CSV or from generation
            is logged and re-raised unchanged.
    """
    try:
        # Read raw CSV content
        df = pd.read_csv(file_obj)
        if df.empty:
            raise ValueError("Empty CSV file")
        raw_data = df.to_string()

        # Create prompt for data extraction. Built from explicit pieces so
        # the prompt text does not depend on source-code indentation.
        extraction_prompt = (
            f"[INST] As a financial expert, analyze this raw {statement_type} data:\n"
            "\n"
            f"{raw_data}\n"
            "\n"
            "Extract and summarize the following:\n"
            "1. Key financial numbers (revenue, profit, assets, liabilities etc.)\n"
            "2. Time periods covered\n"
            "3. Important trends and patterns\n"
            "4. Any significant financial metrics\n"
            "\n"
            "Present the extracted data in a clear, structured format.\n"
            "Focus on the most important financial information regardless of how the data is formatted.\n"
            "[/INST]"
        )

        # Generate structured extraction.
        # NOTE(review): max_length bounds prompt plus completion; for large
        # CSVs this may leave little room to generate -- confirm whether
        # max_new_tokens was intended.
        response = self.data_model(
            extraction_prompt,
            max_length=1000,
            do_sample=False,
            num_return_sequences=1,
            truncation=True,
            # Return only the completion; by default the pipeline echoes the
            # whole prompt (instructions and raw table) in generated_text.
            return_full_text=False,
        )

        logger.info(f"Data extracted from {statement_type}")
        return response[0]['generated_text']
    except Exception as e:
        # A try block must have a handler; log and propagate, matching the
        # error handling used in load_models.
        logger.error(f"Error extracting data from {statement_type}: {str(e)}")
        raise
|
|
|
95 |
|
96 |
def analyze_financials(self, income_data, balance_data):
|
97 |
+
"""Generate financial analysis based on extracted data"""
|
98 |
try:
|
99 |
+
# Combine extracted data
|
100 |
financial_context = f"""
|
101 |
Income Statement Analysis:
|
102 |
+
{income_data}
|
103 |
|
104 |
Balance Sheet Analysis:
|
105 |
+
{balance_data}
|
106 |
"""
|
107 |
|
108 |
+
# Get sentiment
|
109 |
sentiment = self.sentiment_model(
|
110 |
financial_context,
|
111 |
truncation=True,
|
112 |
max_length=512
|
113 |
)[0]
|
114 |
+
|
115 |
+
# Generate comprehensive analysis
|
116 |
+
analysis_prompt = f"""[INST] As a senior financial analyst, review this financial data:
|
117 |
|
118 |
{financial_context}
|
119 |
|
120 |
+
Market Sentiment: {sentiment['label']} ({sentiment['score']:.2%})
|
121 |
+
|
122 |
+
Provide a detailed analysis including:
|
123 |
+
|
124 |
+
1. Business Status
|
125 |
+
- Overall financial health
|
126 |
+
- Performance assessment
|
127 |
+
- Key metrics analysis
|
128 |
+
|
129 |
+
2. Strategic Insights
|
130 |
+
- Market position
|
131 |
+
- Competitive advantages
|
132 |
+
- Areas of concern
|
133 |
|
134 |
+
3. Recommendations & Roadmap
|
135 |
+
- Strategic initiatives
|
136 |
+
- Improvement opportunities
|
137 |
+
- Action timeline
|
138 |
|
139 |
+
Base your analysis on the extracted financial data and provide specific insights.
|
140 |
[/INST]"""
|
141 |
|
142 |
response = self.analysis_model(
|
|
|
147 |
truncation=True
|
148 |
)
|
149 |
|
150 |
+
return self.format_response(response[0]['generated_text'], sentiment, financial_context)
|
151 |
|
152 |
except Exception as e:
|
153 |
logger.error(f"Error in analysis: {str(e)}")
|
154 |
return f"Error generating analysis: {str(e)}"
|
155 |
|
156 |
+
def format_response(self, analysis_text, sentiment, context):
|
157 |
"""Format the analysis response"""
|
158 |
try:
|
|
|
|
|
159 |
output = [
|
160 |
"# Financial Analysis Report\n\n",
|
161 |
+
f"## Market Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n",
|
162 |
+
"## Extracted Financial Data\n```\n",
|
163 |
+
context,
|
164 |
+
"\n```\n\n"
|
165 |
]
|
166 |
+
|
167 |
+
sections = analysis_text.split('\n\n')
|
168 |
current_section = None
|
169 |
+
|
170 |
for section in sections:
|
171 |
section = section.strip()
|
172 |
if not section:
|
173 |
continue
|
174 |
+
|
175 |
if "Business Status" in section:
|
176 |
output.append("## Business Status\n")
|
177 |
+
elif "Strategic Insights" in section:
|
178 |
+
output.append("\n## Strategic Insights\n")
|
179 |
+
elif "Recommendations" in section:
|
180 |
+
output.append("\n## Recommendations & Roadmap\n")
|
181 |
+
else:
|
182 |
+
if not section.startswith('-'):
|
183 |
+
section = f"- {section}"
|
184 |
+
output.append(f"{section}\n")
|
185 |
+
|
|
|
186 |
return "".join(output)
|
187 |
|
188 |
except Exception as e:
|
|
|
195 |
if not income_statement or not balance_sheet:
|
196 |
return "Please upload both Income Statement and Balance Sheet CSV files."
|
197 |
|
|
|
198 |
analyzer = FinancialAnalyzer()
|
199 |
|
200 |
+
# Extract data from CSVs using generative AI
|
201 |
+
logger.info("Extracting data from Income Statement...")
|
202 |
+
income_data = analyzer.extract_financial_data(income_statement, "Income Statement")
|
203 |
|
204 |
+
logger.info("Extracting data from Balance Sheet...")
|
205 |
+
balance_data = analyzer.extract_financial_data(balance_sheet, "Balance Sheet")
|
206 |
|
207 |
# Generate analysis
|
208 |
+
logger.info("Generating comprehensive analysis...")
|
209 |
result = analyzer.analyze_financials(income_data, balance_data)
|
210 |
|
211 |
clear_gpu_memory()
|
|
|
215 |
logger.error(f"Analysis error: {str(e)}")
|
216 |
return f"""Analysis Error: {str(e)}
|
217 |
|
218 |
+
Please verify:
|
219 |
+
1. Files are in CSV format
|
220 |
+
2. Files contain financial data
|
221 |
+
3. Files are not corrupted"""
|
222 |
|
223 |
# Create Gradio interface
|
224 |
iface = gr.Interface(
|
|
|
237 |
title="AI Financial Statement Analyzer",
|
238 |
description="""## Financial Analysis Tool
|
239 |
|
240 |
+
Upload your financial statements (any CSV format) and let AI:
|
241 |
+
- Extract and understand the financial data
|
242 |
+
- Provide comprehensive analysis
|
243 |
+
- Generate strategic recommendations
|
244 |
|
245 |
+
No specific format required - AI will interpret your data!""",
|
|
|
|
|
246 |
flagging_mode="never"
|
247 |
)
|
248 |
|