Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import os
|
2 |
import gradio as gr
|
3 |
import pandas as pd
|
4 |
import torch
|
@@ -17,200 +17,103 @@ logger = logging.getLogger(__name__)
|
|
17 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
18 |
logger.info(f"Using device: {DEVICE}")
|
19 |
|
|
|
20 |
def clear_gpu_memory():
    """Release memory that Python no longer references, then trim the CUDA cache.

    Garbage collection runs first so that objects kept alive only by
    reference cycles are destroyed before the CUDA caching allocator is asked
    to give back its unused blocks. Running the two steps in the opposite
    order leaves the blocks freed by the collection sitting in the cache.
    """
    # Collecting is cheap and useful on every device, so it is not gated.
    gc.collect()
    if DEVICE == "cuda":
        torch.cuda.empty_cache()
|
25 |
|
26 |
-
class FinancialDataExtractor:
    """Extract and clean financial data"""

    def __init__(self):
        self.logger = logger

    def clean_number(self, value):
        """Convert one financial-statement cell to a float.

        Missing values, empty strings and a lone ``-`` become ``0.0``.
        ``$``, ``,`` and ``"`` are stripped, and a value containing both
        ``(`` and ``)`` is treated as negative. Anything that still cannot be
        parsed is returned as ``0.0`` rather than raising.
        """
        try:
            if pd.isna(value) or value == '' or value == '-':
                return 0.0
            if isinstance(value, (int, float)):
                return float(value)

            # Remove currency symbols, thousands separators and stray quotes.
            cleaned = str(value).replace('$', '').replace(',', '').replace('"', '').strip()
            # Parentheses mark a negative amount.
            if '(' in cleaned and ')' in cleaned:
                cleaned = '-' + cleaned.replace('(', '').replace(')', '')
            return float(cleaned)
        except (TypeError, ValueError):
            # Only parsing failures are mapped to 0.0; KeyboardInterrupt,
            # SystemExit and genuine programming errors still propagate.
            return 0.0

    def extract_data(self, df: pd.DataFrame) -> "tuple[pd.DataFrame, list]":
        """Clean the year columns of ``df`` and report which columns they are.

        String column labels are stripped of surrounding whitespace; labels
        of any other type are kept unchanged. A column counts as a year
        column when ``str(label)`` consists only of digits. ``df`` is
        modified in place and also returned.

        Returns:
            ``(df, year_cols)`` where ``year_cols`` lists the year column
            labels in their original order.

        Raises:
            ValueError: if no year column is found.
        """
        # The .str accessor fails on non-string labels, so strip per label.
        df.columns = [
            label.strip() if isinstance(label, str) else label
            for label in df.columns
        ]

        year_cols = [col for col in df.columns if str(col).isdigit()]
        if not year_cols:
            raise ValueError("No year columns found in data")

        for col in year_cols:
            df[col] = df[col].apply(self.clean_number)

        return df, year_cols
|
65 |
-
|
66 |
class FinancialAnalyzer:
|
67 |
-
"""Financial analysis using
|
68 |
|
69 |
def __init__(self):
|
70 |
-
self.extractor = FinancialDataExtractor()
|
71 |
-
self.sentiment_model = None
|
72 |
self.analysis_model = None
|
|
|
|
|
73 |
self.load_models()
|
74 |
|
75 |
def load_models(self):
|
76 |
-
"""Load
|
77 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
# Load FinBERT for sentiment analysis
|
79 |
self.sentiment_model = pipeline(
|
80 |
"text-classification",
|
81 |
-
model="
|
82 |
-
torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
|
83 |
-
truncation=True
|
84 |
)
|
85 |
|
86 |
-
# Load
|
87 |
-
self.
|
88 |
"text-generation",
|
89 |
-
model="
|
90 |
torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
|
91 |
)
|
92 |
|
93 |
-
logger.info("
|
94 |
except Exception as e:
|
95 |
logger.error(f"Error loading models: {str(e)}")
|
96 |
raise
|
97 |
|
98 |
-
def
|
99 |
-
"""
|
100 |
-
metrics = {}
|
101 |
-
|
102 |
-
for year in year_cols:
|
103 |
-
# Income Statement metrics
|
104 |
-
income = {
|
105 |
-
'Revenue': income_df[income_df['Period'].str.contains('Total Net Revenue|Revenue', na=False, case=False)][year].iloc[0],
|
106 |
-
'COGS': income_df[income_df['Period'].str.contains('Cost of Goods Sold', na=False, case=False)][year].iloc[0],
|
107 |
-
'Operating_Expenses': income_df[income_df['Period'].str.contains('Total Expenses', na=False, case=False)][year].iloc[0],
|
108 |
-
'EBIT': income_df[income_df['Period'].str.contains('Earnings Before Interest & Taxes', na=False, case=False)][year].iloc[0],
|
109 |
-
'Net_Income': income_df[income_df['Period'].str.contains('Net Income|Net Earnings', na=False, case=False)][year].iloc[-1]
|
110 |
-
}
|
111 |
-
|
112 |
-
# Balance Sheet metrics
|
113 |
-
balance = {
|
114 |
-
'Total_Assets': balance_df[balance_df['Period'].str.contains('Total Assets', na=False, case=False)][year].iloc[0],
|
115 |
-
'Current_Assets': balance_df[balance_df['Period'].str.contains('Total current assets', na=False, case=False)][year].iloc[0],
|
116 |
-
'Total_Liabilities': balance_df[balance_df['Period'].str.contains('Total Liabilities', na=False, case=False)][year].iloc[0],
|
117 |
-
'Current_Liabilities': balance_df[balance_df['Period'].str.contains('Total current liabilities', na=False, case=False)][year].iloc[0],
|
118 |
-
'Equity': balance_df[balance_df['Period'].str.contains("Shareholder's Equity", na=False, case=False)][year].iloc[-1]
|
119 |
-
}
|
120 |
-
|
121 |
-
# Calculate ratios
|
122 |
-
metrics[year] = {
|
123 |
-
'Profitability': {
|
124 |
-
'Gross_Margin': ((income['Revenue'] - income['COGS']) / income['Revenue']) * 100,
|
125 |
-
'Operating_Margin': (income['EBIT'] / income['Revenue']) * 100,
|
126 |
-
'Net_Margin': (income['Net_Income'] / income['Revenue']) * 100,
|
127 |
-
'ROE': (income['Net_Income'] / balance['Equity']) * 100,
|
128 |
-
'ROA': (income['Net_Income'] / balance['Total_Assets']) * 100
|
129 |
-
},
|
130 |
-
'Liquidity': {
|
131 |
-
'Current_Ratio': balance['Current_Assets'] / balance['Current_Liabilities'],
|
132 |
-
'Working_Capital': balance['Current_Assets'] - balance['Current_Liabilities']
|
133 |
-
},
|
134 |
-
'Growth': {
|
135 |
-
'Revenue': income['Revenue'],
|
136 |
-
'Net_Income': income['Net_Income'],
|
137 |
-
'Total_Assets': balance['Total_Assets']
|
138 |
-
}
|
139 |
-
}
|
140 |
-
|
141 |
-
return metrics
|
142 |
-
|
143 |
-
def analyze_financials(self, income_df: pd.DataFrame, balance_df: pd.DataFrame) -> str:
|
144 |
-
"""Generate financial analysis"""
|
145 |
try:
|
146 |
-
#
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
# Calculate growth
|
158 |
-
revenue_growth = ((metrics[latest_year]['Growth']['Revenue'] / metrics[earliest_year]['Growth']['Revenue']) - 1) * 100
|
159 |
-
profit_growth = ((metrics[latest_year]['Growth']['Net_Income'] / metrics[earliest_year]['Growth']['Net_Income']) - 1) * 100
|
160 |
-
|
161 |
-
# Generate analysis context
|
162 |
-
context = f"""Financial Analysis ({earliest_year}-{latest_year}):
|
163 |
-
|
164 |
-
Performance Metrics:
|
165 |
-
- Revenue Growth: {revenue_growth:.1f}%
|
166 |
-
- Profit Growth: {profit_growth:.1f}%
|
167 |
-
- Current Gross Margin: {metrics[latest_year]['Profitability']['Gross_Margin']:.1f}%
|
168 |
-
- Current Net Margin: {metrics[latest_year]['Profitability']['Net_Margin']:.1f}%
|
169 |
-
- ROE: {metrics[latest_year]['Profitability']['ROE']:.1f}%
|
170 |
-
- Current Ratio: {metrics[latest_year]['Liquidity']['Current_Ratio']:.2f}
|
171 |
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
|
177 |
-
#
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
analysis = self.analysis_model(
|
182 |
-
f"[INST] As a financial analyst, provide a detailed analysis of this company:\n\n{context}\n\nInclude:\n1. Financial health assessment\n2. Key performance insights\n3. Strategic recommendations [/INST]",
|
183 |
max_length=1500,
|
184 |
num_return_sequences=1,
|
185 |
do_sample=True,
|
186 |
temperature=0.7
|
187 |
-
)
|
188 |
-
|
189 |
-
# Format output
|
190 |
-
output = f"""# Financial Analysis Report
|
191 |
|
192 |
-
|
|
|
193 |
|
194 |
-
|
195 |
-
- Gross Margin: {metrics[latest_year]['Profitability']['Gross_Margin']:.1f}%
|
196 |
-
- Operating Margin: {metrics[latest_year]['Profitability']['Operating_Margin']:.1f}%
|
197 |
-
- Net Margin: {metrics[latest_year]['Profitability']['Net_Margin']:.1f}%
|
198 |
-
- ROE: {metrics[latest_year]['Profitability']['ROE']:.1f}%
|
199 |
-
- Current Ratio: {metrics[latest_year]['Liquidity']['Current_Ratio']:.2f}
|
200 |
|
201 |
-
|
202 |
-
|
203 |
-
- Profit Growth: {profit_growth:.1f}%
|
204 |
-
- Working Capital: ${metrics[latest_year]['Liquidity']['Working_Capital']:,.0f}
|
205 |
|
206 |
-
|
207 |
-
{
|
208 |
-
|
209 |
-
return
|
210 |
-
|
211 |
except Exception as e:
|
212 |
logger.error(f"Analysis error: {str(e)}")
|
213 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
214 |
|
215 |
def analyze_statements(income_statement, balance_sheet):
|
216 |
"""Main function to analyze financial statements"""
|
@@ -218,13 +121,16 @@ def analyze_statements(income_statement, balance_sheet):
|
|
218 |
if not income_statement or not balance_sheet:
|
219 |
return "Please upload both Income Statement and Balance Sheet CSV files."
|
220 |
|
221 |
-
# Read files
|
222 |
-
|
223 |
-
|
224 |
|
225 |
-
#
|
|
|
|
|
|
|
226 |
analyzer = FinancialAnalyzer()
|
227 |
-
result = analyzer.analyze_financials(
|
228 |
|
229 |
# Clear memory
|
230 |
clear_gpu_memory()
|
@@ -243,20 +149,16 @@ def analyze_statements(income_statement, balance_sheet):
|
|
243 |
# Create Gradio interface
|
244 |
iface = gr.Interface(
|
245 |
fn=analyze_statements,
|
246 |
-
inputs=[
|
247 |
-
|
248 |
-
gr.File(label="Upload Balance Sheet (CSV)", file_types=[".csv"])
|
249 |
-
],
|
250 |
outputs=gr.Markdown(),
|
251 |
-
title="Financial Statement Analyzer",
|
252 |
-
description="""## Financial Analysis Tool
|
253 |
-
|
254 |
Upload your financial statements to get:
|
255 |
-
-
|
256 |
- Key Metrics & Ratios
|
257 |
- Trend Analysis
|
258 |
-
- Strategic Recommendations""",
|
259 |
-
examples=None
|
260 |
)
|
261 |
|
262 |
# Launch the interface
|
@@ -265,4 +167,4 @@ if __name__ == "__main__":
|
|
265 |
iface.launch(server_name="0.0.0.0", server_port=7860)
|
266 |
except Exception as e:
|
267 |
logger.error(f"Launch error: {str(e)}")
|
268 |
-
sys.exit(1)
|
|
|
1 |
+
import os
|
2 |
import gradio as gr
|
3 |
import pandas as pd
|
4 |
import torch
|
|
|
17 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
18 |
logger.info(f"Using device: {DEVICE}")
|
19 |
|
20 |
+
# Clear GPU memory utility
|
21 |
def clear_gpu_memory():
    """Release memory that Python no longer references, then trim the CUDA cache.

    Garbage collection runs first so that objects kept alive only by
    reference cycles are destroyed before the CUDA caching allocator is asked
    to give back its unused blocks. Running the two steps in the opposite
    order leaves the blocks freed by the collection sitting in the cache.
    """
    # Collecting is cheap and useful on every device, so it is not gated.
    gc.collect()
    if DEVICE == "cuda":
        torch.cuda.empty_cache()
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
class FinancialAnalyzer:
    """Financial analysis using Tiny Llama, FinBERT and Falcon models"""

    # Budget for newly generated tokens per generation call. Counting only
    # new tokens keeps a long prompt from using up the whole budget.
    MAX_NEW_TOKENS = 512
    # Upper bound on tokens passed to the sentiment classifier.
    SENTIMENT_MAX_TOKENS = 512

    def __init__(self):
        self.analysis_model = None
        self.sentiment_model = None
        self.falcon_model = None
        self.load_models()

    def _build_pipeline(self, task: str, model: str):
        """Create a pipeline for ``task``/``model`` placed on ``DEVICE``."""
        return pipeline(
            task,
            model=model,
            torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
            # Without an explicit device the weights stay on the CPU even
            # when CUDA was detected and float16 was selected.
            device=DEVICE,
        )

    def load_models(self):
        """Load models for analysis and sentiment

        Populates ``analysis_model``, ``sentiment_model`` and
        ``falcon_model``. Any loading failure is logged and re-raised.
        """
        try:
            # Tiny Llama generates the financial status and insights.
            self.analysis_model = self._build_pipeline(
                "text-generation", "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
            )
            # FinBERT classifies the tone of the generated insights.
            self.sentiment_model = self._build_pipeline(
                "text-classification", "yiyanghkust/finbert-tone"
            )
            # Falcon generates the roadmap and recommendations.
            self.falcon_model = self._build_pipeline(
                "text-generation", "tiiuae/falcon-7b"
            )

            logger.info("Tiny Llama, FinBERT, and Falcon models loaded successfully")
        except Exception as e:
            logger.error(f"Error loading models: {str(e)}")
            raise

    def _generate(self, model, prompt: str) -> str:
        """Run a text-generation model and return only its completion."""
        response = model(
            prompt,
            max_new_tokens=self.MAX_NEW_TOKENS,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
            # The default output is prompt + completion, which would echo
            # the prompt into the report and into the sentiment input.
            return_full_text=False,
        )
        return response[0]['generated_text'].strip()

    def analyze_financials(self, csv_data: str) -> str:
        """Generate financial analysis using Tiny Llama and analyze sentiment using FinBERT

        Args:
            csv_data: the financial statements rendered as text; it is
                embedded verbatim in the analysis prompt.

        Returns:
            A Markdown report with sentiment, insights and roadmap sections,
            or an ``"Analysis Error: ..."`` string if any step raised.
        """
        try:
            status_prompt = f"Please analyze the following financial data and provide status, insights, and metrics:\n\n{csv_data}"
            insights_result = self._generate(self.analysis_model, status_prompt)

            # Truncate by tokens inside the tokenizer; slicing the string
            # would cut by characters and discard most of the analysis.
            sentiment = self.sentiment_model(
                insights_result,
                truncation=True,
                max_length=self.SENTIMENT_MAX_TOKENS,
            )[0]
            sentiment_label = sentiment['label']
            sentiment_score = sentiment['score']

            roadmap_prompt = f"Based on the following financial insights, create a strategic roadmap and recommendations for the company:\n\n{insights_result}"
            roadmap_result = self._generate(self.falcon_model, roadmap_prompt)

            # Headings start at column 0 so Markdown renders them as headings.
            return "\n".join([
                "# Financial Analysis Report",
                "",
                f"### Sentiment Analysis: {sentiment_label} ({sentiment_score:.1%})",
                "",
                "### Financial Status and Insights:",
                insights_result,
                "",
                "### Recommendations and Roadmap:",
                roadmap_result,
                "",
            ])

        except Exception as e:
            logger.error(f"Analysis error: {str(e)}")
            return f"Analysis Error: {str(e)}"
|
111 |
+
|
112 |
+
# Function to read CSV and convert to text format
|
113 |
+
def csv_to_text(file_path: str) -> str:
    """Load the CSV at ``file_path`` and render it as plain text.

    The table is rendered with ``DataFrame.to_string`` and the row index is
    left out of the output.
    """
    frame = pd.read_csv(file_path)
    rendered = frame.to_string(index=False)
    return rendered
|
117 |
|
118 |
def analyze_statements(income_statement, balance_sheet):
|
119 |
"""Main function to analyze financial statements"""
|
|
|
121 |
if not income_statement or not balance_sheet:
|
122 |
return "Please upload both Income Statement and Balance Sheet CSV files."
|
123 |
|
124 |
+
# Read files as raw text (no need to clean manually)
|
125 |
+
income_data = csv_to_text(income_statement.name)
|
126 |
+
balance_data = csv_to_text(balance_sheet.name)
|
127 |
|
128 |
+
# Combine the data for AI to process (can adjust prompt as needed)
|
129 |
+
combined_data = f"Income Statement Data:\n{income_data}\n\nBalance Sheet Data:\n{balance_data}"
|
130 |
+
|
131 |
+
# Create analyzer and process data
|
132 |
analyzer = FinancialAnalyzer()
|
133 |
+
result = analyzer.analyze_financials(combined_data)
|
134 |
|
135 |
# Clear memory
|
136 |
clear_gpu_memory()
|
|
|
149 |
# Create Gradio interface
|
150 |
iface = gr.Interface(
|
151 |
fn=analyze_statements,
|
152 |
+
inputs=[gr.File(label="Upload Income Statement (CSV)", file_types=[".csv"]),
|
153 |
+
gr.File(label="Upload Balance Sheet (CSV)", file_types=[".csv"])],
|
|
|
|
|
154 |
outputs=gr.Markdown(),
|
155 |
+
title="Generative Financial Statement Analyzer with Tiny Llama, FinBERT, and Falcon",
|
156 |
+
description="""## Financial Analysis Tool (AI-powered)
|
|
|
157 |
Upload your financial statements to get:
|
158 |
+
- Status & Insights
|
159 |
- Key Metrics & Ratios
|
160 |
- Trend Analysis
|
161 |
+
- Strategic Recommendations & Roadmap""",
|
|
|
162 |
)
|
163 |
|
164 |
# Launch the interface
|
|
|
167 |
iface.launch(server_name="0.0.0.0", server_port=7860)
|
168 |
except Exception as e:
|
169 |
logger.error(f"Launch error: {str(e)}")
|
170 |
+
sys.exit(1)
|