File size: 19,337 Bytes
558a79e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
525f1d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
558a79e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4f051d3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
558a79e
 
 
 
 
 
 
 
 
 
 
 
 
 
b940d72
 
558a79e
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
import gradio as gr
import pandas as pd
import json
from transformers import (
    AutoTokenizer, 
    AutoModelForCausalLM,
    AutoModelForSequenceClassification
)
import torch
import numpy as np
import re

class FinancialDataset:
    """Map-style dataset that tokenizes one text per item on access.

    The object only implements ``__len__`` and ``__getitem__``; it does not
    subclass any framework dataset type.

    Args:
        texts: Indexable collection of input texts (each is passed through
            ``str()`` before tokenization).
        labels: Indexable collection of labels, aligned with ``texts``.
        tokenizer: Callable invoked as ``tokenizer(text, truncation=True,
            padding='max_length', max_length=..., return_tensors='pt')`` and
            expected to return a mapping with ``input_ids`` and
            ``attention_mask`` tensors that carry a leading batch axis.
        max_length: Length every sequence is padded/truncated to.
    """

    def __init__(self, texts, labels, tokenizer, max_length=512):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of texts in the dataset."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize item ``idx`` and return its tensors.

        Returns:
            dict with ``input_ids`` and ``attention_mask`` (batch axis
            removed) and ``labels`` as a ``torch.long`` tensor.
        """
        encoded = self.tokenizer(
            str(self.texts[idx]),
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt',
        )
        # squeeze(0) removes only the batch axis. A bare squeeze() would also
        # collapse the sequence axis whenever the sequence length is 1.
        return {
            'input_ids': encoded['input_ids'].squeeze(0),
            'attention_mask': encoded['attention_mask'].squeeze(0),
            'labels': torch.tensor(self.labels[idx], dtype=torch.long),
        }

class FinancialAnalyzer:
    """Turn Markdown financial statements into ratios and a written analysis.

    The analyzer parses Markdown tables into nested dictionaries of the form
    ``{section: {line item: {column header: number}}}``, derives a fixed set
    of financial ratios from them and asks a causal language model to
    comment on the result. A rule-based summary is used whenever the model
    fails or produces too little text.

    Attributes:
        last_metrics: Metrics computed by the most recent analysis run.
        llama_tokenizer / llama_model: Set by ``initialize_models``.
    """

    # Hugging Face model id used for both the tokenizer and the model.
    MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

    # One cell of a Markdown table delimiter row: hyphens, optional colons.
    _SEPARATOR_CELL = re.compile(r'^:?-+:?$')

    # metric name -> (ordered (threshold, label) tiers, fallback label,
    # lower_is_better). Tiers are checked in order; the first one the value
    # beats ("above" normally, "below" when lower_is_better) wins.
    _METRIC_BANDS = {
        'gross_profit_margin': (((40, 'Strong'), (30, 'Adequate')), 'Needs improvement', False),
        'current_ratio': (((2, 'Strong'), (1, 'Adequate')), 'Concerning', False),
        'debt_ratio': (((40, 'Conservative'), (60, 'Moderate')), 'High risk', True),
        'ar_turnover': (((8, 'Excellent'), (4, 'Good')), 'Needs improvement', False),
        'roe': (((15, 'Strong'), (10, 'Adequate')), 'Below target', False),
        'net_profit_margin': (((10, 'Strong'), (5, 'Adequate')), 'Needs improvement', False),
        'retained_earnings_ratio': (((30, 'Strong'), (15, 'Adequate')), 'Low retention', False),
        'sgr': (((10, 'Strong'), (5, 'Moderate')), 'Limited growth potential', False),
        'revenue_growth': (((10, 'Strong'), (5, 'Moderate')), 'Below industry average', False),
    }

    def __init__(self):
        """Create the analyzer and load the language model immediately."""
        print("Initializing Analyzer...")
        self.last_metrics = {}
        self.initialize_models()
        print("Initialization complete!")

    def initialize_models(self):
        """Load the tokenizer and causal LM named by ``MODEL_NAME``.

        The model is switched to eval mode. Any loading error is printed and
        re-raised so construction fails loudly.
        """
        try:
            self.llama_tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)
            self.llama_model = AutoModelForCausalLM.from_pretrained(self.MODEL_NAME)
            self.llama_model.eval()
            print("Models loaded successfully!")
        except Exception as e:
            print(f"Error initializing models: {str(e)}")
            raise

    def clean_number(self, value):
        """Convert a table cell to ``float``.

        Strips ``$`` and thousands separators and treats a value wrapped in
        parentheses as negative (accounting notation). Empty, ``None`` or
        unparseable input yields ``0.0``.
        """
        try:
            if isinstance(value, str):
                value = value.replace('$', '').replace(',', '').strip()
                if value.startswith('(') and value.endswith(')'):
                    value = '-' + value[1:-1]
            return float(value or 0)
        except (TypeError, ValueError):
            # Best effort by design: non-numeric cells count as zero.
            return 0.0

    def is_valid_markdown(self, file_path):
        """Return True if the file is readable and looks like Markdown.

        "Looks like Markdown" means at least one line starts with ``#`` or
        contains ``|``. Unreadable files and a missing path (``None``)
        return False.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, TypeError, ValueError):
            # OSError: missing/unreadable file; TypeError: path is None;
            # ValueError: undecodable bytes (UnicodeDecodeError).
            return False
        return any(line.startswith('#') or '|' in line for line in content.splitlines())

    @staticmethod
    def _split_table_row(line):
        """Split one Markdown table line into stripped cell strings."""
        text = line.strip()
        # Outer pipes are optional in Markdown tables; drop at most one each.
        if text.startswith('|'):
            text = text[1:]
        if text.endswith('|'):
            text = text[:-1]
        return [cell.strip() for cell in text.split('|')]

    def parse_financial_data(self, content):
        """Parse Markdown text into ``{section: {item: {header: float}}}``.

        A line starting with ``#`` opens a new section. Within a section the
        first table row is the header row and later rows are data; delimiter
        rows such as ``|---|---|`` or ``| :--- | ---: |`` are skipped. Rows
        found before any heading are stored under the section name ``""``.

        Returns:
            The parsed dictionary, or ``{}`` if parsing fails.
        """
        try:
            data = {}
            current_section = ""
            headers = None
            table_rows = []

            # splitlines() also handles CRLF files, which would otherwise
            # leave a trailing '\r' in every section name.
            for line in content.splitlines():
                if line.startswith('#'):
                    if table_rows and headers:
                        data[current_section] = self.process_table(headers, table_rows)
                    current_section = line.strip('# ')
                    headers = None
                    table_rows = []
                elif '|' in line:
                    cells = self._split_table_row(line)
                    if cells and all(self._SEPARATOR_CELL.match(cell) for cell in cells):
                        continue  # header/body delimiter row
                    if not headers:
                        headers = cells
                    else:
                        table_rows.append(cells)

            # Flush the table of the last section.
            if table_rows and headers:
                data[current_section] = self.process_table(headers, table_rows)

            return data
        except Exception as e:
            print(f"Error parsing financial data: {str(e)}")
            return {}

    def process_table(self, headers, rows):
        """Convert table rows into ``{item name: {header: float}}``.

        The first cell of each row is the item name (surrounding ``*`` and
        whitespace removed); remaining cells are converted with
        ``clean_number`` and keyed by the matching header. Rows whose cell
        count differs from the header count are ignored.
        """
        try:
            processed = {}
            for row in rows:
                if len(row) != len(headers):
                    continue
                item_name = row[0].strip('*').strip()
                processed[item_name] = {
                    header: self.clean_number(cell)
                    for header, cell in zip(headers[1:], row[1:])
                }
            return processed
        except (AttributeError, IndexError, TypeError) as e:
            print(f"Error processing table: {str(e)}")
            return {}

    def get_nested_value(self, data, section, key, year):
        """Return ``data[section][key][str(year)]``, or 0 if any level is missing."""
        try:
            return data.get(section, {}).get(key, {}).get(str(year), 0)
        except (AttributeError, TypeError):
            # A level was not a mapping (or a key was unhashable).
            return 0

    @staticmethod
    def _safe_ratio(numerator, denominator, scale=1):
        """Return ``numerator / denominator * scale``; 0 for a zero denominator."""
        if denominator == 0:
            return 0
        return (numerator / denominator) * scale

    def calculate_metrics(self, income_data, balance_data, year="2025", prior_year="2024"):
        """Compute the financial ratios for ``year``.

        Args:
            income_data: Parsed income statement (see ``parse_financial_data``).
            balance_data: Parsed balance sheet.
            year: Column header of the period being analysed.
            prior_year: Column header used as the base for revenue growth.

        Returns:
            dict with ``gross_profit_margin``, ``current_ratio``,
            ``debt_ratio``, ``sgr``, ``ar_turnover``, ``roe``,
            ``net_profit_margin``, ``retained_earnings_ratio`` and
            ``revenue_growth``. Percent-style metrics are expressed in
            percent; any ratio with a zero denominator is 0. Returns ``{}``
            if the inputs contain non-numeric values.
        """
        try:
            def income(section, key, period=year):
                return self.get_nested_value(income_data, section, key, period)

            def balance(section, key):
                return self.get_nested_value(balance_data, section, key, year)

            metrics = {}

            # 1. Gross Profit Margin Ratio
            revenue = income("Revenue", "Total Net Revenue")
            cogs = income("Operating Expenses", "Cost of Goods Sold")
            metrics['gross_profit_margin'] = self._safe_ratio(revenue - cogs, revenue, 100)

            # 2. Current Ratio
            current_assets = balance("Assets", "Total Current Assets")
            current_liabilities = balance("Liabilities", "Total Current Liabilities")
            metrics['current_ratio'] = self._safe_ratio(current_assets, current_liabilities)

            # 3. Debt Ratio
            total_liabilities = balance("Liabilities", "Total Liabilities")
            total_assets = balance("Assets", "Total Assets")
            metrics['debt_ratio'] = self._safe_ratio(total_liabilities, total_assets, 100)

            # 4. Sustainable Growth Rate (SGR) = ROE x retention ratio
            net_income = income("Net Income", "Total Net Income")
            equity = balance("Equity", "Total Shareholders Equity")
            dividends = income("Dividends", "Dividends Paid")

            roe = self._safe_ratio(net_income, equity, 100)
            retention_ratio = self._safe_ratio(net_income - dividends, net_income)
            # roe is already a percentage, so the product is a percentage
            # too; dividing by 100 again would shrink it a hundredfold.
            metrics['sgr'] = roe * retention_ratio

            # 5. Accounts Receivable Turnover
            accounts_receivable = balance("Assets", "Accounts Receivable")
            metrics['ar_turnover'] = self._safe_ratio(revenue, accounts_receivable)

            # 6. Return on Equity (ROE)
            metrics['roe'] = roe

            # 7. Net Profit Margin
            metrics['net_profit_margin'] = self._safe_ratio(net_income, revenue, 100)

            # 8. Retained Earnings Ratio
            retained_earnings = balance("Equity", "Retained Earnings")
            metrics['retained_earnings_ratio'] = self._safe_ratio(retained_earnings, total_assets, 100)

            # Additional year-over-year metric
            prior_revenue = income("Revenue", "Total Net Revenue", prior_year)
            metrics['revenue_growth'] = (
                ((revenue / prior_revenue) - 1) * 100 if prior_revenue != 0 else 0
            )

            return metrics
        except (TypeError, ValueError) as e:
            print(f"Error calculating metrics: {str(e)}")
            return {}

    def _load_metrics(self, balance_sheet_path, income_statement_path):
        """Read and parse both statements, compute the metrics and remember them."""
        with open(balance_sheet_path, 'r', encoding='utf-8') as f:
            balance_content = f.read()
        with open(income_statement_path, 'r', encoding='utf-8') as f:
            income_content = f.read()

        balance_data = self.parse_financial_data(balance_content)
        income_data = self.parse_financial_data(income_content)

        metrics = self.calculate_metrics(income_data, balance_data)
        self.last_metrics = metrics
        return metrics

    def analyze_financials(self, balance_sheet_path, income_statement_path):
        """Analyse two Markdown statements and return the written analysis.

        This is the callback wired into the Gradio interface. It never
        raises: validation problems and unexpected errors are returned as
        human-readable strings.

        Args:
            balance_sheet_path: Path of the balance sheet Markdown file.
            income_statement_path: Path of the income statement Markdown file.

        Returns:
            The analysis text, or an error message.
        """
        try:
            if not self.is_valid_markdown(balance_sheet_path):
                return "Invalid Balance Sheet file format. Please upload a valid Markdown file."
            if not self.is_valid_markdown(income_statement_path):
                return "Invalid Income Statement file format. Please upload a valid Markdown file."

            metrics = self._load_metrics(balance_sheet_path, income_statement_path)
            return self.generate_analysis(metrics)

        except Exception as e:
            return f"Error analyzing financials: {e}"

    def generate_analysis(self, metrics):
        """Ask the language model for a narrative analysis of ``metrics``.

        Only the newly generated text is returned (the prompt is not echoed).
        Falls back to ``generate_fallback_analysis`` when generation fails or
        the model returns fewer than 100 words.
        """
        try:
            # NOTE(review): the [INST] wrapper may not match this model's
            # chat template - confirm against the model card.
            prompt = f"""[INST] As a financial analyst, provide a comprehensive analysis based on these metrics:

1. Profitability:
   - Gross Profit Margin: {metrics.get('gross_profit_margin', 0):.2f}%
   - Net Profit Margin: {metrics.get('net_profit_margin', 0):.2f}%
   - Return on Equity: {metrics.get('roe', 0):.2f}%

2. Liquidity & Efficiency:
   - Current Ratio: {metrics.get('current_ratio', 0):.2f}
   - Accounts Receivable Turnover: {metrics.get('ar_turnover', 0):.2f}

3. Financial Structure:
   - Debt Ratio: {metrics.get('debt_ratio', 0):.2f}%
   - Retained Earnings Ratio: {metrics.get('retained_earnings_ratio', 0):.2f}%

4. Growth:
   - Sustainable Growth Rate: {metrics.get('sgr', 0):.2f}%
   - Revenue Growth (YoY): {metrics.get('revenue_growth', 0):.2f}%

Provide:
1. Overall financial health assessment
2. Key strengths and concerns
3. Operational efficiency analysis
4. Specific recommendations for improvement
[/INST]"""

            inputs = self.llama_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
            prompt_length = inputs["input_ids"].shape[1]
            outputs = self.llama_model.generate(
                inputs["input_ids"],
                attention_mask=inputs.get("attention_mask"),
                max_new_tokens=1024,
                min_new_tokens=200,
                # temperature/top_p only take effect when sampling is on.
                do_sample=True,
                temperature=0.7,
                top_p=0.95,
                repetition_penalty=1.2,
            )

            # The returned sequence starts with the prompt tokens; keep only
            # the continuation so the word-count check measures the answer.
            analysis = self.llama_tokenizer.decode(
                outputs[0][prompt_length:], skip_special_tokens=True
            ).strip()

            if len(analysis.split()) < 100:
                return self.generate_fallback_analysis(metrics)

            return analysis

        except Exception as e:
            # Any model/tokenizer failure degrades to the rule-based summary.
            print(f"Error generating analysis: {str(e)}")
            return self.generate_fallback_analysis(metrics)

    def generate_fallback_analysis(self, metrics):
        """Build a rule-based summary of ``metrics`` without using the model.

        Each metric is shown with its ``interpret_metric`` label, followed by
        the output of ``generate_recommendations``.
        """
        try:
            analysis = f"""Financial Analysis Summary:

1. Profitability Assessment:
- Gross Profit Margin: {metrics.get('gross_profit_margin', 0):.2f}% 
  ({self.interpret_metric('gross_profit_margin', metrics.get('gross_profit_margin', 0))})
- Net Profit Margin: {metrics.get('net_profit_margin', 0):.2f}%
  ({self.interpret_metric('net_profit_margin', metrics.get('net_profit_margin', 0))})
- Return on Equity: {metrics.get('roe', 0):.2f}%
  ({self.interpret_metric('roe', metrics.get('roe', 0))})

2. Liquidity & Efficiency Analysis:
- Current Ratio: {metrics.get('current_ratio', 0):.2f}
  ({self.interpret_metric('current_ratio', metrics.get('current_ratio', 0))})
- AR Turnover: {metrics.get('ar_turnover', 0):.2f}
  ({self.interpret_metric('ar_turnover', metrics.get('ar_turnover', 0))})

3. Financial Structure:
- Debt Ratio: {metrics.get('debt_ratio', 0):.2f}%
  ({self.interpret_metric('debt_ratio', metrics.get('debt_ratio', 0))})
- Retained Earnings Ratio: {metrics.get('retained_earnings_ratio', 0):.2f}%
  ({self.interpret_metric('retained_earnings_ratio', metrics.get('retained_earnings_ratio', 0))})

4. Growth & Sustainability:
- Sustainable Growth Rate: {metrics.get('sgr', 0):.2f}%
  ({self.interpret_metric('sgr', metrics.get('sgr', 0))})
- Revenue Growth: {metrics.get('revenue_growth', 0):.2f}%
  ({self.interpret_metric('revenue_growth', metrics.get('revenue_growth', 0))})

{self.generate_recommendations(metrics)}"""
            return analysis

        except (AttributeError, TypeError, ValueError) as e:
            return f"Error generating fallback analysis: {str(e)}"

    def interpret_metric(self, metric_name, value):
        """Return a qualitative label for ``value`` of ``metric_name``.

        Labels come from ``_METRIC_BANDS``. Unknown metric names yield
        ``'No interpretation'``; values that cannot be compared with the
        thresholds yield ``'Unable to interpret'``.
        """
        try:
            band = self._METRIC_BANDS.get(metric_name)
            if band is None:
                return 'No interpretation'
            tiers, fallback, lower_is_better = band
            for threshold, label in tiers:
                beats = value < threshold if lower_is_better else value > threshold
                if beats:
                    return label
            return fallback
        except (TypeError, ValueError):
            return 'Unable to interpret'

    def generate_recommendations(self, metrics):
        """Return a "Key Recommendations" text block derived from ``metrics``.

        One bullet is added per metric that misses its threshold (missing
        metrics count as 0). A string is always returned, including when no
        threshold is missed.
        """
        recommendations = []

        if metrics.get('gross_profit_margin', 0) < 30:
            recommendations.append("- Review pricing strategy and cost structure to improve gross margins")
        if metrics.get('current_ratio', 0) < 1.5:
            recommendations.append("- Strengthen working capital management to improve liquidity")
        if metrics.get('debt_ratio', 0) > 60:
            recommendations.append("- Consider debt reduction strategies to improve financial flexibility")
        if metrics.get('ar_turnover', 0) < 4:
            recommendations.append("- Improve accounts receivable collection practices")
        if metrics.get('roe', 0) < 10:
            recommendations.append("- Focus on improving operational efficiency to enhance returns")
        if metrics.get('revenue_growth', 0) < 5:
            recommendations.append("- Develop strategies to accelerate revenue growth")
            recommendations.append("- Consider strategic acquisitions or new market entry")

        if not recommendations:
            recommendations.append("- No specific concerns identified from the calculated metrics")

        # Must sit outside the last `if`; otherwise healthy revenue growth
        # makes the method return None.
        return "Key Recommendations:\n" + "\n".join(recommendations)

    def analyze_financials_json(self, balance_sheet_file, income_stmt_file):
        """Analyse two Markdown statements and return a JSON report.

        Same pipeline as ``analyze_financials`` but the result is a JSON
        document containing the formatted key metrics plus the analysis text.

        Returns:
            A JSON string, or an error message if validation or any later
            step fails (for example when no metrics could be computed).
        """
        try:
            if not (self.is_valid_markdown(balance_sheet_file) and self.is_valid_markdown(income_stmt_file)):
                return "Error: One or both files are invalid or not in Markdown format."

            metrics = self._load_metrics(balance_sheet_file, income_stmt_file)
            analysis = self.generate_analysis(metrics)

            results = {
                "Financial Analysis": {
                    "Key Metrics": {
                        "Profitability": {
                            "Gross Profit Margin": f"{metrics['gross_profit_margin']:.2f}%",
                            "Net Profit Margin": f"{metrics['net_profit_margin']:.2f}%",
                            "Return on Equity": f"{metrics['roe']:.2f}%"
                        },
                        "Liquidity": {
                            "Current Ratio": f"{metrics['current_ratio']:.2f}",
                            "Accounts Receivable Turnover": f"{metrics['ar_turnover']:.2f}"
                        },
                        "Solvency": {
                            "Debt Ratio": f"{metrics['debt_ratio']:.2f}%",
                            "Retained Earnings Ratio": f"{metrics['retained_earnings_ratio']:.2f}%"
                        },
                        "Growth": {
                            "Sustainable Growth Rate": f"{metrics['sgr']:.2f}%",
                            "Revenue Growth (YoY)": f"{metrics['revenue_growth']:.2f}%"
                        }
                    },
                    "Analysis": analysis,
                    "Analysis Period": "2021-2025",
                    "Note": "Analysis based on CFI standards"
                }
            }

            return json.dumps(results, indent=2)

        except Exception as e:
            return f"Error in analysis: {str(e)}\n\nDetails: {type(e).__name__}"

    def fine_tune_models(self, train_texts, train_labels, epochs=3):
        """Fine-tune the loaded model on custom data and save it.

        The tuned model and tokenizer are written to
        ``./financial_model_tuned``. Errors are printed, not raised.

        Args:
            train_texts: Training texts.
            train_labels: One label per training text.
            epochs: Number of training epochs.
        """
        try:
            # Imported here because the file's top-level transformers import
            # does not include these two names.
            from transformers import Trainer, TrainingArguments

            # NOTE(review): FinancialDataset yields one scalar label per
            # example while the model is a causal LM - confirm this label
            # shape is what the training objective expects.
            train_dataset = FinancialDataset(train_texts, train_labels, self.llama_tokenizer)

            training_args = TrainingArguments(
                output_dir="./financial_model_tuned",
                num_train_epochs=epochs,
                per_device_train_batch_size=4,
                logging_dir="./logs",
                logging_steps=10,
                save_steps=50,
                eval_steps=50,
                learning_rate=2e-5,
                weight_decay=0.01,
                warmup_steps=500
            )

            trainer = Trainer(
                model=self.llama_model,
                args=training_args,
                train_dataset=train_dataset
            )

            trainer.train()

            self.llama_model.save_pretrained("./financial_model_tuned")
            self.llama_tokenizer.save_pretrained("./financial_model_tuned")

            print("Fine-tuning completed successfully!")
        except Exception as e:
            print(f"Error in fine-tuning: {str(e)}")
        finally:
            # Restore the eval mode set at load time, since the same model
            # object is used for generation afterwards.
            model = getattr(self, "llama_model", None)
            if model is not None:
                model.eval()


def create_interface():
    """Build the Gradio UI around a freshly constructed FinancialAnalyzer.

    Constructing the analyzer loads the language model, so this call can be
    slow. The returned interface is not launched here.

    Returns:
        A ``gr.Interface`` taking two uploaded files (balance sheet, income
        statement) and showing the analysis in a text box.
    """
    analyzer = FinancialAnalyzer()

    # Both uploads are handed to the callback as filesystem paths.
    file_inputs = [
        gr.File(label="Balance Sheet (Markdown)", type="filepath"),
        gr.File(label="Income Statement (Markdown)", type="filepath"),
    ]
    result_box = gr.Textbox(label="Analysis Results", lines=25)

    return gr.Interface(
        fn=analyzer.analyze_financials,
        inputs=file_inputs,
        outputs=result_box,
        title="AI Financial Statement Analyzer",
        description="""Upload financial statements in Markdown format for AI-powered analysis.
                      Analysis is based on Corporate Finance Institute (CFI) standards.""",
        cache_examples=False,
    )

# Script entry point: build the interface (which constructs the analyzer and
# loads the model) and start the Gradio server with default launch settings.
if __name__ == "__main__":
    iface = create_interface()
    iface.launch()