File size: 9,373 Bytes
864f28a
a1ef945
 
 
35acd3c
91033f9
ca1d38d
ceb9625
eac8dde
 
 
 
 
6e9bd28
35acd3c
eac8dde
 
 
ceb9625
eac8dde
 
 
 
 
57061b5
0ff54a0
ca1d38d
eac8dde
0ff54a0
ca1d38d
 
 
 
 
 
 
35acd3c
ca1d38d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35acd3c
ca1d38d
35acd3c
 
ca1d38d
 
35acd3c
37a163f
 
eac8dde
ca1d38d
eac8dde
 
 
ca1d38d
 
91033f9
 
 
ca1d38d
 
91033f9
 
ca1d38d
 
 
 
 
 
 
 
91033f9
35acd3c
ca1d38d
35acd3c
0ff54a0
ca1d38d
 
91033f9
ca1d38d
 
 
91033f9
ca1d38d
 
91033f9
ca1d38d
 
 
 
 
 
91033f9
ca1d38d
 
 
 
 
 
 
 
91033f9
ca1d38d
 
91033f9
ca1d38d
91033f9
ca1d38d
 
eac8dde
ca1d38d
 
 
91033f9
ca1d38d
35acd3c
eac8dde
ca1d38d
91033f9
35acd3c
ca1d38d
 
eac8dde
ca1d38d
 
91033f9
ca1d38d
 
 
 
 
 
 
91033f9
ca1d38d
324809c
91033f9
ca1d38d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91033f9
 
ca1d38d
 
a1ef945
ca1d38d
 
a1ef945
ca1d38d
 
 
eac8dde
ca1d38d
 
 
 
 
eac8dde
ca1d38d
 
a1ef945
ca1d38d
 
 
 
 
 
eac8dde
ca1d38d
 
 
 
 
 
eac8dde
ca1d38d
 
324809c
ca1d38d
91033f9
ca1d38d
a1ef945
eac8dde
 
 
 
35acd3c
eac8dde
ca1d38d
 
 
eac8dde
ca1d38d
f4bbd39
a1ef945
ca1d38d
a1ef945
324809c
ca1d38d
 
 
324809c
 
ca1d38d
 
 
324809c
a1ef945
 
ca1d38d
 
 
 
 
 
 
324809c
ca1d38d
 
 
 
324809c
 
 
 
ca1d38d
eac8dde
a1ef945
 
ca1d38d
a1ef945
35acd3c
ca1d38d
eac8dde
 
 
324809c
ca1d38d
eac8dde
35acd3c
eac8dde
35acd3c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
import os 
import gradio as gr
import pandas as pd
import torch
import logging
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
import gc

# Configure process-wide logging once, at import time. Every record is
# prefixed with its timestamp and severity.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Pick the compute device: CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
DEVICE = "cpu"
if torch.cuda.is_available():
    DEVICE = "cuda"
logger.info(f"Using device: {DEVICE}")

def clear_gpu_memory():
    """Release unreferenced Python objects and, on CUDA, cached GPU blocks.

    The garbage collector runs first so that tensors which are only kept
    alive by reference cycles are destroyed before the CUDA caching
    allocator is asked to give its unused blocks back to the driver.
    Emptying the cache first would leave those blocks marked as in use.
    """
    gc.collect()
    if DEVICE == "cuda":
        torch.cuda.empty_cache()

class FinancialAnalyzer:
    """Financial statement analyzer built on two small Hugging Face models.

    * ``sentiment_model`` - a ``text-classification`` pipeline running
      ``ProsusAI/finbert``.
    * ``analysis_model`` - a ``text-generation`` pipeline running
      ``TinyLlama/TinyLlama-1.1B-Chat-v1.0``.

    Both pipelines are created in ``__init__`` via :meth:`load_models`.
    """

    # FinBERT is a BERT-style encoder; inputs longer than this many tokens
    # are truncated before classification instead of raising.
    SENTIMENT_MAX_TOKENS = 512

    # Budget for the generated answer only (prompt tokens are not counted).
    MAX_NEW_TOKENS = 512

    # (substring that identifies a heading, bucket it opens). Checked in
    # this order for every line of the generated text.
    _SECTION_MARKERS = (
        ("Business Status", "status"),
        ("Key Business Insights", "insights"),
        ("Strategic Recommendations", "recommendations"),
    )

    # Bullet markers the model may emit itself; stripped so the report
    # does not end up with doubled bullets such as "- - item".
    _BULLET_PREFIXES = ("- ", "* ", "• ")

    def __init__(self):
        # Placeholders so the attributes exist even if loading fails.
        self.sentiment_model = None
        self.analysis_model = None
        self.load_models()

    def load_models(self):
        """Create both pipelines on ``DEVICE``.

        Half precision is used on CUDA and full precision on CPU. The
        device is passed explicitly so the weights are placed on the same
        device the dtype was chosen for.

        Raises:
            Exception: whatever ``pipeline`` raises; it is logged and
                re-raised unchanged.
        """
        dtype = torch.float16 if DEVICE == "cuda" else torch.float32
        try:
            # FinBERT for sentiment analysis
            self.sentiment_model = pipeline(
                "text-classification",
                model="ProsusAI/finbert",
                torch_dtype=dtype,
                device=DEVICE,
            )

            # Small chat model for analysis and recommendations
            self.analysis_model = pipeline(
                "text-generation",
                model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                torch_dtype=dtype,
                device=DEVICE,
            )

            logger.info("Models loaded successfully")
        except Exception as e:
            logger.error(f"Error loading models: {str(e)}")
            raise

    def process_csv(self, file_obj):
        """Read a CSV and compute summary statistics for its numeric columns.

        Args:
            file_obj: anything ``pandas.read_csv`` accepts (path or
                file-like object).

        Returns:
            A ``(summary, metrics)`` tuple. ``summary`` is the
            ``DataFrame.describe()`` table of the numeric columns.
            ``metrics`` is a dict with the per-column Series ``'total'``
            (sum), ``'average'`` (mean) and ``'growth'`` (mean
            row-over-row percentage change, in percent).

        Raises:
            ValueError: if ``file_obj`` is ``None``, the CSV has no rows,
                or it has no numeric columns.
            Exception: any error raised by pandas; logged and re-raised.
        """
        try:
            if file_obj is None:
                raise ValueError("No file provided")

            df = pd.read_csv(file_obj)

            if df.empty:
                raise ValueError("Empty CSV file")

            # "number" covers every numeric dtype, not only int64/float64.
            numeric = df.select_dtypes(include="number")
            if numeric.shape[1] == 0:
                raise ValueError("No numeric columns found in CSV")

            summary = numeric.describe()

            # A previous value of 0 makes pct_change yield +/-inf, which
            # would turn the whole column mean into inf. Treat those steps
            # as undefined (NaN) so they are skipped by mean().
            step_changes = numeric.pct_change().replace(
                [float("inf"), float("-inf")], float("nan")
            )

            metrics = {
                'total': numeric.sum(),
                'average': numeric.mean(),
                'growth': step_changes.mean() * 100,
            }

            return summary, metrics

        except Exception as e:
            logger.error(f"Error processing CSV: {str(e)}")
            raise

    def analyze_financials(self, income_summary, balance_summary):
        """Generate the Markdown analysis report for two processed statements.

        Args:
            income_summary: ``(summary, metrics)`` as returned by
                :meth:`process_csv` for the income statement.
            balance_summary: same, for the balance sheet.

        Returns:
            The report produced by :meth:`format_response`, or the string
            ``"Error generating analysis"`` if anything fails (the error is
            logged with its traceback).
        """
        try:
            financial_context = f"""
            Income Statement Metrics:
            {income_summary[0].to_string()}
            
            Key Income Indicators:
            {income_summary[1]}
            
            Balance Sheet Metrics:
            {balance_summary[0].to_string()}
            
            Key Balance Sheet Indicators:
            {balance_summary[1]}
            """

            # The context is usually far longer than the encoder accepts,
            # so ask the tokenizer to truncate rather than letting the
            # model fail on an over-long sequence.
            sentiment = self.sentiment_model(
                financial_context,
                truncation=True,
                max_length=self.SENTIMENT_MAX_TOKENS,
            )[0]

            # NOTE(review): the TinyLlama chat model card builds prompts
            # with the tokenizer's chat template rather than [INST] tags -
            # consider switching; the prompt text is left unchanged here.
            analysis_prompt = f"""[INST] Based on the following financial data, provide:
            1. Current Business Status
            2. Key Business Insights
            3. Strategic Recommendations and Roadmap

            Financial Context:
            {financial_context}

            Sentiment: {sentiment['label']} ({sentiment['score']:.2%})

            Provide a concise but detailed analysis for each section.
            [/INST]"""

            response = self.analysis_model(
                analysis_prompt,
                # max_length would also count the (long) prompt and could
                # leave no room for an answer; budget new tokens instead.
                max_new_tokens=self.MAX_NEW_TOKENS,
                # temperature only has an effect when sampling is enabled.
                do_sample=True,
                temperature=0.7,
                num_return_sequences=1,
                # Without this the prompt is echoed back, and its own
                # numbered headings would be parsed as report sections.
                return_full_text=False,
            )

            return self.format_response(response[0]['generated_text'], sentiment)

        except Exception as e:
            logger.error(f"Error in analysis: {str(e)}", exc_info=True)
            return "Error generating analysis"

    def format_response(self, analysis_text, sentiment):
        """Turn raw generated text into the structured Markdown report.

        The text is scanned line by line. A line containing one of the
        heading markers opens that section; any text after a ``:`` on the
        heading line, and every following non-empty line, becomes a bullet
        of the open section. Lines before the first heading are ignored.
        If no section receives any content, the raw text is emitted under
        a generic ``## Analysis`` heading so the answer is never lost.

        Args:
            analysis_text: text produced by the generation model.
            sentiment: mapping with ``'label'`` (str) and ``'score'``
                (float) keys.

        Returns:
            The Markdown report, or ``"Error formatting analysis results"``
            if formatting fails.
        """
        try:
            buckets = {name: [] for _, name in self._SECTION_MARKERS}
            active = None

            for raw_line in analysis_text.splitlines():
                line = raw_line.strip()
                if not line:
                    continue

                heading = next(
                    (pair for pair in self._SECTION_MARKERS if pair[0] in line),
                    None,
                )
                if heading is not None:
                    marker, name = heading
                    active = buckets[name]
                    # Keep "Heading: first sentence" content that shares
                    # the heading line.
                    after_marker = line[line.index(marker) + len(marker):]
                    _, colon, inline = after_marker.partition(":")
                    inline = inline.strip(" *#")
                    if colon and inline:
                        active.append(inline)
                    continue

                if active is None:
                    continue

                for prefix in self._BULLET_PREFIXES:
                    if line.startswith(prefix):
                        line = line[len(prefix):].lstrip()
                        break
                if line:
                    active.append(line)

            output = [
                "# Financial Analysis Report\n\n",
                f"## Overall Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.2%})\n\n",
            ]

            if any(buckets.values()):
                output.extend([
                    "## Current Business Status\n",
                    "".join(f"- {item}\n" for item in buckets["status"]),
                    "\n## Key Business Insights\n",
                    "".join(f"- {item}\n" for item in buckets["insights"]),
                    "\n## Strategic Recommendations & Roadmap\n",
                    "".join(f"- {item}\n" for item in buckets["recommendations"]),
                ])
            else:
                # No recognizable headings: show the model output as-is.
                output.extend(["## Analysis\n", analysis_text.strip(), "\n"])

            return "".join(output)

        except Exception as e:
            logger.error(f"Error formatting response: {str(e)}")
            return "Error formatting analysis results"

# Analyzer shared by every request. Created on first use so importing
# this module stays cheap, and kept afterwards so the two models are not
# reloaded for each upload.
_ANALYZER = None


def _get_analyzer():
    """Return the shared FinancialAnalyzer, creating it on first call."""
    global _ANALYZER
    if _ANALYZER is None:
        # If construction raises, _ANALYZER stays None and the next
        # request retries the load.
        _ANALYZER = FinancialAnalyzer()
    return _ANALYZER


def analyze_statements(income_statement, balance_sheet):
    """Analyze an uploaded income statement and balance sheet.

    Args:
        income_statement: uploaded income statement CSV, or ``None`` when
            nothing was uploaded.
        balance_sheet: uploaded balance sheet CSV, or ``None``.

    Returns:
        A string for display: the analysis report on success, otherwise a
        message describing what went wrong. This function does not raise;
        every failure is reported through the returned text.
    """
    try:
        # Check if files are uploaded
        if income_statement is None or balance_sheet is None:
            return "Please upload both Income Statement and Balance Sheet CSV files."

        # Use the upload's name for logging when it exposes one.
        income_filename = getattr(income_statement, 'name', 'Income Statement')
        balance_filename = getattr(balance_sheet, 'name', 'Balance Sheet')

        logger.info(f"Processing {income_filename} and {balance_filename}")

        analyzer = _get_analyzer()

        # Each statement is processed separately so the message can say
        # which of the two files was the problem.
        try:
            income_summary = analyzer.process_csv(income_statement)
            logger.info("Successfully processed Income Statement")
        except Exception as e:
            return f"Error processing Income Statement: {str(e)}\nPlease ensure it's a valid CSV file with numeric data."

        try:
            balance_summary = analyzer.process_csv(balance_sheet)
            logger.info("Successfully processed Balance Sheet")
        except Exception as e:
            return f"Error processing Balance Sheet: {str(e)}\nPlease ensure it's a valid CSV file with numeric data."

        # Generate analysis
        logger.info("Generating analysis...")
        try:
            return analyzer.analyze_financials(income_summary, balance_summary)
        finally:
            # Free per-request memory whether or not the analysis succeeded.
            clear_gpu_memory()

    except Exception as e:
        logger.error(f"Analysis error: {str(e)}")
        # Built without leading indentation: Markdown renders lines that
        # start with four or more spaces as a code block.
        return "\n".join([
            "Analysis Error:",
            "",
            str(e),
            "",
            "Please verify:",
            "1. Files are in CSV format",
            "2. Files contain numeric data columns",
            "3. Files follow standard financial statement format",
        ])

# Gradio UI: two single-file CSV uploads in, one Markdown report out.

def _csv_upload(label):
    """Build a single-file upload widget restricted to ``.csv`` files."""
    return gr.File(label=label, file_types=[".csv"], file_count="single")


# Help text shown under the page title.
_USAGE_NOTES = """## Financial Analysis Tool

How to use:
1. Click 'Upload Income Statement' to select your income statement CSV file
2. Click 'Upload Balance Sheet' to select your balance sheet CSV file
3. Wait for the analysis to complete

The tool will provide:
- Business Status Assessment
- Key Financial Insights
- Strategic Recommendations

Requirements:
- Files must be in CSV format
- Must contain numeric data columns
- Standard financial statement format preferred"""

iface = gr.Interface(
    fn=analyze_statements,
    inputs=[
        _csv_upload("Upload Income Statement (CSV)"),
        _csv_upload("Upload Balance Sheet (CSV)"),
    ],
    outputs=gr.Markdown(),
    title="Financial Statement Analyzer",
    description=_USAGE_NOTES,
    flagging_mode="never",
)

# Launch the interface when run as a script; exit with status 1 if the
# server cannot be started.
if __name__ == "__main__":
    try:
        iface.queue()  # Enable queuing for better file handling
        iface.launch(
            share=False,
            server_name="0.0.0.0",
            server_port=7860,
            show_api=False  # Disable API tab for security
        )
    except Exception as e:
        logger.error(f"Launch error: {str(e)}")
        # Equivalent to sys.exit(1), but uses the builtin exception
        # because this file never imports sys (the original call would
        # have raised NameError inside this handler).
        raise SystemExit(1)