"""Gradio app that analyzes an income statement and a balance sheet.

Two CSV files are cleaned, a handful of profitability / liquidity ratios are
computed per year, and two Hugging Face pipelines (FinBERT for sentiment,
TinyLlama for free-text commentary) turn the numbers into a Markdown report.
"""

import functools
import gc
import logging
import os
import sys
from typing import List, Tuple

import gradio as gr
import pandas as pd
import torch
from transformers import pipeline

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Device configuration
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
logger.info(f"Using device: {DEVICE}")


def clear_gpu_memory():
    """Release cached CUDA memory (when running on CUDA) and run the GC."""
    if DEVICE == "cuda":
        torch.cuda.empty_cache()
    gc.collect()


class FinancialDataExtractor:
    """Extract and clean financial data."""

    def __init__(self):
        self.logger = logger

    def clean_number(self, value):
        """Convert one cell of a financial statement to ``float``.

        Missing values, empty strings and a lone ``-`` become ``0.0``.
        Currency symbols (``$``), thousands separators and double quotes are
        stripped, and a value containing parentheses, e.g. ``(1,234)``, is
        read as negative. Anything that still cannot be parsed yields ``0.0``
        (deliberate best-effort behavior).
        """
        try:
            if pd.isna(value) or value == '' or value == '-':
                return 0.0

            if isinstance(value, (int, float)):
                return float(value)

            # Remove currency symbols, spaces, commas
            cleaned = (
                str(value)
                .replace('$', '')
                .replace(',', '')
                .replace('"', '')
                .strip()
            )

            # Handle parentheses for negative numbers
            if '(' in cleaned and ')' in cleaned:
                cleaned = '-' + cleaned.replace('(', '').replace(')', '')

            return float(cleaned)
        except (TypeError, ValueError, OverflowError):
            # Only parsing failures are swallowed; a bare ``except`` would
            # also hide KeyboardInterrupt / SystemExit.
            return 0.0

    def extract_data(self, df: pd.DataFrame) -> Tuple[pd.DataFrame, List[str]]:
        """Return a cleaned copy of *df* together with its year columns.

        Column labels are converted to stripped strings; every column whose
        label consists only of decimal digits is treated as a year column and
        has its cells passed through :meth:`clean_number`.

        The input frame is not modified.

        Raises:
            ValueError: if no year column is found.
        """
        # Work on a copy so the caller's DataFrame is left untouched.
        df = df.copy()

        # ``str(col)`` also copes with non-string labels (e.g. integer years),
        # for which the ``.str`` accessor is not available.
        df.columns = [str(col).strip() for col in df.columns]

        # ``isdecimal`` guarantees the label can later be compared via int().
        year_cols = [col for col in df.columns if col.isdecimal()]
        if not year_cols:
            raise ValueError("No year columns found in data")

        # Clean numeric data
        for col in year_cols:
            df[col] = df[col].apply(self.clean_number)

        return df, year_cols


class FinancialAnalyzer:
    """Financial analysis using small models."""

    def __init__(self):
        self.extractor = FinancialDataExtractor()
        self.sentiment_model = None
        self.analysis_model = None
        self.load_models()

    def load_models(self):
        """Load the sentiment and text-generation pipelines onto ``DEVICE``.

        Any loading error is logged and re-raised.
        """
        dtype = torch.float16 if DEVICE == "cuda" else torch.float32
        try:
            # Load FinBERT for sentiment analysis
            self.sentiment_model = pipeline(
                "text-classification",
                model="ProsusAI/finbert",
                torch_dtype=dtype,
                device=DEVICE,  # place the model explicitly; dtype follows DEVICE
                truncation=True
            )

            # Load small model for analysis
            self.analysis_model = pipeline(
                "text-generation",
                model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                torch_dtype=dtype,
                device=DEVICE
            )

            logger.info("Models loaded successfully")
        except Exception as e:
            logger.exception("Error loading models: %s", e)
            raise

    @staticmethod
    def _line_item(df: pd.DataFrame, pattern: str, year, statement: str,
                   position: int = 0):
        """Return the *year* value of a row whose 'Period' matches *pattern*.

        *pattern* is a case-insensitive regular expression. *position* picks
        among several matching rows (``0`` = first, ``-1`` = last).

        Raises:
            ValueError: if the 'Period' column or a matching row is missing.
        """
        if 'Period' not in df.columns:
            raise ValueError(f"{statement} has no 'Period' column")
        matches = df.loc[
            df['Period'].str.contains(pattern, na=False, case=False), year
        ]
        if matches.empty:
            raise ValueError(
                f"{statement} has no row matching '{pattern}'"
            )
        return matches.iloc[position]

    @staticmethod
    def _ratio(numerator, denominator):
        """Divide, returning NaN instead of inf / an error for a zero divisor."""
        if denominator == 0:
            return float("nan")
        return numerator / denominator

    def calculate_metrics(self, income_df: pd.DataFrame, balance_df: pd.DataFrame, year_cols: list) -> dict:
        """Calculate financial metrics for every year in *year_cols*.

        Both frames must have a 'Period' column naming the line items and one
        column per year. Returns ``{year: {'Profitability': ...,
        'Liquidity': ..., 'Growth': ...}}``. Ratios with a zero denominator
        are NaN.

        Raises:
            ValueError: if a required line item cannot be found.
        """
        metrics = {}
        item = self._line_item
        ratio = self._ratio

        for year in year_cols:
            # Income Statement metrics
            revenue = item(income_df, 'Total Net Revenue|Revenue', year, "Income Statement")
            cogs = item(income_df, 'Cost of Goods Sold', year, "Income Statement")
            ebit = item(income_df, 'Earnings Before Interest & Taxes', year, "Income Statement")
            # The last matching row is used for net income.
            net_income = item(income_df, 'Net Income|Net Earnings', year, "Income Statement", -1)

            # Balance Sheet metrics
            total_assets = item(balance_df, 'Total Assets', year, "Balance Sheet")
            current_assets = item(balance_df, 'Total current assets', year, "Balance Sheet")
            current_liabilities = item(balance_df, 'Total current liabilities', year, "Balance Sheet")
            # The last matching row is used for equity.
            equity = item(balance_df, "Shareholder's Equity", year, "Balance Sheet", -1)

            # Calculate ratios
            metrics[year] = {
                'Profitability': {
                    'Gross_Margin': ratio(revenue - cogs, revenue) * 100,
                    'Operating_Margin': ratio(ebit, revenue) * 100,
                    'Net_Margin': ratio(net_income, revenue) * 100,
                    'ROE': ratio(net_income, equity) * 100,
                    'ROA': ratio(net_income, total_assets) * 100
                },
                'Liquidity': {
                    'Current_Ratio': ratio(current_assets, current_liabilities),
                    'Working_Capital': current_assets - current_liabilities
                },
                'Growth': {
                    'Revenue': revenue,
                    'Net_Income': net_income,
                    'Total_Assets': total_assets
                }
            }

        return metrics

    def analyze_financials(self, income_df: pd.DataFrame, balance_df: pd.DataFrame) -> str:
        """Generate a Markdown financial analysis report.

        Only years present in both statements are analyzed. Errors are logged
        and re-raised.

        Raises:
            ValueError: if the statements have no year columns in common or a
                required line item is missing.
        """
        try:
            # Extract and clean data
            income_df, income_years = self.extractor.extract_data(income_df)
            balance_df, balance_years = self.extractor.extract_data(balance_df)

            # Restrict to years both statements cover, keeping income order.
            shared = set(balance_years)
            year_cols = [year for year in income_years if year in shared]
            if not year_cols:
                raise ValueError(
                    "Income Statement and Balance Sheet have no year columns in common"
                )

            # Calculate metrics
            metrics = self.calculate_metrics(income_df, balance_df, year_cols)

            # Get latest and earliest years (numeric, not lexicographic, order)
            latest_year = max(year_cols, key=int)
            earliest_year = min(year_cols, key=int)
            latest = metrics[latest_year]
            earliest = metrics[earliest_year]

            # Calculate growth
            revenue_growth = (self._ratio(latest['Growth']['Revenue'], earliest['Growth']['Revenue']) - 1) * 100
            profit_growth = (self._ratio(latest['Growth']['Net_Income'], earliest['Growth']['Net_Income']) - 1) * 100

            if latest['Profitability']['Net_Margin'] > earliest['Profitability']['Net_Margin']:
                margin_trend = 'improving'
            else:
                margin_trend = 'declining'

            # Generate analysis context
            context = "\n".join([
                f"Financial Analysis ({earliest_year}-{latest_year}):",
                "",
                "Performance Metrics:",
                f"- Revenue Growth: {revenue_growth:.1f}%",
                f"- Profit Growth: {profit_growth:.1f}%",
                f"- Current Gross Margin: {latest['Profitability']['Gross_Margin']:.1f}%",
                f"- Current Net Margin: {latest['Profitability']['Net_Margin']:.1f}%",
                f"- ROE: {latest['Profitability']['ROE']:.1f}%",
                f"- Current Ratio: {latest['Liquidity']['Current_Ratio']:.2f}",
                "",
                "Trends:",
                f"- Revenue has grown from ${earliest['Growth']['Revenue']:,.0f} to ${latest['Growth']['Revenue']:,.0f}",
                f"- Net Income has changed from ${earliest['Growth']['Net_Income']:,.0f} to ${latest['Growth']['Net_Income']:,.0f}",
                f"- Profitability margins show {margin_trend} trend",
            ])

            # Get sentiment
            sentiment = self.sentiment_model(context[:512])[0]

            # Generate detailed analysis
            prompt = (
                "[INST] As a financial analyst, provide a detailed analysis of this company:"
                f"\n\n{context}\n\n"
                "Include:\n1. Financial health assessment\n2. Key performance insights\n"
                "3. Strategic recommendations [/INST]"
            )
            analysis = self.analysis_model(
                prompt,
                # Budget applies to generated tokens only, not prompt + output.
                max_new_tokens=1024,
                num_return_sequences=1,
                do_sample=True,
                temperature=0.7,
                # Without this the prompt is echoed back inside the report.
                return_full_text=False
            )[0]['generated_text'].strip()

            # Format output
            output = "\n".join([
                "# Financial Analysis Report",
                "",
                f"## Overall Sentiment: {sentiment['label'].upper()} ({sentiment['score']:.1%})",
                "",
                f"## Key Performance Indicators ({latest_year})",
                f"- Gross Margin: {latest['Profitability']['Gross_Margin']:.1f}%",
                f"- Operating Margin: {latest['Profitability']['Operating_Margin']:.1f}%",
                f"- Net Margin: {latest['Profitability']['Net_Margin']:.1f}%",
                f"- ROE: {latest['Profitability']['ROE']:.1f}%",
                f"- Current Ratio: {latest['Liquidity']['Current_Ratio']:.2f}",
                "",
                f"## Performance Trends ({earliest_year}-{latest_year})",
                f"- Revenue Growth: {revenue_growth:.1f}%",
                f"- Profit Growth: {profit_growth:.1f}%",
                f"- Working Capital: ${latest['Liquidity']['Working_Capital']:,.0f}",
                "",
                "## Analysis",
                analysis,
            ])

            return output

        except Exception as e:
            logger.error(f"Analysis error: {str(e)}")
            raise


@functools.lru_cache(maxsize=1)
def _get_analyzer() -> FinancialAnalyzer:
    """Return the shared analyzer, loading the models on first use only.

    A failed load raises and is not cached, so the next call retries.
    """
    return FinancialAnalyzer()


def _upload_path(uploaded):
    """Return the filesystem path of an uploaded file.

    Accepts either an object exposing the path as ``.name`` or a plain path
    string, since the upload component may hand over either form.
    """
    return getattr(uploaded, "name", uploaded)


def analyze_statements(income_statement, balance_sheet):
    """Main function to analyze financial statements.

    Reads both uploaded CSV files and returns the Markdown report. Problems
    are reported as a user-facing message string instead of being raised.
    """
    if not income_statement or not balance_sheet:
        return "Please upload both Income Statement and Balance Sheet CSV files."

    try:
        # Read files
        income_df = pd.read_csv(_upload_path(income_statement))
        balance_df = pd.read_csv(_upload_path(balance_sheet))

        # Reuse the cached analyzer instead of reloading models per request
        return _get_analyzer().analyze_financials(income_df, balance_df)

    except Exception as e:
        logger.error(f"Analysis error: {str(e)}")
        return "\n".join([
            f"Analysis Error: {str(e)}",
            "",
            "Please ensure your CSV files:",
            "1. Have clear year columns",
            "2. Contain recognizable financial metrics",
            "3. Use consistent number formatting",
        ])
    finally:
        # Clear memory on both the success and the failure path
        clear_gpu_memory()


_DESCRIPTION = "\n".join([
    "## Financial Analysis Tool",
    "",
    "Upload your financial statements to get:",
    "- Performance Analysis",
    "- Key Metrics & Ratios",
    "- Trend Analysis",
    "- Strategic Recommendations",
])

# Create Gradio interface
iface = gr.Interface(
    fn=analyze_statements,
    inputs=[
        gr.File(label="Upload Income Statement (CSV)", file_types=[".csv"]),
        gr.File(label="Upload Balance Sheet (CSV)", file_types=[".csv"])
    ],
    outputs=gr.Markdown(),
    title="Financial Statement Analyzer",
    description=_DESCRIPTION,
    examples=None
)

# Launch the interface
if __name__ == "__main__":
    try:
        iface.launch(server_name="0.0.0.0", server_port=7860)
    except Exception as e:
        logger.error(f"Launch error: {str(e)}")
        sys.exit(1)