import os

import gradio as gr
import mne
import numpy as np
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load an open-source instruction-tuned LLM (no additional training)
model_name = "tiiuae/falcon-7b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.float16,
    device_map="auto",  # Automatically places the model on GPU if available, else CPU
)


def compute_band_power(psd, freqs, fmin, fmax):
    """Compute mean band power in the given frequency range."""
    freq_mask = (freqs >= fmin) & (freqs <= fmax)
    # Take the mean across channels and frequencies within the band
    band_psd = psd[:, freq_mask].mean()
    return float(band_psd)


def load_eeg_data(file_path):
    """
    Load EEG data from a file.

    If a FIF file is detected, use MNE's read_raw_fif.
    If a CSV file is detected, load it via pandas and build a RawArray.
    """
    _, file_ext = os.path.splitext(file_path)
    file_ext = file_ext.lower()

    if file_ext == ".fif":
        raw = mne.io.read_raw_fif(file_path, preload=True)
    elif file_ext == ".csv":
        # Assume the first column is 'time' and all other columns are channels
        df = pd.read_csv(file_path)
        if "time" not in df.columns:
            raise ValueError("CSV must contain a 'time' column for timestamps.")
        if len(df) < 2:
            raise ValueError("Not enough time points in CSV.")

        time = df["time"].values
        data = df.drop(columns=["time"]).values.T  # shape: (n_channels, n_samples)

        # Estimate the sampling frequency from the time vector (assumed uniform
        # and in seconds): 1 / mean time step. This is a simplistic approach.
        sfreq = 1.0 / np.mean(np.diff(time))

        # Create the MNE Info object and wrap the data in a RawArray
        ch_names = [c for c in df.columns if c != "time"]
        ch_types = ["eeg"] * len(ch_names)
        info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
        raw = mne.io.RawArray(data, info)
    else:
        raise ValueError("Unsupported file format. Please provide a FIF or CSV file.")

    return raw


def process_eeg(file):
    # Gradio may pass a file path string (newer versions) or a file-like object
    # with a .name attribute (older versions); handle both.
    file_path = file if isinstance(file, str) else file.name
    raw = load_eeg_data(file_path)

    # Compute the power spectral density (PSD) between 1 and 40 Hz.
    # Recent MNE versions expose Welch's method via Raw.compute_psd();
    # the older mne.time_frequency.psd_welch helper has been removed.
    spectrum = raw.compute_psd(method="welch", fmin=1.0, fmax=40.0, verbose=False)
    psd, freqs = spectrum.get_data(return_freqs=True)

    # Compute simple band powers
    alpha_power = compute_band_power(psd, freqs, 8, 12)
    beta_power = compute_band_power(psd, freqs, 13, 30)

    # Create a short summary of the extracted features. Note that the
    # qualitative sentence is a fixed placeholder, not derived from the data.
    data_summary = (
        f"Alpha power: {alpha_power:.3f}, Beta power: {beta_power:.3f}. "
        f"The EEG shows stable alpha rhythms and slightly elevated beta activity."
    )

    # Prepare the prompt for the language model
    prompt = f"""You are a neuroscientist analyzing EEG features.
Data Summary: {data_summary}
Provide a concise, user-friendly interpretation of these findings in simple terms.
"""

    # Generate the summary using the LLM
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,  # bound the generated continuation, not the total length
        do_sample=True,
        top_k=50,
        top_p=0.95,
    )
    # Decode only the newly generated tokens so the prompt is not echoed back
    generated_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    summary = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return summary


iface = gr.Interface(
    fn=process_eeg,
    inputs=gr.File(label="Upload your EEG data (FIF or CSV)"),
    outputs="text",
    title="NeuroNarrative-Lite: EEG Summary",
    description=(
        "Upload EEG data in FIF (MNE native) or CSV format. "
        "The system extracts basic EEG features and generates "
        "a human-readable summary using an open-source language model."
    ),
)

if __name__ == "__main__":
    iface.launch()
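

# ---------------------------------------------------------------------------
# Optional helper: a minimal sketch of how a compatible test CSV could be
# generated locally. It reuses the numpy/pandas imports above; the channel
# names ("Fz", "Cz", "Pz"), the 250 Hz sampling rate, the 10 s duration, and
# the "demo_eeg.csv" path are illustrative assumptions, not part of the app.
# Call make_demo_csv() from a Python shell, then upload the resulting file
# through the Gradio interface.
def make_demo_csv(path="demo_eeg.csv", sfreq=250.0, duration=10.0):
    """Write a synthetic EEG-like CSV with a 'time' column plus channel columns."""
    t = np.arange(0.0, duration, 1.0 / sfreq)
    rng = np.random.default_rng(seed=0)
    frame = pd.DataFrame({"time": t})
    for ch in ("Fz", "Cz", "Pz"):
        # A 10 Hz (alpha-band) sinusoid plus noise, scaled to roughly EEG-like volts
        frame[ch] = (np.sin(2 * np.pi * 10.0 * t)
                     + 0.5 * rng.standard_normal(t.size)) * 1e-5
    frame.to_csv(path, index=False)
    return path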