import os

import gradio as gr
import mne
import numpy as np
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load an open-source instruction-tuned LLM (no additional training)
model_name = "tiiuae/falcon-7b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.float16,
    device_map="auto",  # Automatically places the model on GPU if available, else CPU
)


def compute_band_power(psd, freqs, fmin, fmax):
    """Compute mean band power in the given frequency range."""
    freq_mask = (freqs >= fmin) & (freqs <= fmax)
    # Take the mean across channels and frequencies within the band
    band_psd = psd[:, freq_mask].mean()
    return float(band_psd)


def load_eeg_data(file_path):
    """
    Load EEG data from a file.

    If a FIF file is detected, use MNE's read_raw_fif.
    If a CSV file is detected, load it via pandas and build a RawArray.
    """
    _, file_ext = os.path.splitext(file_path)
    file_ext = file_ext.lower()

    if file_ext == ".fif":
        raw = mne.io.read_raw_fif(file_path, preload=True)
    elif file_ext == ".csv":
        # Assume the first column is 'time' and all other columns are channels
        df = pd.read_csv(file_path)
        if "time" not in df.columns:
            raise ValueError("CSV must contain a 'time' column for timestamps.")
        if len(df) < 2:
            raise ValueError("Not enough time points in CSV.")

        time = df["time"].values
        data = df.drop(columns=["time"]).values.T  # shape: (n_channels, n_samples)

        # Estimate the sampling frequency from the time vector (assumed uniform
        # and in seconds): 1 / mean time step. This is a simplistic approach.
        sfreq = 1.0 / np.mean(np.diff(time))

        # Create the MNE Info object and wrap the data in a RawArray
        ch_names = [c for c in df.columns if c != "time"]
        ch_types = ["eeg"] * len(ch_names)
        info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
        raw = mne.io.RawArray(data, info)
    else:
        raise ValueError("Unsupported file format. Please provide a FIF or CSV file.")

    return raw


def process_eeg(file):
    # Gradio may pass a file path string (newer versions) or a file-like object
    # with a .name attribute (older versions); handle both.
    file_path = file if isinstance(file, str) else file.name
    raw = load_eeg_data(file_path)

    # Compute the power spectral density (PSD) between 1 and 40 Hz.
    # Recent MNE versions expose Welch's method via Raw.compute_psd();
    # the older mne.time_frequency.psd_welch helper has been removed.
    spectrum = raw.compute_psd(method="welch", fmin=1.0, fmax=40.0, verbose=False)
    psd, freqs = spectrum.get_data(return_freqs=True)

    # Compute simple band powers
    alpha_power = compute_band_power(psd, freqs, 8, 12)
    beta_power = compute_band_power(psd, freqs, 13, 30)

    # Create a short summary of the extracted features. Note that the
    # qualitative sentence is a fixed placeholder, not derived from the data.
    data_summary = (
        f"Alpha power: {alpha_power:.3f}, Beta power: {beta_power:.3f}. "
        f"The EEG shows stable alpha rhythms and slightly elevated beta activity."
    )

    # Prepare the prompt for the language model
    prompt = f"""You are a neuroscientist analyzing EEG features.
Data Summary: {data_summary}
Provide a concise, user-friendly interpretation of these findings in simple terms.
"""

    # Generate the summary using the LLM
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,  # bound the generated continuation, not the total length
        do_sample=True,
        top_k=50,
        top_p=0.95,
    )
    # Decode only the newly generated tokens so the prompt is not echoed back
    generated_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    summary = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return summary


iface = gr.Interface(
    fn=process_eeg,
    inputs=gr.File(label="Upload your EEG data (FIF or CSV)"),
    outputs="text",
    title="NeuroNarrative-Lite: EEG Summary",
    description=(
        "Upload EEG data in FIF (MNE native) or CSV format. "
        "The system extracts basic EEG features and generates "
        "a human-readable summary using an open-source language model."
    ),
)

if __name__ == "__main__":
    iface.launch()
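

# ---------------------------------------------------------------------------
# Optional helper: a minimal sketch of how a compatible test CSV could be
# generated locally. It reuses the numpy/pandas imports above; the channel
# names ("Fz", "Cz", "Pz"), the 250 Hz sampling rate, the 10 s duration, and
# the "demo_eeg.csv" path are illustrative assumptions, not part of the app.
# Call make_demo_csv() from a Python shell, then upload the resulting file
# through the Gradio interface.
def make_demo_csv(path="demo_eeg.csv", sfreq=250.0, duration=10.0):
    """Write a synthetic EEG-like CSV with a 'time' column plus channel columns."""
    t = np.arange(0.0, duration, 1.0 / sfreq)
    rng = np.random.default_rng(seed=0)
    frame = pd.DataFrame({"time": t})
    for ch in ("Fz", "Cz", "Pz"):
        # A 10 Hz (alpha-band) sinusoid plus noise, scaled to roughly EEG-like volts
        frame[ch] = (np.sin(2 * np.pi * 10.0 * t)
                     + 0.5 * rng.standard_normal(t.size)) * 1e-5
    frame.to_csv(path, index=False)
    return path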