Spaces:

Rogerjs
/

NeuroNarrative-Lite

Sleeping

App Files Files Community

Rogerjs committed on Dec 6, 2024

Commit

1ccf8ee

verified ·

1 Parent(s): 49be262

Update app.py

Browse files

Files changed (1) hide show

app.py +104 -44

app.py CHANGED Viewed

@@ -1,11 +1,12 @@
-import gradio as gr
 import mne
 import numpy as np
 import pandas as pd
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-import os
 model_name = "tiiuae/falcon-7b-instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
@@ -20,57 +21,101 @@ def compute_band_power(psd, freqs, fmin, fmax):
     band_psd = psd[:, freq_mask].mean()
     return float(band_psd)
 def load_eeg_data(file_path, default_sfreq=256.0, time_col='time'):
     """
-    Load EEG data from a file with flexible CSV handling.
-    - If FIF: Use read_raw_fif.
-    - If CSV:
-       * If `time_col` is present, use it as time.
-       * Otherwise, assume a default sfreq and treat all columns as channels.
     """
     _, file_ext = os.path.splitext(file_path)
     file_ext = file_ext.lower()
     if file_ext == '.fif':
         raw = mne.io.read_raw_fif(file_path, preload=True)
     elif file_ext == '.csv':
         df = pd.read_csv(file_path)
-        # Remove non-numeric columns except time_col
-        for col in df.columns:
-            if col != time_col:
-                # Drop non-numeric columns if any
-                if not pd.api.types.is_numeric_dtype(df[col]):
-                    df = df.drop(columns=[col])
-        if time_col in df.columns:
-            # Use the provided time column
             time = df[time_col].values
             data_df = df.drop(columns=[time_col])
             if len(time) < 2:
-                raise ValueError("Not enough time points to estimate sampling frequency.")
-            sfreq = 1.0 / np.mean(np.diff(time))
         else:
-            # No explicit time column, assume uniform sampling at default_sfreq
-            sfreq = default_sfreq
             data_df = df
-        # Channels are all remaining columns
         ch_names = list(data_df.columns)
         data = data_df.values.T  # shape: (n_channels, n_samples)
-        # Create MNE info
         ch_types = ['eeg'] * len(ch_names)
         info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
         raw = mne.io.RawArray(data, info)
     else:
-        raise ValueError("Unsupported file format. Please provide a FIF or CSV file.")
     return raw
-def process_eeg(file, default_sfreq, time_col):
     raw = load_eeg_data(file.name, default_sfreq=float(default_sfreq), time_col=time_col)
     psd, freqs = mne.time_frequency.psd_welch(raw, fmin=1, fmax=40)
@@ -87,30 +132,45 @@ Data Summary: {data_summary}
 Provide a concise, user-friendly interpretation of these findings in simple terms.
 """
     inputs = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(
         inputs, max_length=200, do_sample=True, top_k=50, top_p=0.95
     )
     summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return summary
-iface = gr.Interface(
-    fn=process_eeg,
-    inputs=[
-        gr.File(label="Upload your EEG data (FIF or CSV)"),
-        gr.Textbox(label="Default Sampling Frequency if no time column (Hz)", value="256"),
-        gr.Textbox(label="Time column name (if exists)", value="time")
-    ],
-    outputs="text",
-    title="NeuroNarrative-Lite: EEG Summary (Flexible CSV Handling)",
-    description=(
-        "Upload EEG data in FIF or CSV format. "
-        "If CSV, either include a 'time' column or specify a default sampling frequency. "
-        "Non-numeric columns will be removed (except the chosen time column)."
     )
-)
 if __name__ == "__main__":
-    iface.launch()

+import os
 import mne
 import numpy as np
 import pandas as pd
+import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+# Load LLM
 model_name = "tiiuae/falcon-7b-instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
     band_psd = psd[:, freq_mask].mean()
     return float(band_psd)
def inspect_file(file):
    """
    Inspect the uploaded file to determine available columns.

    Parameters
    ----------
    file : object with a ``name`` attribute, str, os.PathLike or None
        The upload to inspect. Objects exposing ``.name`` are read through
        that attribute; plain path strings / path-like objects are used
        directly. ``None`` means nothing was uploaded.

    Returns
    -------
    tuple
        ``(message, columns, preview)`` where ``message`` is a status string,
        ``columns`` is the list of CSV column names (numeric and non-numeric;
        empty for FIF, unsupported or unreadable files) and ``preview`` is a
        text rendering of the first rows or an explanatory placeholder.
    """
    if file is None:
        return "No file uploaded.", [], "No preview available."

    # A path string is its own path; anything else is expected to carry the
    # path in `.name`, as the original upload objects do.
    if isinstance(file, (str, os.PathLike)):
        file_path = os.fspath(file)
    else:
        file_path = file.name

    _, file_ext = os.path.splitext(file_path)
    file_ext = file_ext.lower()

    if file_ext == ".fif":
        # FIF files carry their own metadata, so there are no columns to pick.
        return (
            "FIF file detected. No need for time column selection. Default sampling frequency will be read from file.",
            [],
            "FIF file doesn't require further inspection."
        )

    if file_ext == ".csv":
        # Only the first few rows are needed to list the columns.
        try:
            df = pd.read_csv(file_path, nrows=5)
        except Exception as e:
            # UI boundary: report the problem instead of crashing the app.
            return f"Error reading CSV: {e}", [], "Could not read CSV preview."

        cols = list(df.columns)
        try:
            preview = df.to_markdown()
        except ImportError:
            # `to_markdown` relies on the optional `tabulate` package; fall
            # back to a fenced plain-text table when it is not installed.
            preview = f"```\n{df.to_string()}\n```"
        return (
            "CSV file detected. Select a time column if available, or leave it blank and specify a default frequency.",
            cols,
            preview
        )

    return "Unsupported file format.", [], "No preview available."
 def load_eeg_data(file_path, default_sfreq=256.0, time_col='time'):
     """
+    Load EEG data with flexibility.
+    If FIF: Use MNE's read_raw_fif directly.
+    If CSV:
+      - If time_col is given and present in the file, use it.
+      - Otherwise, assume default_sfreq.
     """
     _, file_ext = os.path.splitext(file_path)
     file_ext = file_ext.lower()
     if file_ext == '.fif':
         raw = mne.io.read_raw_fif(file_path, preload=True)
     elif file_ext == '.csv':
         df = pd.read_csv(file_path)
+        # If time_col is specified and in df, use it to compute sfreq
+        if time_col and time_col in df.columns:
             time = df[time_col].values
             data_df = df.drop(columns=[time_col])
+            # Drop non-numeric columns
+            for col in data_df.columns:
+                if not pd.api.types.is_numeric_dtype(data_df[col]):
+                    data_df = data_df.drop(columns=[col])
             if len(time) < 2:
+                # Not enough time points, fallback to default_sfreq
+                sfreq = default_sfreq
+            else:
+                # Compute sfreq from time
+                sfreq = 1.0 / np.mean(np.diff(time))
         else:
+            # No time column used, assume default_sfreq
+            # Drop non-numeric columns
+            for col in df.columns:
+                if not pd.api.types.is_numeric_dtype(df[col]):
+                    df = df.drop(columns=[col])
             data_df = df
+            sfreq = default_sfreq
         ch_names = list(data_df.columns)
         data = data_df.values.T  # shape: (n_channels, n_samples)
         ch_types = ['eeg'] * len(ch_names)
         info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
         raw = mne.io.RawArray(data, info)
     else:
+        raise ValueError("Unsupported file format. Provide a FIF or CSV file.")
     return raw
+def analyze_eeg(file, default_sfreq, time_col):
+    if file is None:
+        return "No file uploaded."
     raw = load_eeg_data(file.name, default_sfreq=float(default_sfreq), time_col=time_col)
     psd, freqs = mne.time_frequency.psd_welch(raw, fmin=1, fmax=40)
 Provide a concise, user-friendly interpretation of these findings in simple terms.
 """
     inputs = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(
         inputs, max_length=200, do_sample=True, top_k=50, top_p=0.95
     )
     summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return summary
+#########################
+# BUILD THE GRADIO INTERFACE
+#########################
+# Step 1: Inspect file
def preview_file(file):
    """
    Gradio callback for the "Inspect File" step.

    Runs ``inspect_file`` on the upload and returns, in order, the status
    message, an update that repopulates the time-column dropdown with the
    detected columns (selection cleared), and the preview text.
    """
    msg, cols, preview = inspect_file(file)
    # `gr.update` is the version-independent way to patch a component;
    # the per-component `gr.Dropdown.update` classmethod no longer exists in
    # Gradio 4+.
    return msg, gr.update(choices=cols, value=None), preview
with gr.Blocks() as demo:
    # --- Layout -----------------------------------------------------------
    # Components render in creation order, so this sequence is the page
    # layout from top to bottom.
    gr.Markdown(value="# NeuroNarrative-Lite: EEG Summary with Flexible Preprocessing")
    gr.Markdown(
        value=(
            "Upload an EEG file (FIF or CSV). If it's CSV, we will inspect the file and let you choose a time column. "
            "If no suitable time column is found, leave it blank and provide a default sampling frequency."
        )
    )

    file_input = gr.File(label="Upload your EEG data (FIF or CSV)")

    preview_button = gr.Button(value="Inspect File")
    msg_output = gr.Markdown()
    cols_dropdown = gr.Dropdown(
        label="Select Time Column (optional)",
        interactive=True,
    )
    preview_output = gr.Markdown()

    default_sfreq_input = gr.Textbox(
        label="Default Sampling Frequency (Hz) if no time column",
        value="256",
    )
    analyze_button = gr.Button(value="Run Analysis")
    result_output = gr.Textbox(label="Analysis Summary")

    # --- Event wiring -----------------------------------------------------
    # Step 1: inspect the upload and fill the time-column dropdown.
    preview_button.click(
        fn=preview_file,
        inputs=[file_input],
        outputs=[msg_output, cols_dropdown, preview_output],
    )
    # Step 2: run the analysis with the chosen time column / default rate.
    analyze_button.click(
        fn=analyze_eeg,
        inputs=[file_input, default_sfreq_input, cols_dropdown],
        outputs=[result_output],
    )

if __name__ == "__main__":
    demo.launch()