Spaces:

broadfield-dev
/

surya-demo

Running

App Files Community

broadfield-dev committed 3 days ago

Commit

ca15132

verified ·

1 Parent(s): b02989a

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -74

app.py CHANGED Viewed

@@ -10,7 +10,12 @@ import warnings
 import logging
 import datetime
 import matplotlib.pyplot as plt
-import sunpy.visualization.colormaps as sunpy_cm
 import traceback
 from io import BytesIO
 import re
@@ -26,13 +31,22 @@ logger = logging.getLogger(__name__)
 APP_CACHE = {}
-CHANNEL_TO_URL_CODE = {
-    "aia94": "0094", "aia131": "0131", "aia171": "0171", "aia193": "0193",
-    "aia211": "0211", "aia304": "0304", "aia335": "0335", "aia1600": "1600",
-    "hmi_m": "HMIBC", "hmi_bx": "HMIB", "hmi_by": "HMIB",
-    "hmi_bz": "HMIB", "hmi_v": "HMID"
 }
-SDO_CHANNELS = list(CHANNEL_TO_URL_CODE.keys())
 def setup_and_load_model():
     if "model" in APP_CACHE:
@@ -75,41 +89,6 @@ def setup_and_load_model():
     APP_CACHE["model"] = model
     yield "✅ Model setup complete."
-def find_nearest_browse_image_url(channel, target_dt):
-    url_code = CHANNEL_TO_URL_CODE[channel]
-    base_url = "https://sdo.gsfc.nasa.gov/assets/img/browse"
-    for i in range(2):
-        dt_to_try = target_dt - datetime.timedelta(days=i)
-        dir_url = dt_to_try.strftime(f"{base_url}/%Y/%m/%d/")
-        response = requests.get(dir_url)
-        if response.status_code != 200:
-            continue
-        filenames = re.findall(r'href="(\d{8}_\d{6}_4096_' + url_code + r'\.jpg)"', response.text)
-        if not filenames:
-            continue
-        best_filename = ""
-        min_diff = float('inf')
-        for fname in filenames:
-            try:
-                timestamp_str = fname.split('_')[1]
-                img_dt = datetime.datetime.strptime(f"{dt_to_try.strftime('%Y%m%d')}{timestamp_str}", "%Y%m%d%H%M%S")
-                diff = abs((target_dt - img_dt).total_seconds())
-                if diff < min_diff:
-                    min_diff = diff
-                    best_filename = fname
-            except (ValueError, IndexError):
-                continue
-        if best_filename:
-            return dir_url + best_filename
-    raise FileNotFoundError(f"Could not find any browse images for {channel} in the last 48 hours.")
 def fetch_and_process_sdo_data(target_dt, forecast_horizon_minutes):
     config = APP_CACHE["config"]
     img_size = config["model"]["img_size"]
@@ -119,36 +98,62 @@ def fetch_and_process_sdo_data(target_dt, forecast_horizon_minutes):
     target_time = target_dt + datetime.timedelta(minutes=forecast_horizon_minutes)
     all_times = sorted(list(set(input_times + [target_time])))
-    images = {}
     total_fetches = len(all_times) * len(SDO_CHANNELS)
     fetches_done = 0
-    yield f"Starting search for {total_fetches} data files..."
     for t in all_times:
-        images[t] = {}
         for channel in SDO_CHANNELS:
             fetches_done += 1
-            yield f"Finding [{fetches_done}/{total_fetches}]: Closest image for {channel} near {t.strftime('%Y-%m-%d %H:%M')}..."
-            image_url = find_nearest_browse_image_url(channel, t)
-            yield f"Downloading: {os.path.basename(image_url)}..."
-            response = requests.get(image_url)
-            response.raise_for_status()
-            images[t][channel] = Image.open(BytesIO(response.content))
-    yield "✅ All images found and downloaded. Starting preprocessing..."
     scalers_dict = APP_CACHE["scalers"]
     processed_tensors = {}
-    for t, channel_images in images.items():
         channel_tensors = []
         for i, channel in enumerate(SDO_CHANNELS):
-            img = channel_images[channel]
-            if img.mode != 'L':
-                img = img.convert('L')
-            img_resized = img.resize((img_size, img_size), Image.Resampling.LANCZOS)
-            norm_data = np.array(img_resized, dtype=np.float32)
             scaler = scalers_dict[channel]
             scaled_data = scaler.transform(norm_data.reshape(-1, 1)).reshape(norm_data.shape)
@@ -158,10 +163,10 @@ def fetch_and_process_sdo_data(target_dt, forecast_horizon_minutes):
     yield "✅ Preprocessing complete."
     input_tensor_list = [processed_tensors[t] for t in input_times]
     input_tensor = torch.stack(input_tensor_list, dim=1).unsqueeze(0)
-    target_image_map = images[target_time]
-    last_input_image_map = images[input_times[-1]]
-    yield (input_tensor, last_input_image_map, target_image_map)
 def run_inference(input_tensor):
     model = APP_CACHE["model"]
@@ -178,20 +183,14 @@ def generate_visualization(last_input_map, prediction_tensor, target_map, channe
     if last_input_map is None: return None, None, None
     c_idx = SDO_CHANNELS.index(channel_name)
-    # *** FIX: Access the specific scaler for the channel from the dictionary ***
     scaler = APP_CACHE["scalers"][channel_name]
-    # *** FIX: Access the parameters as attributes, not from to_dict() ***
-    mean = scaler.mean
-    std = scaler.std
-    epsilon = scaler.epsilon
-    sl_scale_factor = scaler.sl_scale_factor
     pred_slice = inverse_transform_single_channel(
         prediction_tensor[0, c_idx].numpy(), mean=mean, std=std, epsilon=epsilon, sl_scale_factor=sl_scale_factor
     )
-    target_img_data = np.array(target_map[channel_name])
-    vmax = np.quantile(np.nan_to_num(target_img_data), 0.995)
     cmap_name = f"sdoaia{channel_name.replace('aia', '')}" if 'aia' in channel_name else 'hmimag'
     cmap = plt.get_cmap(sunpy_cm.cmlist.get(cmap_name, 'gray'))
@@ -202,7 +201,7 @@ def generate_visualization(last_input_map, prediction_tensor, target_map, channe
         colored = (cmap(data_norm)[:, :, :3] * 255).astype(np.uint8)
         return Image.fromarray(colored)
-    return last_input_map[channel_name], to_pil(pred_slice), target_map[channel_name]
 def forecast_controller(date_str, hour, minute, forecast_horizon):
     yield {
@@ -274,11 +273,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         """
         <div align='center'>
         # ☀️ Surya: Live Forecast Demo ☀️
-        ### A Foundation Model for Solar Dynamics
         This demo runs NASA's **Surya**, a foundation model trained to understand the physics of the Sun.
         It looks at the Sun in 13 different channels (wavelengths of light) simultaneously to learn the complex relationships between phenomena like coronal loops, magnetic fields, and solar flares. By seeing these interconnected views, it can generate a holistic forecast of what the entire solar disk will look like in the near future.
-        <br>
-        <p style="color:red;font-weight:bold;">NOTE: This demo uses lower-quality browse images for reliability. The model was trained on high-fidelity scientific data, so forecast accuracy may vary.</p>
         </div>
         """
     )

 import logging
 import datetime
 import matplotlib.pyplot as plt
+import sunpy.map
+import sunpy.net.attrs as a
+from sunpy.net import Fido
+from astropy.wcs import WCS
+import astropy.units as u
+from reproject import reproject_interp
 import traceback
 from io import BytesIO
 import re
 APP_CACHE = {}
+SDO_CHANNELS_MAP = {
+    "aia94": (a.Wavelength(94 * u.angstrom), a.Sample(12 * u.s)),
+    "aia131": (a.Wavelength(131 * u.angstrom), a.Sample(12 * u.s)),
+    "aia171": (a.Wavelength(171 * u.angstrom), a.Sample(12 * u.s)),
+    "aia193": (a.Wavelength(193 * u.angstrom), a.Sample(12 * u.s)),
+    "aia211": (a.Wavelength(211 * u.angstrom), a.Sample(12 * u.s)),
+    "aia304": (a.Wavelength(304 * u.angstrom), a.Sample(12 * u.s)),
+    "aia335": (a.Wavelength(335 * u.angstrom), a.Sample(12 * u.s)),
+    "aia1600": (a.Wavelength(1600 * u.angstrom), a.Sample(24 * u.s)),
+    "hmi_m": (a.Physobs("intensity"), a.Sample(45 * u.s)),
+    "hmi_bx": (a.Physobs("los_magnetic_field"), a.Sample(720 * u.s)),
+    "hmi_by": (a.Physobs("los_magnetic_field"), a.Sample(720 * u.s)),
+    "hmi_bz": (a.Physobs("los_magnetic_field"), a.Sample(720 * u.s)),
+    "hmi_v": (a.Physobs("los_velocity"), a.Sample(45 * u.s)),
 }
+SDO_CHANNELS = list(SDO_CHANNELS_MAP.keys())
 def setup_and_load_model():
     if "model" in APP_CACHE:
     APP_CACHE["model"] = model
     yield "✅ Model setup complete."
 def fetch_and_process_sdo_data(target_dt, forecast_horizon_minutes):
     config = APP_CACHE["config"]
     img_size = config["model"]["img_size"]
     target_time = target_dt + datetime.timedelta(minutes=forecast_horizon_minutes)
     all_times = sorted(list(set(input_times + [target_time])))
+    data_maps = {}
+    last_successful_map = {} # Store the last good map for each channel
     total_fetches = len(all_times) * len(SDO_CHANNELS)
     fetches_done = 0
+    yield f"Starting search for {total_fetches} scientific data files..."
     for t in all_times:
+        data_maps[t] = {}
         for channel in SDO_CHANNELS:
             fetches_done += 1
+            yield f"Querying [{fetches_done}/{total_fetches}]: {channel} near {t.strftime('%Y-%m-%d %H:%M')}..."
+            # Handle placeholder channels by reusing hmi_bx
+            if channel in ["hmi_by", "hmi_bz"]:
+                if data_maps[t].get("hmi_bx"):
+                    smap = data_maps[t]["hmi_bx"]
+                    data_maps[t][channel] = smap
+                    last_successful_map[channel] = smap
+                continue
+            physobs, sample = SDO_CHANNELS_MAP[channel]
+            time_attr = a.Time(t - datetime.timedelta(minutes=5), t + datetime.timedelta(minutes=5))
+            instrument = a.Instrument.hmi if "hmi" in channel else a.Instrument.aia
+            query = Fido.search(time_attr, instrument, physobs, sample)
+            if query:
+                files = Fido.fetch(query[0,0], path="./data/sdo_cache")
+                smap = sunpy.map.Map(files[0])
+                data_maps[t][channel] = smap
+                last_successful_map[channel] = smap # Save the good map
+            elif channel in last_successful_map:
+                # If the query fails, reuse the last successful map for this channel
+                yield f"⚠️ WARNING: No data for {channel} near {t}. Reusing previous image."
+                data_maps[t][channel] = last_successful_map[channel]
+            else:
+                # If the very first image for a channel fails, we cannot proceed.
+                raise ValueError(f"CRITICAL: No initial data found for {channel}. Cannot proceed.")
+    yield "✅ All data acquired. Starting preprocessing..."
+    output_wcs = WCS(naxis=2)
+    output_wcs.wcs.crpix = [(img_size + 1) / 2, (img_size + 1) / 2]
+    output_wcs.wcs.cdelt = np.array([-1.2, 1.2]) * u.arcsec
+    output_wcs.wcs.crval = [0, 0] * u.arcsec
+    output_wcs.wcs.ctype = ['HPLN-TAN', 'HPLT-TAN']
     scalers_dict = APP_CACHE["scalers"]
     processed_tensors = {}
+    for t, channel_maps in data_maps.items():
         channel_tensors = []
         for i, channel in enumerate(SDO_CHANNELS):
+            smap = channel_maps[channel]
+            reprojected_data, _ = reproject_interp(smap, output_wcs, shape_out=(img_size, img_size))
+            exp_time = smap.meta.get('exptime', 1.0)
+            if exp_time is None or exp_time <= 0: exp_time = 1.0
+            norm_data = reprojected_data / exp_time
             scaler = scalers_dict[channel]
             scaled_data = scaler.transform(norm_data.reshape(-1, 1)).reshape(norm_data.shape)
     yield "✅ Preprocessing complete."
     input_tensor_list = [processed_tensors[t] for t in input_times]
     input_tensor = torch.stack(input_tensor_list, dim=1).unsqueeze(0)
+    target_map = data_maps[target_time]
+    last_input_map = data_maps[input_times[-1]]
+    yield (input_tensor, last_input_map, target_map)
 def run_inference(input_tensor):
     model = APP_CACHE["model"]
     if last_input_map is None: return None, None, None
     c_idx = SDO_CHANNELS.index(channel_name)
     scaler = APP_CACHE["scalers"][channel_name]
+    mean, std, epsilon, sl_scale_factor = scaler.mean, scaler.std, scaler.epsilon, scaler.sl_scale_factor
     pred_slice = inverse_transform_single_channel(
         prediction_tensor[0, c_idx].numpy(), mean=mean, std=std, epsilon=epsilon, sl_scale_factor=sl_scale_factor
     )
+    vmax = np.quantile(np.nan_to_num(target_map[channel_name].data), 0.995)
     cmap_name = f"sdoaia{channel_name.replace('aia', '')}" if 'aia' in channel_name else 'hmimag'
     cmap = plt.get_cmap(sunpy_cm.cmlist.get(cmap_name, 'gray'))
         colored = (cmap(data_norm)[:, :, :3] * 255).astype(np.uint8)
         return Image.fromarray(colored)
+    return to_pil(last_input_map[channel_name].data), to_pil(pred_slice), to_pil(target_map[channel_name].data)
 def forecast_controller(date_str, hour, minute, forecast_horizon):
     yield {
         """
         <div align='center'>
         # ☀️ Surya: Live Forecast Demo ☀️
+        ### A Foundation Model for Solar Dynamics using High-Fidelity Scientific Data
         This demo runs NASA's **Surya**, a foundation model trained to understand the physics of the Sun.
         It looks at the Sun in 13 different channels (wavelengths of light) simultaneously to learn the complex relationships between phenomena like coronal loops, magnetic fields, and solar flares. By seeing these interconnected views, it can generate a holistic forecast of what the entire solar disk will look like in the near future.
         </div>
         """
     )