Tonic committed on
Commit
efd66db
·
1 Parent(s): cc4cdeb

Attempt to solve GPU error

Browse files
Files changed (1) hide show
  1. app.py +49 -86
app.py CHANGED
@@ -356,100 +356,63 @@ def make_prediction(symbol: str, timeframe: str = "1d", prediction_days: int = 5
356
  # Move the entire model and all its components to GPU
357
  pipe.model = pipe.model.to(device)
358
 
359
- # Ensure all model parameters and buffers are on GPU
360
  for param in pipe.model.parameters():
361
  param.data = param.data.to(device)
362
  for buffer in pipe.model.buffers():
363
  buffer.data = buffer.data.to(device)
364
 
365
- # Move any registered buffers or parameters in submodules
366
  for module in pipe.model.modules():
367
- if hasattr(module, 'register_buffer'):
368
- for name, buffer in module._buffers.items():
369
- if buffer is not None and hasattr(buffer, 'to'):
370
- module._buffers[name] = buffer.to(device)
371
- if hasattr(module, 'register_parameter'):
372
- for name, param in module._parameters.items():
373
- if param is not None and hasattr(param, 'to'):
374
- module._parameters[name] = param.to(device)
375
 
376
- # Use predict_quantiles with proper formatting
377
- with torch.amp.autocast('cuda'):
378
- # Ensure all inputs are on GPU
379
- context = context.to(device)
380
-
381
- # Move quantile levels to GPU
382
- quantile_levels = torch.tensor([0.1, 0.5, 0.9], device=device, dtype=dtype)
383
-
384
- # Ensure prediction length is on GPU
385
- prediction_length = torch.tensor(actual_prediction_length, device=device, dtype=torch.long)
386
-
387
- # Force all model components to GPU
388
- pipe.model = pipe.model.to(device)
389
- if hasattr(pipe, 'tokenizer') and hasattr(pipe.tokenizer, 'to'):
390
- pipe.tokenizer = pipe.tokenizer.to(device)
391
-
392
- # Ensure all model states are on GPU
393
- if hasattr(pipe.model, 'state_dict'):
394
- state_dict = pipe.model.state_dict()
395
- for key in state_dict:
396
- if isinstance(state_dict[key], torch.Tensor):
397
- state_dict[key] = state_dict[key].to(device)
398
- pipe.model.load_state_dict(state_dict)
399
-
400
- # Ensure all model attributes are on GPU
401
- for attr_name in dir(pipe.model):
402
- attr = getattr(pipe.model, attr_name)
403
- if isinstance(attr, torch.Tensor) and hasattr(attr, 'to'):
404
- setattr(pipe.model, attr_name, attr.to(device))
405
-
406
- # Ensure all model submodules are on GPU
407
- for name, module in pipe.model.named_modules():
408
- if hasattr(module, 'to'):
409
- module.to(device)
410
-
411
- # Ensure all model buffers are on GPU
412
- for name, buffer in pipe.model.named_buffers():
413
- if buffer is not None and hasattr(buffer, 'to'):
414
- pipe.model.register_buffer(name, buffer.to(device))
415
-
416
- # Ensure all model parameters are on GPU
417
- for name, param in pipe.model.named_parameters():
418
- if param is not None and hasattr(param, 'to'):
419
- param.data = param.data.to(device)
420
-
421
- # Ensure all model attributes that might contain tensors are on GPU
422
- for name, value in pipe.model.__dict__.items():
423
- if isinstance(value, torch.Tensor) and hasattr(value, 'to'):
424
- pipe.model.__dict__[name] = value.to(device)
425
-
426
- # Move any additional model components to GPU
427
- if hasattr(pipe.model, 'config'):
428
- for key, value in pipe.model.config.__dict__.items():
429
- if isinstance(value, torch.Tensor) and hasattr(value, 'to'):
430
- setattr(pipe.model.config, key, value.to(device))
431
-
432
- # Ensure all model components are in eval mode
433
- pipe.model.eval()
434
-
435
- # Move any additional tensors in the pipeline to GPU
436
- for attr_name in dir(pipe):
437
- attr = getattr(pipe, attr_name)
438
- if isinstance(attr, torch.Tensor) and hasattr(attr, 'to'):
439
- setattr(pipe, attr_name, attr.to(device))
440
-
441
- # Ensure context is properly shaped and on GPU
442
- if len(context.shape) == 1:
443
- context = context.unsqueeze(0)
444
- context = context.to(device)
445
 
446
- # Ensure all inputs are on the same device
447
- with torch.cuda.device(device):
448
- quantiles, mean = pipe.predict_quantiles(
449
- context=context,
450
- prediction_length=actual_prediction_length,
451
- quantile_levels=[0.1, 0.5, 0.9]
452
- )
453
 
454
  if quantiles is None or mean is None:
455
  raise ValueError("Chronos returned empty prediction")
 
356
  # Move the entire model and all its components to GPU
357
  pipe.model = pipe.model.to(device)
358
 
359
+ # Move all model parameters and buffers to GPU
360
  for param in pipe.model.parameters():
361
  param.data = param.data.to(device)
362
  for buffer in pipe.model.buffers():
363
  buffer.data = buffer.data.to(device)
364
 
365
+ # Move all model submodules to GPU
366
  for module in pipe.model.modules():
367
+ if hasattr(module, 'to'):
368
+ module.to(device)
 
 
 
 
 
 
369
 
370
+ # Move all model attributes to GPU
371
+ for name, value in pipe.model.__dict__.items():
372
+ if isinstance(value, torch.Tensor):
373
+ pipe.model.__dict__[name] = value.to(device)
374
+
375
+ # Move all model config tensors to GPU
376
+ if hasattr(pipe.model, 'config'):
377
+ for key, value in pipe.model.config.__dict__.items():
378
+ if isinstance(value, torch.Tensor):
379
+ setattr(pipe.model.config, key, value.to(device))
380
+
381
+ # Move all pipeline tensors to GPU
382
+ for name, value in pipe.__dict__.items():
383
+ if isinstance(value, torch.Tensor):
384
+ setattr(pipe, name, value.to(device))
385
+
386
+ # Ensure all model states are on GPU
387
+ if hasattr(pipe.model, 'state_dict'):
388
+ state_dict = pipe.model.state_dict()
389
+ for key in state_dict:
390
+ if isinstance(state_dict[key], torch.Tensor):
391
+ state_dict[key] = state_dict[key].to(device)
392
+ pipe.model.load_state_dict(state_dict)
393
+
394
+ # Move any additional components to GPU
395
+ if hasattr(pipe, 'tokenizer'):
396
+ for name, value in pipe.tokenizer.__dict__.items():
397
+ if isinstance(value, torch.Tensor):
398
+ setattr(pipe.tokenizer, name, value.to(device))
399
+
400
+ # Ensure context is properly shaped and on GPU
401
+ if len(context.shape) == 1:
402
+ context = context.unsqueeze(0)
403
+ context = context.to(device)
404
+
405
+ # Ensure all inputs are on the same device
406
+ with torch.cuda.device(device):
407
+ # Force synchronization to ensure all tensors are on GPU
408
+ torch.cuda.synchronize()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
409
 
410
+ # Make prediction
411
+ quantiles, mean = pipe.predict_quantiles(
412
+ context=context,
413
+ prediction_length=actual_prediction_length,
414
+ quantile_levels=[0.1, 0.5, 0.9]
415
+ )
 
416
 
417
  if quantiles is None or mean is None:
418
  raise ValueError("Chronos returned empty prediction")