Commit 2cb48ed
Parent: 66c19f0
test
app.py CHANGED
@@ -3,6 +3,7 @@ import subprocess
 import torch
 import gradio as gr
 from huggingface_hub import InferenceClient
+from vllm.config import DeviceConfig
 from vllm import LLM
 from sal.models.reward_models import RLHFFlow
 
@@ -17,6 +18,12 @@ if not os.path.exists("search-and-learn"):
     subprocess.run(["pip", "install", "-e", "./search-and-learn[dev]"])
 
 
+device_config = DeviceConfig(device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
+print('device_config', device_config)
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+print('device', device)
+
+
 model_path = "meta-llama/Llama-3.2-1B-Instruct"
 prm_path = "RLHFlow/Llama3.1-8B-PRM-Deepseek-Data"
 
@@ -25,6 +32,7 @@ llm = LLM(
     gpu_memory_utilization=0.5,  # Utilize 50% of GPU memory
     enable_prefix_caching=True,  # Optimize repeated prefix computations
     seed=42,  # Set seed for reproducibility
+    config=device_config
 )
 
 
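A note on the added config=device_config line: vLLM's LLM constructor forwards its keyword arguments to EngineArgs, which in the versions I know of has no config field, so this kwarg may raise a TypeError at startup; vLLM normally builds its own DeviceConfig internally, and the device is usually selected via the device engine argument. Below is a minimal sketch under that assumption — the device kwarg and its "cuda"/"cpu" values come from vLLM's EngineArgs, and the sketch is not part of this commit:

# Hedged sketch, not the committed code. Assumes a vLLM version whose
# EngineArgs exposes a `device` option; vLLM constructs the DeviceConfig
# itself from this value, so no manual DeviceConfig import is needed.
import torch
from vllm import LLM

# Pick the GPU when one is visible, mirroring the commit's intent.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("device", device)

llm = LLM(
    model="meta-llama/Llama-3.2-1B-Instruct",
    gpu_memory_utilization=0.5,   # utilize 50% of GPU memory
    enable_prefix_caching=True,   # optimize repeated prefix computations
    seed=42,                      # set seed for reproducibility
    device=device,                # forwarded to EngineArgs -> DeviceConfig
)

Leaving device unset (or "auto") lets vLLM detect the platform on its own, which is usually sufficient on a single-GPU Space.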