Commit 66c19f0
Parent(s): 99dc488
Vllm test
Files changed:
- app.py +15 -0
- requirements.txt +3 -1
app.py
CHANGED
@@ -1,7 +1,10 @@
 import os
 import subprocess
+import torch
 import gradio as gr
 from huggingface_hub import InferenceClient
+from vllm import LLM
+from sal.models.reward_models import RLHFFlow

 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
@@ -14,6 +17,18 @@ if not os.path.exists("search-and-learn"):
     subprocess.run(["pip", "install", "-e", "./search-and-learn[dev]"])


+model_path = "meta-llama/Llama-3.2-1B-Instruct"
+prm_path = "RLHFlow/Llama3.1-8B-PRM-Deepseek-Data"
+
+llm = LLM(
+    model=model_path,
+    gpu_memory_utilization=0.5,  # Utilize 50% of GPU memory
+    enable_prefix_caching=True,  # Optimize repeated prefix computations
+    seed=42,  # Set seed for reproducibility
+)
+
+
+
 def respond(
     message,
     history: list[tuple[str, str]],
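Note that this hunk only constructs the vLLM engine; `prm_path` and the `RLHFFlow` import are added but not yet used at this point in the commit. As a rough sketch of how the resulting `llm` object could be exercised, the snippet below mirrors the constructor arguments from the diff, while the prompt text and sampling settings are illustrative assumptions rather than code from this commit:

    from vllm import LLM, SamplingParams

    llm = LLM(
        model="meta-llama/Llama-3.2-1B-Instruct",
        gpu_memory_utilization=0.5,   # Utilize 50% of GPU memory
        enable_prefix_caching=True,    # Optimize repeated prefix computations
        seed=42,                       # Set seed for reproducibility
    )

    # Assumed usage, not part of the commit: sample one completion from the engine.
    sampling_params = SamplingParams(temperature=0.7, max_tokens=256)
    outputs = llm.generate(["Explain prefix caching in one sentence."], sampling_params)
    print(outputs[0].outputs[0].text)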
requirements.txt
CHANGED
@@ -1 +1,3 @@
-huggingface_hub==0.25.2
+huggingface_hub==0.25.2
+torch
+vllm