sergiopaniego HF Staff committed on
Commit
66c19f0
·
1 Parent(s): 99dc488
Files changed (2) hide show
  1. app.py +15 -0
  2. requirements.txt +3 -1
app.py CHANGED
@@ -1,7 +1,10 @@
1
  import os
2
  import subprocess
 
3
  import gradio as gr
4
  from huggingface_hub import InferenceClient
 
 
5
 
6
  """
7
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
@@ -14,6 +17,18 @@ if not os.path.exists("search-and-learn"):
14
  subprocess.run(["pip", "install", "-e", "./search-and-learn[dev]"])
15
 
16
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  def respond(
18
  message,
19
  history: list[tuple[str, str]],
 
1
  import os
2
  import subprocess
3
+ import torch
4
  import gradio as gr
5
  from huggingface_hub import InferenceClient
6
+ from vllm import LLM
7
+ from sal.models.reward_models import RLHFFlow
8
 
9
  """
10
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 
17
  subprocess.run(["pip", "install", "-e", "./search-and-learn[dev]"])
18
 
19
 
20
+ model_path = "meta-llama/Llama-3.2-1B-Instruct"
21
+ prm_path = "RLHFlow/Llama3.1-8B-PRM-Deepseek-Data"
22
+
23
+ llm = LLM(
24
+ model=model_path,
25
+ gpu_memory_utilization=0.5, # Utilize 50% of GPU memory
26
+ enable_prefix_caching=True, # Optimize repeated prefix computations
27
+ seed=42, # Set seed for reproducibility
28
+ )
29
+
30
+
31
+
32
  def respond(
33
  message,
34
  history: list[tuple[str, str]],
requirements.txt CHANGED
@@ -1 +1,3 @@
1
- huggingface_hub==0.25.2
 
 
 
1
+ huggingface_hub==0.25.2
2
+ torch
3
+ vllm