Update app.py
app.py CHANGED
@@ -5,7 +5,9 @@ from vllm import LLM, SamplingParams
 # Load the model and tokenizer from Hugging Face
 model_name = "facebook/opt-125m"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-
+
+# Initialize vLLM with CPU-only configuration
+vllm_model = LLM(model=model_name, tensor_parallel_size=1, device="cpu")
 
 def generate_response(prompt, max_tokens, temperature, top_p):
     # Tokenize the prompt
@@ -27,8 +29,8 @@ def generate_response(prompt, max_tokens, temperature, top_p):
 
 # Gradio UI
 with gr.Blocks() as demo:
-    gr.Markdown("# 🚀 Hugging Face Integration with vLLM")
-    gr.Markdown("Generate text using the vLLM integration with Hugging Face models.")
+    gr.Markdown("# 🚀 Hugging Face Integration with vLLM (CPU)")
+    gr.Markdown("Generate text using the vLLM integration with Hugging Face models on CPU.")
 
     with gr.Row():
         with gr.Column():
@@ -74,4 +76,4 @@ with gr.Blocks() as demo:
     )
 
 # Launch the app
-demo.launch()
+demo.launch()
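The second hunk shows that generate_response takes max_tokens, temperature, and top_p, but the diff cuts the body off after the "# Tokenize the prompt" comment. Below is a minimal sketch of how such a body is typically written against vLLM's API, reusing the vllm_model defined in the first hunk; everything past the signature is an assumption for illustration, not this Space's actual code.

def generate_response(prompt, max_tokens, temperature, top_p):
    # Map the UI controls onto vLLM sampling settings
    sampling_params = SamplingParams(
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    # vLLM tokenizes internally, so the raw prompt string is enough here;
    # the AutoTokenizer loaded above is only needed for manual token handling
    outputs = vllm_model.generate([prompt], sampling_params)
    # generate() returns one RequestOutput per prompt; take its first completion
    return outputs[0].outputs[0].text

One caveat on the change itself: whether LLM() accepts a device keyword depends on the vLLM version, and CPU execution also requires a vLLM build compiled with CPU support, so this line may need adjusting on newer releases.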
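The hunks defining the input widgets and event wiring are not part of this diff; only the gr.Row/gr.Column context lines, a closing parenthesis, and demo.launch() are visible. The following sketch shows how that gr.Blocks layout might be wired to generate_response; the widget names, ranges, and defaults are hypothetical.

import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("# 🚀 Hugging Face Integration with vLLM (CPU)")
    gr.Markdown("Generate text using the vLLM integration with Hugging Face models on CPU.")

    with gr.Row():
        with gr.Column():
            # Hypothetical controls matching generate_response's parameters
            prompt = gr.Textbox(label="Prompt", lines=4)
            max_tokens = gr.Slider(1, 512, value=128, step=1, label="Max tokens")
            temperature = gr.Slider(0.0, 2.0, value=0.7, label="Temperature")
            top_p = gr.Slider(0.0, 1.0, value=0.9, label="Top-p")
            generate_btn = gr.Button("Generate")
        with gr.Column():
            output = gr.Textbox(label="Generated text")

    # Bind the button to the vLLM-backed generation function sketched above
    generate_btn.click(
        generate_response,
        inputs=[prompt, max_tokens, temperature, top_p],
        outputs=output,
    )

# Launch the app
demo.launch()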