Zakia committed (verified)
Commit 2fa9a9c · Parent: 977ffc9

Update app.py

Files changed (1): app.py (+11 −8)
app.py CHANGED
@@ -1,18 +1,21 @@
 import gradio as gr
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 
-# Model name
-model_name = "deepseek-ai/DeepSeek-R1"
+# Select the best distill model for Hugging Face Spaces
+model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
 
 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 
-# Load model with quantization
+# Load model with quantization for optimized performance
+quantization_config = BitsAndBytesConfig(load_in_8bit=True)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
+    quantization_config=quantization_config,
+    device_map="auto",
     trust_remote_code=True
-).to("cuda" if torch.cuda.is_available() else "cpu")
+)
 
 # Define the text generation function
 def generate_response(prompt):
@@ -26,9 +29,9 @@ interface = gr.Interface(
     fn=generate_response,
     inputs=gr.Textbox(label="Enter your prompt"),
     outputs=gr.Textbox(label="AI Response"),
-    title="DeepSeek-R1 Chatbot",
-    description="Enter a prompt and receive a response from DeepSeek-R1."
+    title="DeepSeek-R1 Distilled LLaMA Chatbot",
+    description="Enter a prompt and receive a response from DeepSeek-R1-Distill-Llama-8B."
 )
 
 # Launch the app
-interface.launch()
+interface.launch()
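
Note: loading with BitsAndBytesConfig(load_in_8bit=True) and device_map="auto" relies on the bitsandbytes and accelerate packages being installed alongside transformers, so the Space's requirements file would typically need to list them.

The diff truncates the body of generate_response. Below is a minimal sketch of what that function might look like against the quantized model, assuming a plain tokenize → generate → decode flow; the max_new_tokens value and the torch.no_grad() wrapper are illustrative assumptions, not taken from the commit.

# Hypothetical sketch of the truncated generate_response body (not from the commit):
# tokenize the prompt, generate with the 8-bit model, and decode the completion.
def generate_response(prompt):
    # Move inputs to wherever accelerate placed the model weights
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=256)  # assumed generation length
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

With device_map="auto", accelerate decides where the weights live, so the explicit .to("cuda" if torch.cuda.is_available() else "cpu") call from the previous version is no longer needed; inputs are simply moved to model.device before generation.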