Zakia committed on
Commit cda3c49 · verified · 1 Parent(s): 2fa9a9c

Update app.py

Files changed (1)
app.py +12 -6
app.py CHANGED
@@ -2,22 +2,28 @@ import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 
-# Select the best distill model for Hugging Face Spaces
+# Use a more compatible DeepSeek model
 model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
 
 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 
-# Load model with quantization for optimized performance
-quantization_config = BitsAndBytesConfig(load_in_8bit=True)
+# Fix quantization issue by using 4-bit
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,  # Use 4-bit instead of 8-bit
+    bnb_4bit_compute_dtype=torch.float16,  # Use FP16 for better compatibility
+    bnb_4bit_use_double_quant=True,  # Enable double quantization for efficiency
+)
+
+# Load model with optimized quantization
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    quantization_config=quantization_config,
     device_map="auto",
+    quantization_config=quantization_config,
     trust_remote_code=True
 )
 
-# Define the text generation function
+# Define text generation function
 def generate_response(prompt):
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     with torch.no_grad():
@@ -29,7 +35,7 @@ interface = gr.Interface(
     fn=generate_response,
     inputs=gr.Textbox(label="Enter your prompt"),
     outputs=gr.Textbox(label="AI Response"),
-    title="DeepSeek-R1 Distilled LLaMA Chatbot",
+    title="DeepSeek-R1 Distill LLaMA Chatbot",
     description="Enter a prompt and receive a response from DeepSeek-R1-Distill-Llama-8B."
 )
 
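The hunks above cover only the changed regions, so the tail of generate_response and the app launch are not shown in the diff. Below is a minimal sketch of how the remainder of app.py plausibly reads, assuming the standard transformers generate/decode calls and Gradio's launch(); the max_new_tokens value and the output_ids name are illustrative and not taken from the commit.

# Hypothetical continuation of app.py; these lines are not part of this commit's diff.
def generate_response(prompt):
    # Tokenize the prompt and move the tensors to the model's device
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # Run inference without gradient tracking
        output_ids = model.generate(**inputs, max_new_tokens=512)  # assumed token budget
    # Decode the generated token IDs back into text
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

interface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="Enter your prompt"),
    outputs=gr.Textbox(label="AI Response"),
    title="DeepSeek-R1 Distill LLaMA Chatbot",
    description="Enter a prompt and receive a response from DeepSeek-R1-Distill-Llama-8B."
)

interface.launch()

As a design note, the 4-bit config roughly halves the weight memory footprint relative to load_in_8bit, and the float16 compute dtype keeps matrix multiplications in a widely supported precision, which matches the "Fix quantization issue" intent stated in the diff comments.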