Spaces:

Zakia
/

deepseek-r1-demo

Sleeping

Zakia committed on Feb 1

Commit

e158ce7

verified ·

1 Parent(s): cda3c49

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,43 +1,3 @@
 import gradio as gr
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
-# Use a more compatible DeepSeek model
-model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
-# Load tokenizer
-tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-# Fix quantization issue by using 4-bit
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit=True,  # Use 4-bit instead of 8-bit
-    bnb_4bit_compute_dtype=torch.float16,  # Use FP16 for better compatibility
-    bnb_4bit_use_double_quant=True,  # Enable double quantization for efficiency
-)
-# Load model with optimized quantization
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    device_map="auto",
-    quantization_config=quantization_config,
-    trust_remote_code=True
-)
-# Define text generation function
-def generate_response(prompt):
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    with torch.no_grad():
-        output = model.generate(**inputs, max_length=150)
-    return tokenizer.decode(output[0], skip_special_tokens=True)
-# Set up Gradio UI
-interface = gr.Interface(
-    fn=generate_response,
-    inputs=gr.Textbox(label="Enter your prompt"),
-    outputs=gr.Textbox(label="AI Response"),
-    title="DeepSeek-R1 Distill LLaMA Chatbot",
-    description="Enter a prompt and receive a response from DeepSeek-R1-Distill-Llama-8B."
-)
-# Launch the app
-interface.launch()


1	import gradio as gr


2
3	+ gr.load("models/deepseek-ai/DeepSeek-R1").launch()