import gradio as gr
from transformers import AutoTokenizer, pipeline
from auto_gptq import AutoGPTQForCausalLM
import torch
import os
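
# Model configuration: TheBloke's GPTQ build of DeepSeek Coder 1.3B Instruct from
# the Hugging Face Hub. Weights are cached locally so repeated launches skip the download.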
MODEL_NAME = "TheBloke/deepseek-coder-1.3b-instruct-GPTQ"
cache_dir = "./model_cache"
os.makedirs(cache_dir, exist_ok=True)
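
# Load the tokenizer and the pre-quantized weights. quantize_config=None tells
# auto_gptq to read the quantization settings shipped with the checkpoint, and
# device_map="auto" lets accelerate place the layers on the available GPU (or CPU).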
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True, cache_dir=cache_dir)
model = AutoGPTQForCausalLM.from_quantized(
    MODEL_NAME,
    model_basename="model",
    use_safetensors=True,
    quantize_config=None,
    device_map="auto",
    low_cpu_mem_usage=True,
    cache_dir=cache_dir
)
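
# Wrap the quantized model and tokenizer in a Transformers text-generation pipeline.
# Per-request generation settings (max_new_tokens, temperature) are supplied at call
# time in generate_text below.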
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto"
)
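
# NOTE: the "Instruct:/Output:" template below is this app's own convention. The
# DeepSeek Coder Instruct model card describes a "### Instruction:" / "### Response:"
# style prompt, so switching to that format may improve results.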
def generate_text(prompt, max_length=512, temperature=0.7):
    """Generate a completion for the given instruction and return only the model's answer."""
    full_prompt = f"Instruct: {prompt}\nOutput:"
    with torch.inference_mode():
        response = generator(
            full_prompt,
            max_new_tokens=max_length,
            temperature=temperature,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )[0]["generated_text"]
    # The pipeline returns the prompt plus the completion; keep only the part
    # after the "Output:" marker.
    return response.split("Output:")[-1].strip()
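
# Build the Gradio interface: an instruction textbox, generation-parameter sliders,
# and an output box wired to generate_text.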
with gr.Blocks(theme="soft", css=".gradio-container {max-width: 800px; margin: auto;}") as demo:
    gr.Markdown("""
    # 🧠 DeepSeek Coder 1.3B Instruct (GPTQ)
    *Text-to-Code Generation App*

    Enter a programming instruction below and adjust parameters for optimal output.
    """)
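
    # Input widgets: a multi-line textbox for the instruction plus sliders for the
    # two generation parameters exposed to the user.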
    with gr.Row():
        prompt = gr.Textbox(
            label="Enter your instruction",
            placeholder="Write a Python function to calculate Fibonacci numbers...",
            lines=4
        )

    with gr.Row():
        max_length = gr.Slider(64, 2048, value=512, label="Max Output Length")
        temperature = gr.Slider(0.1, 1.5, value=0.7, label="Creativity (Temperature)")

    output = gr.Textbox(label="Generated Output", lines=10)
    submit = gr.Button("✨ Generate Code", variant="primary")
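
    # Wire the button to generate_text: the three components in `inputs` are passed
    # positionally as (prompt, max_length, temperature).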
    submit.click(
        fn=generate_text,
        inputs=[prompt, max_length, temperature],
        outputs=output
    )

    gr.Markdown("""
    ### ℹ️ How it works
    - Uses a GPTQ-quantized model for efficient inference
    - Output length is capped by the Max Output Length slider
    - Temperature controls randomness (0.1 = strict, 1.5 = creative)
    """)

demo.launch()
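
# For a temporary public URL (e.g. when running in a hosted notebook), launch with
# demo.launch(share=True) instead.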