import gradio as gr
import torch
import wikipediaapi
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
# Load DeepSeek LLM
model_name = "deepseek-ai/deepseek-llm-7b-chat"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    offload_folder="offload_weights",  # needed when device_map offloads some weights to disk
)
'''
Note: without offload_folder, loading fails with:
ValueError: The current device_map had weights offloaded to the disk. Please provide
an offload_folder for them. Alternatively, make sure you have safetensors installed
if the model you are using offers the weights in this format.
'''
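# A minimal sanity check (illustrative, not required for the app): when the model
# is loaded with device_map="auto", accelerate records the placement of each
# module in model.hf_device_map, so you can see which layers ended up on GPU,
# CPU, or disk. The example mapping below is hypothetical.
# print(model.hf_device_map)  # e.g. {"model.embed_tokens": 0, ..., "lm_head": "disk"}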
SYSTEM_PROMPT = "You are a helpful AI assistant. Keep responses concise and informative."
# Wikipedia client; recent wikipedia-api versions require a descriptive user agent.
wiki_wiki = wikipediaapi.Wikipedia(
    language='en',
    user_agent='Chatbot/1.0 ([email protected])'
)
def fetch_wikipedia(query):
    """Fetch a short summary from Wikipedia."""
    page = wiki_wiki.page(query)
    if page.exists():
        return page.summary[:500]  # limit to 500 characters
    return "I couldn't find relevant Wikipedia information on that topic."
def generate_response(message, history):
    history = history or []

    # Route factual questions that mention "wikipedia" to the Wikipedia API.
    if "wikipedia" in message.lower():
        query = message.lower().replace("wikipedia", "").strip()
        wiki_info = fetch_wikipedia(query)
        history.append((message, wiki_info))
        yield history, ""
        return

    # Default chatbot behavior: rebuild the prompt from the last 5 exchanges
    # so the context stays within the token limit.
    chat_history = ""
    for user_msg, bot_msg in history[-5:]:
        chat_history += f"User: {user_msg}\nBot: {bot_msg}\n"
    input_text = f"[SYSTEM] {SYSTEM_PROMPT}\n{chat_history}User: {message}\nBot:"
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Stream tokens as they are generated: TextIteratorStreamer yields decoded
    # text while model.generate runs in a background thread (generate blocks
    # until completion, so it cannot share a thread with the consuming loop).
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=512,
        pad_token_id=tokenizer.eos_token_id,
    )
    Thread(target=model.generate, kwargs=generation_kwargs).start()

    bot_message = ""
    history.append((message, ""))
    for token in streamer:
        bot_message += token
        history[-1] = (message, bot_message)
        yield history, ""
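# For reference, the assembled prompt for a hypothetical two-turn conversation
# looks like this (the exchange shown is made up):
#   [SYSTEM] You are a helpful AI assistant. Keep responses concise and informative.
#   User: hello
#   Bot: Hi there! How can I help?
#   User: what can you do?
#   Bot: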
with gr.Blocks() as demo:
    gr.Markdown("# DeepSeek LLM Chatbot with Memory & Wikipedia API")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Ask me anything...", label="Your Message")
    clear_btn = gr.Button("Clear Chat")

    def respond(message, history):
        # generate_response is a generator, so stream its (history, "") updates through.
        yield from generate_response(message, history)

    msg.submit(respond, inputs=[msg, chatbot], outputs=[chatbot, msg])
    clear_btn.click(lambda: ([], ""), outputs=[chatbot, msg])
# queue() enables streaming generator outputs (required in Gradio 3.x; on by default in 4.x).
demo.queue().launch()
'''
✅ Uses streaming (TextIteratorStreamer + background generation thread)
✅ Returns tokens in real time instead of waiting for the full response
✅ Improved UI with gr.Blocks()
✅ System prompt keeps responses concise & helpful
✅ Chat history is structured clearly and retained across turns (last 5 exchanges)
✅ "Clear Chat" button
✅ Better UI layout with Markdown & structured input boxes
'''