import torch
import gradio as gr
import wikipediaapi
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Load the DeepSeek chat model and its tokenizer
model_name = "deepseek-ai/deepseek-llm-7b-chat"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    offload_folder="offload_weights",
)
# Note: without offload_folder, loading can fail with:
#   ValueError: The current device_map had weights offloaded to the disk. Please
#   provide an offload_folder for them. Alternatively, make sure you have
#   safetensors installed if the model you are using offers the weights in this
#   format.
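# Lower-memory alternative (a sketch, assuming the optional bitsandbytes
# package is installed): load the weights 4-bit quantized instead of
# offloading them to disk.
#
# from transformers import BitsAndBytesConfig
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     quantization_config=BitsAndBytesConfig(load_in_4bit=True),
#     device_map="auto",
# )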
SYSTEM_PROMPT = "You are a helpful AI assistant. Keep responses concise and informative."
# Wikipedia client; the API requires a descriptive user-agent string
wiki_wiki = wikipediaapi.Wikipedia(
    language='en',
    user_agent='Chatbot/1.0 ([email protected])'
)
def fetch_wikipedia(query):
    """Fetch a short summary from Wikipedia for the given query."""
    page = wiki_wiki.page(query)
    if page.exists():
        return page.summary[:500]  # Limit to 500 characters
    return "I couldn't find relevant Wikipedia information on that topic."
def generate_response(message, history):
    """Generate a reply as a stream of progressively longer partial responses.

    Gradio expects chat history as a list of (user_message, bot_message) pairs,
    so each yield updates the bot half of the latest pair.
    """
    history = history or []
    history.append((message, ""))

    # Route queries containing "wikipedia" to the Wikipedia API
    if "wikipedia" in message.lower():
        query = message.lower().replace("wikipedia", "").strip()
        history[-1] = (message, fetch_wikipedia(query))
        yield history, ""
        return

    # Build the prompt from the last 5 completed exchanges to stay within the
    # model's context window
    chat_history = ""
    for user, bot in history[-6:-1]:
        chat_history += f"User: {user}\nBot: {bot}\n"
    input_text = f"[SYSTEM] {SYSTEM_PROMPT}\n{chat_history}User: {message}\nBot:"
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Stream tokens: generate() runs in a background thread and pushes decoded
    # text into the streamer, which we consume here as it arrives
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=512,
        pad_token_id=tokenizer.eos_token_id,
    )
    Thread(target=model.generate, kwargs=generation_kwargs).start()

    bot_message = ""
    for token in streamer:
        bot_message += token
        history[-1] = (message, bot_message)
        yield history, ""
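# Non-UI usage sketch: the generator yields (history, textbox_value) pairs, so
# partial replies can be consumed outside Gradio as well ("Alan Turing" is an
# illustrative query that takes the Wikipedia branch):
#
# for hist, _ in generate_response("wikipedia Alan Turing", []):
#     print(hist[-1][1])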
with gr.Blocks() as demo:
    gr.Markdown("# DeepSeek LLM Chatbot with Memory & Wikipedia API")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Ask me anything...", label="Your Message")
    clear_btn = gr.Button("Clear Chat")

    # generate_response is a generator, so Gradio streams each yielded
    # (history, textbox_value) pair to the UI as it arrives
    msg.submit(generate_response, inputs=[msg, chatbot], outputs=[chatbot, msg])
    clear_btn.click(lambda: ([], ""), outputs=[chatbot, msg])
demo.queue().launch()  # queue() lets streamed (generator) outputs update live
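# When testing outside Spaces, a temporary public URL can be requested with:
#   demo.launch(share=True)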
'''
✅ Uses streaming (TextIteratorStreamer)
✅ Returns tokens in real time instead of waiting for the full response
✅ Improved UI with gr.Blocks()
✅ System prompt ensures responses are concise & helpful
✅ Chat history is structured more clearly
✅ Retains chat history
✅ "Clear Chat" button
✅ Better UI layout with Markdown & structured input boxes
'''