jorker121 committed on
Commit
3ed4994
·
1 Parent(s): 903a631

Deploy DeepSeek LLM chatbot V2

Files changed (2)
  1. app.py +113 -12
  2. requirements.txt +1 -0
app.py CHANGED
@@ -5,46 +5,147 @@ import torch
# Load DeepSeek LLM
model_name = "deepseek-ai/deepseek-llm-7b-chat"
tokenizer = AutoTokenizer.from_pretrained(model_name)
- model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
+ # model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
+
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     torch_dtype=torch.float16,
+     device_map="auto",
+     offload_folder="offload_weights"
+ )
+ '''
+ ValueError: The current device_map had weights offloaded to the disk. Please provide an offload_folder
+ for them. Alternatively, make sure you have safetensors installed if the model you are using offers the
+ weights in this format.
+ '''
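Note: the ValueError quoted above comes from accelerate. With device_map="auto", any weights that fit in neither GPU nor CPU memory are offloaded to disk, and that path requires either an explicit offload_folder or weights in safetensors format. A minimal sketch of a guarded load follows; the CUDA availability check is illustrative, not part of the commit:

import torch
from transformers import AutoModelForCausalLM

# Sketch: only take the fp16/auto-device path when a GPU is present. accelerate
# consults offload_folder only if it actually has to spill weights to disk.
if torch.cuda.is_available():
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto",
        offload_folder="offload_weights",
    )
else:
    model = AutoModelForCausalLM.from_pretrained(model_name)  # CPU-only fallback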

SYSTEM_PROMPT = "You are a helpful AI assistant. Keep responses concise and informative."

+ import wikipediaapi
+
+ wiki_wiki = wikipediaapi.Wikipedia('en')
+
+ def fetch_wikipedia(query):
+     """Fetch a summary from Wikipedia"""
+     page = wiki_wiki.page(query)
+     if page.exists():
+         return page.summary[:500]  # Limit to 500 chars
+     return "I couldn't find relevant Wikipedia information on that topic."
+
def generate_response(message, history):
    history = history or []
    history.append(("User", message))
-
-     # Add system message for better guidance
-     input_text = f"[SYSTEM] {SYSTEM_PROMPT}\n" + tokenizer.apply_chat_template(history, tokenize=False, add_generation_prompt=True)
+
+     # Check if the user asks for factual info
+     if "wikipedia" in message.lower():
+         query = message.lower().replace("wikipedia", "").strip()
+         wiki_info = fetch_wikipedia(query)
+         history.append(("Bot", wiki_info))
+         return history, ""
+
+     # Default chatbot behavior
+     chat_history = ""
+     for user, bot in history[-5:]:
+         chat_history += f"User: {user}\nBot: {bot}\n"
+
+     input_text = f"[SYSTEM] {SYSTEM_PROMPT}\n{chat_history}User: {message}\nBot:"
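Note: two things to watch in this prompt assembly. First, history holds (role, text) tuples such as ("User", message), so `for user, bot in history[-5:]` actually unpacks a role label and one text, mislabeling every line as a User/Bot pair. Second, DeepSeek chat checkpoints ship a chat template, which the removed apply_chat_template call was closer to using correctly. A sketch of template-based prompt construction, assuming the template accepts a system role — not the commit's code:

# Convert the (role, text) tuples kept in `history` into the
# {"role", "content"} dicts that apply_chat_template expects.
messages = [{"role": "system", "content": SYSTEM_PROMPT}]
for role, text in history[-10:]:  # includes the just-appended user message
    messages.append({"role": "user" if role == "User" else "assistant", "content": text})
input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)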
    inputs = tokenizer(input_text, return_tensors="pt").to("cuda")

    streamer = tokenizer.streamer()
    model.generate(**inputs, streamer=streamer, max_length=512, pad_token_id=tokenizer.eos_token_id)
+
    bot_message = ""
    for token in streamer:
        bot_message += token
        yield bot_message

+     history.append(("Bot", bot_message))
+     return history, ""
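Note: tokenizers have no streamer() method, so `tokenizer.streamer()` raises AttributeError as committed, and generate() blocks until completion anyway; also, a generator's return value never reaches a normal caller. A hedged sketch of working token streaming with transformers' TextIteratorStreamer, keeping the commit's other names and parameters:

from threading import Thread
from transformers import TextIteratorStreamer

# Sketch: generate() runs in a background thread because it blocks;
# the streamer yields decoded text pieces as they arrive.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generation_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=512,
                         pad_token_id=tokenizer.eos_token_id)
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

bot_message = ""
for piece in streamer:
    bot_message += piece
    yield bot_message
thread.join()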
+
+ # # Function to handle chat with memory
+ # def generate_response(message, history):
+ #     history = history or []
+ #     history.append(("User", message))
+
+ #     # Format history for the model
+ #     chat_history = ""
+ #     for user, bot in history[-5:]:  # Limit history to last 5 exchanges to avoid exceeding token limit
+ #         chat_history += f"User: {user}\nBot: {bot}\n"
+
+ #     input_text = f"[SYSTEM] {SYSTEM_PROMPT}\n{chat_history}User: {message}\nBot:"
+ #     inputs = tokenizer(input_text, return_tensors="pt").to("cuda")
+
+ #     streamer = tokenizer.streamer()
+ #     model.generate(**inputs, streamer=streamer, max_length=512, pad_token_id=tokenizer.eos_token_id)
+
+ #     bot_message = ""
+ #     for token in streamer:
+ #         bot_message += token
+ #         yield bot_message
+
+ #     history.append(("Bot", bot_message))
+ #     return history, ""
+
+ # def generate_response(message, history):
+ #     history = history or []
+ #     history.append(("User", message))
+
+ #     # Add system message for better guidance
+ #     input_text = f"[SYSTEM] {SYSTEM_PROMPT}\n" + tokenizer.apply_chat_template(history, tokenize=False, add_generation_prompt=True)
+ #     inputs = tokenizer(input_text, return_tensors="pt").to("cuda")
+
+ #     streamer = tokenizer.streamer()
+ #     model.generate(**inputs, streamer=streamer, max_length=512, pad_token_id=tokenizer.eos_token_id)
+
+ #     bot_message = ""
+ #     for token in streamer:
+ #         bot_message += token
+ #         yield bot_message
+
- # Create Gradio Chatbot UI with streaming
with gr.Blocks() as demo:
-     gr.Markdown("### 🚀 DeepSeek LLM Chatbot (Streaming & Improved UI)")
+     gr.Markdown("# 🚀 DeepSeek LLM Chatbot with Memory & Wikipedia API")

    chatbot = gr.Chatbot()
-     msg = gr.Textbox(placeholder="Type your message here...", label="Your Message")
+     msg = gr.Textbox(placeholder="Ask me anything...", label="Your Message")
    clear_btn = gr.Button("Clear Chat")

    def respond(message, history):
-         history = history or []
-         bot_response = generate_response(message, history)
-         return bot_response, history + [("User", message), ("Bot", bot_response)]
+         history, bot_message = generate_response(message, history)
+         return history, bot_message

    msg.submit(respond, inputs=[msg, chatbot], outputs=[chatbot, msg])
    clear_btn.click(lambda: ([], ""), outputs=[chatbot, msg])
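Note: as committed, respond tuple-unpacks the result of generate_response, but any function containing yield returns a generator object; the unpacking iterates it to exhaustion, so nothing streams and it raises unless exactly two values happen to be yielded. gr.Chatbot also expects (user_message, bot_message) pairs rather than the ("User", text) role tuples stored above. Gradio runs generator callbacks as streaming handlers, so a hedged sketch, assuming generate_response only yields growing text and does not also mutate the displayed history:

# Sketch: Gradio re-renders the outputs after every yield from a generator callback.
def respond(message, history):
    history = history or []
    history.append((message, ""))           # user turn shown immediately, empty bot slot
    for partial in generate_response(message, history):
        history[-1] = (message, partial)    # grow the bot reply in place
        yield history, ""                   # second output clears the textbox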
+ # # Create Gradio Chatbot UI with streaming
+ # with gr.Blocks() as demo:
+ #     gr.Markdown("### 🚀 DeepSeek LLM Chatbot (Streaming & Improved UI)")
+
+ #     chatbot = gr.Chatbot()
+ #     msg = gr.Textbox(placeholder="Type your message here...", label="Your Message")
+ #     clear_btn = gr.Button("Clear Chat")
+
+ #     def respond(message, history):
+ #         history = history or []
+ #         bot_response = generate_response(message, history)
+ #         return bot_response, history + [("User", message), ("Bot", bot_response)]
+
+ #     msg.submit(respond, inputs=[msg, chatbot], outputs=[chatbot, msg])
+ #     clear_btn.click(lambda: ([], ""), outputs=[chatbot, msg])
+
demo.launch()


# with gr.Blocks() as demo:
#     gr.Markdown("### 🚀 DeepSeek LLM Chatbot (Streaming Enabled)")
#     chat = gr.ChatInterface(fn=generate_response)
requirements.txt CHANGED
@@ -2,3 +2,4 @@ gradio
transformers
torch
accelerate
+ wikipedia-api
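Note: per the ValueError quoted in app.py, installing safetensors would let transformers read the checkpoint's safetensors weights directly instead of requiring disk offload. A possible addition, not part of this commit:

# optional, see the offload ValueError quoted in app.py
safetensors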