openfree committed (verified)
Commit 6d70605 · 1 parent: c92d933

Update app.py

Files changed (1):
  app.py +19 -45
app.py CHANGED
@@ -5,54 +5,33 @@ import torch
 from threading import Thread
 
 phi4_model_path = "microsoft/Phi-4-reasoning-plus"
-phi4_mini_model_path = "microsoft/Phi-4-mini-reasoning"
 
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
 phi4_model = AutoModelForCausalLM.from_pretrained(phi4_model_path, torch_dtype="auto").to(device)
 phi4_tokenizer = AutoTokenizer.from_pretrained(phi4_model_path)
-phi4_mini_model = AutoModelForCausalLM.from_pretrained(phi4_mini_model_path, torch_dtype="auto").to(device)
-phi4_mini_tokenizer = AutoTokenizer.from_pretrained(phi4_mini_model_path)
 
 @spaces.GPU(duration=60)
-def generate_response(user_message, model_name, max_tokens, temperature, top_k, top_p, repetition_penalty, history_state):
+def generate_response(user_message, max_tokens, temperature, top_k, top_p, repetition_penalty, history_state):
     if not user_message.strip():
         return history_state, history_state
 
-    # Select models
-    if model_name == "Phi-4":
-        model = phi4_model
-        tokenizer = phi4_tokenizer
-        start_tag = "<|im_start|>"
-        sep_tag = "<|im_sep|>"
-        end_tag = "<|im_end|>"
-    elif model_name == "Phi-4-mini-instruct":
-        model = phi4_mini_model
-        tokenizer = phi4_mini_tokenizer
-        start_tag = ""
-        sep_tag = ""
-        end_tag = "<|end|>"
-    else:
-        raise ValueError("Error loading on models")
+    # Phi-4 model settings
+    model = phi4_model
+    tokenizer = phi4_tokenizer
+    start_tag = "<|im_start|>"
+    sep_tag = "<|im_sep|>"
+    end_tag = "<|im_end|>"
 
     # Recommended prompt settings by Microsoft
     system_message = "You are a friendly and knowledgeable assistant, here to help with any questions or tasks."
-    if model_name == "Phi-4":
-        prompt = f"{start_tag}system{sep_tag}{system_message}{end_tag}"
-        for message in history_state:
-            if message["role"] == "user":
-                prompt += f"{start_tag}user{sep_tag}{message['content']}{end_tag}"
-            elif message["role"] == "assistant" and message["content"]:
-                prompt += f"{start_tag}assistant{sep_tag}{message['content']}{end_tag}"
-        prompt += f"{start_tag}user{sep_tag}{user_message}{end_tag}{start_tag}assistant{sep_tag}"
-    else:
-        prompt = f"<|system|>{system_message}{end_tag}"
-        for message in history_state:
-            if message["role"] == "user":
-                prompt += f"<|user|>{message['content']}{end_tag}"
-            elif message["role"] == "assistant" and message["content"]:
-                prompt += f"<|assistant|>{message['content']}{end_tag}"
-        prompt += f"<|user|>{user_message}{end_tag}<|assistant|>"
+    prompt = f"{start_tag}system{sep_tag}{system_message}{end_tag}"
+    for message in history_state:
+        if message["role"] == "user":
+            prompt += f"{start_tag}user{sep_tag}{message['content']}{end_tag}"
+        elif message["role"] == "assistant" and message["content"]:
+            prompt += f"{start_tag}assistant{sep_tag}{message['content']}{end_tag}"
+    prompt += f"{start_tag}user{sep_tag}{user_message}{end_tag}{start_tag}assistant{sep_tag}"
 
     inputs = tokenizer(prompt, return_tensors="pt").to(device)
 
@@ -83,7 +62,7 @@ def generate_response(user_message, model_name, max_tokens, temperature, top_k, top_p, repetition_penalty, history_state):
         {"role": "assistant", "content": ""}
     ]
     for new_token in streamer:
-        cleaned_token = new_token.replace("<|im_start|>", "").replace("<|im_sep|>", "").replace("<|im_end|>", "").replace("<|end|>", "").replace("<|system|>", "").replace("<|user|>", "").replace("<|assistant|>", "")
+        cleaned_token = new_token.replace("<|im_start|>", "").replace("<|im_sep|>", "").replace("<|im_end|>", "")
         assistant_response += cleaned_token
         new_history[-1]["content"] = assistant_response.strip()
         yield new_history, new_history
@@ -91,7 +70,7 @@ def generate_response(user_message, model_name, max_tokens, temperature, top_k, top_p, repetition_penalty, history_state):
     yield new_history, new_history
 
 example_messages = {
-    "Learn about physics": "Explain Newtons laws of motion.",
+    "Learn about physics": "Explain Newton's laws of motion.",
     "Discover space facts": "What are some interesting facts about black holes?",
     "Write a factorial function": "Write a Python function to calculate the factorial of a number."
 }
@@ -99,8 +78,8 @@ example_messages = {
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         """
-        # Phi-4 Models Chatbot
-        Welcome to the Phi-4 Chatbot! You can chat with Microsoft's Phi-4 or Phi-4-mini-instruct models. Adjust the settings on the left to customize the model's responses.
+        # Phi-4-reasoning-plus Chatbot
+        Welcome to the Phi-4 Chatbot! You can chat with Microsoft's Phi-4-reasoning-plus model. Adjust the settings on the left to customize the model's responses.
         """
     )
 
@@ -109,11 +88,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     with gr.Row():
         with gr.Column(scale=1):
            gr.Markdown("### Settings")
-            model_dropdown = gr.Dropdown(
-                choices=["Phi-4", "Phi-4-mini-instruct"],
-                label="Select Model",
-                value="Phi-4"
-            )
            max_tokens_slider = gr.Slider(
                minimum=64,
                maximum=4096,
@@ -166,7 +140,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
     submit_button.click(
         fn=generate_response,
-        inputs=[user_input, model_dropdown, max_tokens_slider, temperature_slider, top_k_slider, top_p_slider, repetition_penalty_slider, history_state],
+        inputs=[user_input, max_tokens_slider, temperature_slider, top_k_slider, top_p_slider, repetition_penalty_slider, history_state],
         outputs=[chatbot, history_state]
     ).then(
         fn=lambda: gr.update(value=""),
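
For reference, a minimal sketch of the chat prompt the simplified generate_response now builds for a single user turn with no prior history. The tags come straight from the diff above; the example message is hypothetical:

    # Sketch only: mirrors the f-string concatenation in generate_response.
    start_tag, sep_tag, end_tag = "<|im_start|>", "<|im_sep|>", "<|im_end|>"
    system_message = "You are a friendly and knowledgeable assistant, here to help with any questions or tasks."
    user_message = "Explain Newton's laws of motion."  # hypothetical input

    prompt = (
        f"{start_tag}system{sep_tag}{system_message}{end_tag}"
        f"{start_tag}user{sep_tag}{user_message}{end_tag}"
        f"{start_tag}assistant{sep_tag}"
    )
    # The model writes the assistant turn after the trailing separator;
    # the streaming loop in the diff strips these tags from displayed text.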
 
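The streaming loop in the @@ -83,7 +62,7 @@ hunk iterates over a streamer whose setup lies outside this diff. Assuming the standard transformers pattern (suggested by the threading.Thread import), the setup would look roughly like the sketch below; the sampling arguments come from the function signature, the rest is assumption:

    # Assumed context, not shown in this diff: model.generate runs on a
    # background thread while TextIteratorStreamer yields decoded text.
    from threading import Thread
    from transformers import TextIteratorStreamer

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=False)
    generation_kwargs = dict(
        **inputs,                      # tokenized prompt built above
        streamer=streamer,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
    )
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    for new_token in streamer:         # the loop shown in the hunk above
        ...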