MohamedRashad committed
Commit 6389312 · verified · 1 Parent(s): c8ade90

Update app.py

Files changed (1)
  1. app.py +7 -5
app.py CHANGED
@@ -9,12 +9,13 @@ from threading import Thread
 
 # Load model directly
 tokenizer = AutoTokenizer.from_pretrained("Navid-AI/Mulhem-1-Mini", token=os.getenv("HF_TOKEN"))
-model = AutoModelForCausalLM.from_pretrained("Navid-AI/Mulhem-1-Mini", torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2", token=os.getenv("HF_TOKEN"))
+model = AutoModelForCausalLM.from_pretrained("Navid-AI/Mulhem-1-Mini", torch_dtype=torch.bfloat16, device_map="auto", attn_implementation="flash_attention_2", token=os.getenv("HF_TOKEN"))
 streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
 
 def respond(
     message,
     history: list[tuple[str, str]],
+    enable_reasoning,
     system_message,
     max_tokens,
     temperature,
@@ -29,7 +30,7 @@ def respond(
             messages.append({"role": "assistant", "content": val[1]})
 
     messages.append({"role": "user", "content": message})
-    inputs = tokenizer.apply_chat_template(messages, return_tensors="pt")
+    inputs = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True, enable_reasoning=enable_reasoning)
     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p)
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
 
@@ -44,8 +45,9 @@ For information on how to customize the ChatInterface, peruse the gradio docs: h
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=8192, value=512, step=1, label="Max new tokens"),
+        gr.Checkbox(label="Enable reasoning", value=False),
+        gr.Textbox(value="أنت مُلهم. ذكاء اصطناعي تم إنشاؤه من شركة نفيد لإلهام وتحفيز المستخدمين على التعلّم، النمو، وتحقيق أهدافهم.", label="System message"),
+        gr.Slider(minimum=1, maximum=8192, value=2048, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
             minimum=0.1,
@@ -59,4 +61,4 @@ demo = gr.ChatInterface(
 
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
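
The new default system prompt is Arabic; in English it reads: "You are Mulhem. An artificial intelligence created by the Navid company to inspire and motivate users to learn, grow, and achieve their goals."

The commit keeps the template's streaming pattern: model.generate runs in a background Thread while TextIteratorStreamer yields decoded text to the UI. Two details are easy to trip on. First, apply_chat_template with return_tensors="pt" returns a bare tensor unless return_dict=True is also passed, and the dict(inputs, ...) call that builds generation_kwargs only works on a dict-like BatchEncoding. Second, once device_map="auto" places the model, the prompt tensors must be moved to the model's device. A minimal sketch of the respond body under those assumptions (return_dict=True, the .to(model.device) call, and do_sample=True are additions not present in this commit):

    # Sketch only: assumes the diff's tokenizer/model/streamer globals.
    inputs = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        return_dict=True,                   # assumption: yields {"input_ids", "attention_mask"} rather than a bare tensor
        add_generation_prompt=True,
        enable_reasoning=enable_reasoning,  # custom kwarg, forwarded to the Jinja chat template
    ).to(model.device)                      # assumption: required once device_map="auto" places the model

    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_tokens,
                             temperature=temperature, top_p=top_p,
                             do_sample=True)  # assumption: sampling so temperature/top_p take effect
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    response = ""
    for new_text in streamer:               # text arrives incrementally while generate() runs
        response += new_text
        yield response

Note that enable_reasoning is not a standard transformers argument: apply_chat_template forwards unrecognized keyword arguments to the model's Jinja chat template, so the new checkbox only changes the prompt if Mulhem-1-Mini's template actually reads that variable.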