jedick committed
Commit e4c1af6 · 1 Parent(s): b42e964

Disable thinking by default

Files changed (1)
  1. app.py +38 -19
app.py CHANGED
@@ -96,7 +96,6 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
     if compute_mode == "local":
         gr.Info(
             f"Please wait for the local model to load",
-            duration=8,
             title=f"Model loading...",
         )
     # Get the chat model and build the graph
@@ -105,7 +104,6 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
         chat_model,
         compute_mode,
         search_type,
-        think_answer=True,
         embedding_ckpt_dir=embedding_ckpt_dir,
     )
     # Compile the graph with an in-memory checkpointer
@@ -225,24 +223,35 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
 
 def to_workflow(request: gr.Request, *args):
     """Wrapper function to call function with or without @spaces.GPU"""
+    input = args[0]
     compute_mode = args[2]
     # Add session_hash to arguments
     new_args = args + (request.session_hash,)
     if compute_mode == "local":
         # Call the workflow function with the @spaces.GPU decorator
-        for value in run_workflow_local(*new_args):
-            yield value
+        if "/think" in input:
+            for value in run_workflow_local_long(*new_args):
+                yield value
+        else:
+            for value in run_workflow_local(*new_args):
+                yield value
     if compute_mode == "remote":
         for value in run_workflow_remote(*new_args):
             yield value
 
 
-@spaces.GPU(duration=100)
+@spaces.GPU(duration=60)
 def run_workflow_local(*args):
     for value in run_workflow(*args):
         yield value
 
 
+@spaces.GPU(duration=100)
+def run_workflow_local_long(*args):
+    for value in run_workflow(*args):
+        yield value
+
+
 def run_workflow_remote(*args):
     for value in run_workflow(*args):
         yield value
@@ -401,9 +410,8 @@ with gr.Blocks(
         status_text = f"""
         📍 Now in **local** mode, using ZeroGPU hardware<br>
         ⌛ Response time is about one minute<br>
-        🧠 Thinking is enabled for the answer<br>
-        &emsp;&nbsp; 🔍 Add **/think** to enable thinking for the query</br>
-        &emsp;&nbsp; 🚫 Add **/no_think** to disable all thinking</br>
+        🧠 Add **/think** to enable thinking</br>
+        &emsp;&nbsp; 🐢 Increases ZeroGPU allotment to 100 seconds</br>
        ✨ [{embedding_model_id.split("/")[-1]}](https://huggingface.co/{embedding_model_id}) and [{model_id.split("/")[-1]}](https://huggingface.co/{model_id})<br>
        🏠 See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
        """
@@ -432,7 +440,7 @@ with gr.Blocks(
         questions = [
             # "What is today's date?",
             "Summarize emails from the last two months",
-            "Show me code examples using plotmath /no_think",
+            "Show me code examples using plotmath",
             "When was has.HLC mentioned?",
             "Who reported installation problems in 2023-2024?",
         ]
@@ -456,6 +464,18 @@ with gr.Blocks(
 
         return gr.Dataset(samples=[[q] for q in questions]) if as_dataset else questions
 
+    def get_multi_turn_questions(compute_mode, as_dataset=True):
+        """Get multi-turn example questions based on compute mode"""
+        questions = [
+            "Lookup emails that reference bugs.r-project.org in 2025",
+            "Did those authors report bugs before 2025? /think",
+        ]
+
+        if compute_mode == "remote":
+            questions = [q.replace(" /think", "") for q in questions]
+
+        return gr.Dataset(samples=[[q] for q in questions]) if as_dataset else questions
+
     with gr.Row():
         # Left column: Intro, Compute, Chat
         with gr.Column(scale=2):
@@ -494,10 +514,9 @@ with gr.Blocks(
                 label="Multiple retrievals",
             )
             multi_turn_questions = gr.Examples(
-                examples=[
-                    "Lookup emails that reference bugs.r-project.org in 2025",
-                    "Did those authors report bugs before 2025?",
-                ],
+                examples=get_multi_turn_questions(
+                    compute_mode.value, as_dataset=False
+                ),
                 inputs=[input],
                 label="Asking follow-up questions",
             )
@@ -585,18 +604,18 @@ with gr.Blocks(
         [compute_mode],
         [status],
         api_name=False,
-    ).then(
-        # Update examples based on compute mode
-        get_example_questions,
-        [compute_mode],
-        [example_questions.dataset],
-        api_name=False,
     ).then(
         # Update multi-tool examples based on compute mode
         get_multi_tool_questions,
         [compute_mode],
         [multi_tool_questions.dataset],
         api_name=False,
+    ).then(
+        # Update multi-turn examples based on compute mode
+        get_multi_turn_questions,
+        [compute_mode],
+        [multi_turn_questions.dataset],
+        api_name=False,
     )
 
     input.submit(
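
Reviewer note: a minimal, self-contained sketch of the dispatch pattern this commit introduces, in case it helps review. Queries containing "/think" are routed to a wrapper that requests the longer ZeroGPU allotment; everything else uses the shorter default. The gpu decorator and the stubbed run_workflow below are illustrative stand-ins (the real app uses spaces.GPU and its own run_workflow generator); only the "/think" check and the 60/100-second durations come from the diff above.

# Illustrative sketch, not the real app.py: keyword-based dispatch between
# two generator wrappers with different GPU time budgets.

def gpu(duration):
    # Stand-in for spaces.GPU(duration=...); it only reports the allotment
    def decorator(fn):
        def wrapper(*args):
            print(f"[GPU allotment: {duration}s]")
            yield from fn(*args)
        return wrapper
    return decorator


def run_workflow(query):
    # Placeholder for the real retrieval/answer workflow
    yield f"answer to: {query}"


@gpu(duration=60)
def run_workflow_local(*args):
    yield from run_workflow(*args)


@gpu(duration=100)
def run_workflow_local_long(*args):
    yield from run_workflow(*args)


def to_workflow(query):
    # Thinking is off by default; "/think" opts into the longer allocation
    runner = run_workflow_local_long if "/think" in query else run_workflow_local
    yield from runner(query)


print(list(to_workflow("Show me code examples using plotmath")))
print(list(to_workflow("Did those authors report bugs before 2025? /think")))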