Spaces: Running on Zero

Disable thinking by default
jedick committed · Commit e4c1af6
1 Parent(s): b42e964
app.py CHANGED
@@ -96,7 +96,6 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
     if compute_mode == "local":
         gr.Info(
             f"Please wait for the local model to load",
-            duration=8,
             title=f"Model loading...",
         )
     # Get the chat model and build the graph
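The hunk above drops the explicit `duration=8`, so the loading toast falls back to Gradio's default display time. As a minimal sketch for context (the `load_model` handler and component wiring below are hypothetical, not part of the commit): `gr.Info` shows a toast from inside an event handler, and passing `duration=None` should instead keep it on screen until the user dismisses it.

```python
import gradio as gr

def load_model(name):
    # With duration omitted, the toast uses Gradio's default display time
    gr.Info("Please wait for the local model to load", title="Model loading...")
    return f"{name} loaded"

with gr.Blocks() as demo:
    name = gr.Textbox(value="local-model", label="Model")
    status = gr.Textbox(label="Status")
    name.submit(load_model, [name], [status])
```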
@@ -105,7 +104,6 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
         chat_model,
         compute_mode,
         search_type,
-        think_answer=True,
         embedding_ckpt_dir=embedding_ckpt_dir,
     )
     # Compile the graph with an in-memory checkpointer
@@ -225,24 +223,35 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
 
 def to_workflow(request: gr.Request, *args):
     """Wrapper function to call function with or without @spaces.GPU"""
+    input = args[0]
     compute_mode = args[2]
     # Add session_hash to arguments
     new_args = args + (request.session_hash,)
     if compute_mode == "local":
         # Call the workflow function with the @spaces.GPU decorator
-        for value in run_workflow_local(*new_args):
-            yield value
+        if "/think" in input:
+            for value in run_workflow_local_long(*new_args):
+                yield value
+        else:
+            for value in run_workflow_local(*new_args):
+                yield value
     if compute_mode == "remote":
         for value in run_workflow_remote(*new_args):
             yield value
 
 
-@spaces.GPU(duration=
+@spaces.GPU(duration=60)
 def run_workflow_local(*args):
     for value in run_workflow(*args):
         yield value
 
 
+@spaces.GPU(duration=100)
+def run_workflow_local_long(*args):
+    for value in run_workflow(*args):
+        yield value
+
+
 def run_workflow_remote(*args):
     for value in run_workflow(*args):
         yield value
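This hunk is the heart of the commit: thinking is now opt-in via a `/think` marker in the user's message, and the opt-in path gets a longer ZeroGPU allotment. A minimal sketch of the same dispatch pattern, assuming the Hugging Face `spaces` package, whose `@spaces.GPU(duration=...)` decorator requests a GPU slot for at most that many seconds (shorter requests tend to queue faster). The names `quick_answer`, `long_answer`, and `dispatch` are hypothetical:

```python
import spaces

@spaces.GPU(duration=60)
def quick_answer(prompt):
    # Default path: thinking disabled, short GPU allotment
    yield f"answer to: {prompt}"

@spaces.GPU(duration=100)
def long_answer(prompt):
    # Opt-in path: /think buys a longer allotment
    yield f"considered answer to: {prompt}"

def dispatch(prompt):
    # Same substring check as to_workflow() above
    worker = long_answer if "/think" in prompt else quick_answer
    yield from worker(prompt)
```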
@@ -401,9 +410,8 @@ with gr.Blocks(
         status_text = f"""
         📍 Now in **local** mode, using ZeroGPU hardware<br>
         ⌛ Response time is about one minute<br>
-        🧠
-         
-          🚫 Add **/no_think** to disable all thinking</br>
+        🧠 Add **/think** to enable thinking</br>
+          🐢 Increases ZeroGPU allotment to 100 seconds</br>
         ✨ [{embedding_model_id.split("/")[-1]}](https://huggingface.co/{embedding_model_id}) and [{model_id.split("/")[-1]}](https://huggingface.co/{model_id})<br>
         📖 See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
         """
@@ -432,7 +440,7 @@ with gr.Blocks(
     questions = [
         # "What is today's date?",
         "Summarize emails from the last two months",
-        "Show me code examples using plotmath
+        "Show me code examples using plotmath",
         "When was has.HLC mentioned?",
         "Who reported installation problems in 2023-2024?",
     ]
@@ -456,6 +464,18 @@ with gr.Blocks(
 
         return gr.Dataset(samples=[[q] for q in questions]) if as_dataset else questions
 
+    def get_multi_turn_questions(compute_mode, as_dataset=True):
+        """Get multi-turn example questions based on compute mode"""
+        questions = [
+            "Lookup emails that reference bugs.r-project.org in 2025",
+            "Did those authors report bugs before 2025? /think",
+        ]
+
+        if compute_mode == "remote":
+            questions = [q.replace(" /think", "") for q in questions]
+
+        return gr.Dataset(samples=[[q] for q in questions]) if as_dataset else questions
+
     with gr.Row():
         # Left column: Intro, Compute, Chat
         with gr.Column(scale=2):
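The new helper follows the same convention as the existing question helpers: return plain strings to seed a `gr.Examples` component, or a `gr.Dataset` update to refresh it later. A hedged, self-contained sketch of that pattern (the component names `mode`, `question`, and `demo` are hypothetical):

```python
import gradio as gr

def get_questions(compute_mode, as_dataset=True):
    questions = [
        "Lookup emails that reference bugs.r-project.org in 2025",
        "Did those authors report bugs before 2025? /think",
    ]
    if compute_mode == "remote":
        # The /think flag only applies to the local model
        questions = [q.replace(" /think", "") for q in questions]
    return gr.Dataset(samples=[[q] for q in questions]) if as_dataset else questions

with gr.Blocks() as demo:
    mode = gr.Radio(["local", "remote"], value="local", label="Compute mode")
    question = gr.Textbox(label="Question")
    examples = gr.Examples(get_questions(mode.value, as_dataset=False), inputs=[question])
    # Swap the visible samples whenever the compute mode changes
    mode.change(get_questions, [mode], [examples.dataset])
```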
@@ -494,10 +514,9 @@ with gr.Blocks(
                     label="Multiple retrievals",
                 )
                 multi_turn_questions = gr.Examples(
-                    examples=
-
-
-                    ],
+                    examples=get_multi_turn_questions(
+                        compute_mode.value, as_dataset=False
+                    ),
                     inputs=[input],
                     label="Asking follow-up questions",
                 )
@@ -585,18 +604,18 @@ with gr.Blocks(
         [compute_mode],
         [status],
         api_name=False,
-    ).then(
-        # Update examples based on compute mode
-        get_example_questions,
-        [compute_mode],
-        [example_questions.dataset],
-        api_name=False,
     ).then(
         # Update multi-tool examples based on compute mode
         get_multi_tool_questions,
         [compute_mode],
         [multi_tool_questions.dataset],
         api_name=False,
+    ).then(
+        # Update multi-turn examples based on compute mode
+        get_multi_turn_questions,
+        [compute_mode],
+        [multi_turn_questions.dataset],
+        api_name=False,
     )
 
     input.submit(
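For reference, a minimal sketch of the `.then()` chaining used in this last hunk, with hypothetical handlers: each `.then()` step starts only after the previous handler finishes, so a single mode switch can refresh the status line and every example set in order.

```python
import gradio as gr

def set_status(mode):
    return f"Now in **{mode}** mode"

def refresh_examples(mode):
    samples = [["local-only question /think"]] if mode == "local" else [["remote question"]]
    return gr.Dataset(samples=samples)

with gr.Blocks() as demo:
    mode = gr.Radio(["local", "remote"], value="local", label="Compute mode")
    status = gr.Markdown()
    question = gr.Textbox(label="Question")
    examples = gr.Examples([["placeholder"]], inputs=[question])
    mode.change(set_status, [mode], [status], api_name=False).then(
        # Runs only after set_status has completed
        refresh_examples, [mode], [examples.dataset], api_name=False
    )
```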
|