jedick committed
Commit e4c1af6 · 1 Parent(s): b42e964

Disable thinking by default

Files changed (1)
  1. app.py +38 -19
app.py CHANGED
@@ -96,7 +96,6 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
     if compute_mode == "local":
         gr.Info(
             f"Please wait for the local model to load",
-            duration=8,
             title=f"Model loading...",
         )
     # Get the chat model and build the graph
@@ -105,7 +104,6 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
         chat_model,
         compute_mode,
         search_type,
-        think_answer=True,
         embedding_ckpt_dir=embedding_ckpt_dir,
     )
     # Compile the graph with an in-memory checkpointer
@@ -225,24 +223,35 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
 
 def to_workflow(request: gr.Request, *args):
     """Wrapper function to call function with or without @spaces.GPU"""
+    input = args[0]
     compute_mode = args[2]
     # Add session_hash to arguments
     new_args = args + (request.session_hash,)
     if compute_mode == "local":
         # Call the workflow function with the @spaces.GPU decorator
-        for value in run_workflow_local(*new_args):
-            yield value
+        if "/think" in input:
+            for value in run_workflow_local_long(*new_args):
+                yield value
+        else:
+            for value in run_workflow_local(*new_args):
+                yield value
     if compute_mode == "remote":
         for value in run_workflow_remote(*new_args):
             yield value
 
 
-@spaces.GPU(duration=100)
+@spaces.GPU(duration=60)
 def run_workflow_local(*args):
     for value in run_workflow(*args):
         yield value
 
 
+@spaces.GPU(duration=100)
+def run_workflow_local_long(*args):
+    for value in run_workflow(*args):
+        yield value
+
+
 def run_workflow_remote(*args):
     for value in run_workflow(*args):
         yield value
@@ -401,9 +410,8 @@ with gr.Blocks(
         status_text = f"""
         📍 Now in **local** mode, using ZeroGPU hardware<br>
         ⌛ Response time is about one minute<br>
-        🧠 Thinking is enabled for the answer<br>
-        &emsp;&nbsp; 🔍 Add **/think** to enable thinking for the query</br>
-        &emsp;&nbsp; 🚫 Add **/no_think** to disable all thinking</br>
+        🧠 Add **/think** to enable thinking</br>
+        &emsp;&nbsp; 🐢 Increases ZeroGPU allotment to 100 seconds</br>
        ✨ [{embedding_model_id.split("/")[-1]}](https://huggingface.co/{embedding_model_id}) and [{model_id.split("/")[-1]}](https://huggingface.co/{model_id})<br>
        🏠 See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
        """
@@ -432,7 +440,7 @@ with gr.Blocks(
         questions = [
             # "What is today's date?",
             "Summarize emails from the last two months",
-            "Show me code examples using plotmath /no_think",
+            "Show me code examples using plotmath",
             "When was has.HLC mentioned?",
             "Who reported installation problems in 2023-2024?",
         ]
@@ -456,6 +464,18 @@ with gr.Blocks(
 
         return gr.Dataset(samples=[[q] for q in questions]) if as_dataset else questions
 
+    def get_multi_turn_questions(compute_mode, as_dataset=True):
+        """Get multi-turn example questions based on compute mode"""
+        questions = [
+            "Lookup emails that reference bugs.r-project.org in 2025",
+            "Did those authors report bugs before 2025? /think",
+        ]
+
+        if compute_mode == "remote":
+            questions = [q.replace(" /think", "") for q in questions]
+
+        return gr.Dataset(samples=[[q] for q in questions]) if as_dataset else questions
+
     with gr.Row():
         # Left column: Intro, Compute, Chat
         with gr.Column(scale=2):
@@ -494,10 +514,9 @@ with gr.Blocks(
                 label="Multiple retrievals",
             )
             multi_turn_questions = gr.Examples(
-                examples=[
-                    "Lookup emails that reference bugs.r-project.org in 2025",
-                    "Did those authors report bugs before 2025?",
-                ],
+                examples=get_multi_turn_questions(
+                    compute_mode.value, as_dataset=False
+                ),
                 inputs=[input],
                 label="Asking follow-up questions",
             )
@@ -585,18 +604,18 @@ with gr.Blocks(
         [compute_mode],
         [status],
         api_name=False,
-    ).then(
-        # Update examples based on compute mode
-        get_example_questions,
-        [compute_mode],
-        [example_questions.dataset],
-        api_name=False,
     ).then(
         # Update multi-tool examples based on compute mode
         get_multi_tool_questions,
         [compute_mode],
         [multi_tool_questions.dataset],
         api_name=False,
+    ).then(
+        # Update multi-turn examples based on compute mode
+        get_multi_turn_questions,
+        [compute_mode],
+        [multi_turn_questions.dataset],
+        api_name=False,
     )
 
     input.submit(
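
Reviewer note: a minimal, self-contained sketch of the dispatch pattern this commit introduces, in case it helps review. Queries containing "/think" are routed to a wrapper that requests the longer ZeroGPU allotment; everything else uses the shorter default. The gpu decorator and the stubbed run_workflow below are illustrative stand-ins (the real app uses spaces.GPU and its own run_workflow generator); only the "/think" check and the 60/100-second durations come from the diff above.

# Illustrative sketch, not the real app.py: keyword-based dispatch between
# two generator wrappers with different GPU time budgets.

def gpu(duration):
    # Stand-in for spaces.GPU(duration=...); it only reports the allotment
    def decorator(fn):
        def wrapper(*args):
            print(f"[GPU allotment: {duration}s]")
            yield from fn(*args)
        return wrapper
    return decorator


def run_workflow(query):
    # Placeholder for the real retrieval/answer workflow
    yield f"answer to: {query}"


@gpu(duration=60)
def run_workflow_local(*args):
    yield from run_workflow(*args)


@gpu(duration=100)
def run_workflow_local_long(*args):
    yield from run_workflow(*args)


def to_workflow(query):
    # Thinking is off by default; "/think" opts into the longer allocation
    runner = run_workflow_local_long if "/think" in query else run_workflow_local
    yield from runner(query)


print(list(to_workflow("Show me code examples using plotmath")))
print(list(to_workflow("Did those authors report bugs before 2025? /think")))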