theosaurus committed on
Commit 7c18ca4 · 1 Parent(s): 526fda1

feat/ Added a streamer response for the final answer

Files changed (1):
  1. app.py +32 -30
app.py CHANGED
@@ -295,27 +295,28 @@ with gr.Blocks() as demo:
             thinking_conversation.append({"role": "user", "content": thinking_prompt})
 
             # GENERATE THINKING
-            thinking_result = generate_llm_response(
-                thinking_conversation,
-                max_new_tokens=max_tokens * 2,
-                temperature=temp,
-                top_p=top_p,
-                top_k=top_k,
-                repetition_penalty=rep_penalty
-            )
-
-            # update the thinking message
-            history[-1] = {
-                "role": "assistant",
-                "content": thinking_result,
-                "metadata": {
-                    "title": "Réflexion",
-                    "status": "done"
+            for thinking_partial in generate_llm_response(thinking_conversation,
+                                                          max_new_tokens=max_tokens * 2,
+                                                          temperature=temp,
+                                                          top_p=top_p,
+                                                          top_k=top_k,
+                                                          repetition_penalty=rep_penalty):
+                # update the thinking message
+                history[-1] = {
+                    "role": "assistant",
+                    "content": thinking_partial,
+                    "metadata": {
+                        "title": "Réflexion",
+                        "status": "done"
+                    }
                 }
-            }
+                yield history
+
+            history[-1]["metadata"]["status"] = "done"
             yield history
-            print("DEBUG:\t\tYielded history of ```thinking_result```")
 
+            print("DEBUG:\t\tYielded history of ```thinking_result```")
+
             final_conversation = []
             if system_prompt:
                 final_conversation.append({"role": "system", "content": system_prompt})
@@ -331,23 +332,24 @@ with gr.Blocks() as demo:
 
             final_conversation.append({
                 "role": "assistant",
-                "content": f"Voici mon analyse étape par étape:\n{thinking_result}\n\nMaintenant je vais formaliser le résultat final."
+                "content": f"Voici mon analyse étape par étape:\n{history[-1]['content']}\n\nMaintenant je vais formaliser le résultat final."
             })
-            final_answer = generate_llm_response(
-                final_conversation,
-                max_new_tokens=max_tokens,
-                temperature=temp * 0.8, # Lower temperature for final answer
-                top_p=top_p,
-                top_k=top_k,
-                repetition_penalty=rep_penalty
-            )
-            history.append({
+            final_conversation.append({
                 "role": "assistant",
-                "content": final_answer
+                "content": "Je formule ma réponse finale..."
             })
             yield history
-            print("DEBUG:\t\tYielded history of ```final_answer```")
 
+            for final_partial in generate_llm_response(final_conversation,
+                                                       max_new_tokens=max_tokens,
+                                                       temperature=temp * 0.8, # Lower temperature for final answer
+                                                       top_p=top_p,
+                                                       top_k=top_k,
+                                                       repetition_penalty=rep_penalty):
+                history[-1]["content"] = final_partial
+                yield history
+            print("DEBUG:\t\tYielded history of ```final_answer```")
+
         except Exception as e:
             error_traceback = traceback.format_exc()
             print(f"Error traceback:\n{error_traceback}")