theosaurus committed on
Commit 7c18ca4 · 1 Parent(s): 526fda1

feat/ Added a streamer response for the final answer

Files changed (1):
  1. app.py +32 -30
app.py CHANGED
@@ -295,27 +295,28 @@ with gr.Blocks() as demo:
             thinking_conversation.append({"role": "user", "content": thinking_prompt})
 
             # GENERATE THINKING
-            thinking_result = generate_llm_response(
-                thinking_conversation,
-                max_new_tokens=max_tokens * 2,
-                temperature=temp,
-                top_p=top_p,
-                top_k=top_k,
-                repetition_penalty=rep_penalty
-            )
-
-            # update the thinking message
-            history[-1] = {
-                "role": "assistant",
-                "content": thinking_result,
-                "metadata": {
-                    "title": "Réflexion",
-                    "status": "done"
+            for thinking_partial in generate_llm_response(thinking_conversation,
+                                                          max_new_tokens=max_tokens * 2,
+                                                          temperature=temp,
+                                                          top_p=top_p,
+                                                          top_k=top_k,
+                                                          repetition_penalty=rep_penalty):
+                # update the thinking message
+                history[-1] = {
+                    "role": "assistant",
+                    "content": thinking_partial,
+                    "metadata": {
+                        "title": "Réflexion",
+                        "status": "done"
+                    }
                 }
-            }
+                yield history
+
+            history[-1]["metadata"]["status"] = "done"
             yield history
-            print("DEBUG:\t\tYielded history of ```thinking_result```")
 
+            print("DEBUG:\t\tYielded history of ```thinking_result```")
+
             final_conversation = []
             if system_prompt:
                 final_conversation.append({"role": "system", "content": system_prompt})
@@ -331,23 +332,24 @@ with gr.Blocks() as demo:
 
             final_conversation.append({
                 "role": "assistant",
-                "content": f"Voici mon analyse étape par étape:\n{thinking_result}\n\nMaintenant je vais formaliser le résultat final."
+                "content": f"Voici mon analyse étape par étape:\n{history[-1]['content']}\n\nMaintenant je vais formaliser le résultat final."
             })
-            final_answer = generate_llm_response(
-                final_conversation,
-                max_new_tokens=max_tokens,
-                temperature=temp * 0.8, # Lower temperature for final answer
-                top_p=top_p,
-                top_k=top_k,
-                repetition_penalty=rep_penalty
-            )
-            history.append({
+            final_conversation.append({
                 "role": "assistant",
-                "content": final_answer
+                "content": "Je formule ma réponse finale..."
             })
             yield history
-            print("DEBUG:\t\tYielded history of ```final_answer```")
 
+            for final_partial in generate_llm_response(final_conversation,
+                                                       max_new_tokens=max_tokens,
+                                                       temperature=temp * 0.8, # Lower temperature for final answer
+                                                       top_p=top_p,
+                                                       top_k=top_k,
+                                                       repetition_penalty=rep_penalty):
+                history[-1]["content"] = final_partial
+                yield history
+            print("DEBUG:\t\tYielded history of ```final_answer```")
+
         except Exception as e:
             error_traceback = traceback.format_exc()
             print(f"Error traceback:\n{error_traceback}")