Update app.py
app.py CHANGED
@@ -33,19 +33,19 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
     try:
         print(f"\n=== New Request ===")
         print(f"User message: {user_message}")
-
+
         # Format chat history for Gemini
         chat_history = format_chat_history(messages)
-
+
         # Initialize Gemini chat
         chat = model.start_chat(history=chat_history)
         response = chat.send_message(user_message, stream=True)
-
+
         # Initialize buffers and flags
         thought_buffer = ""
         response_buffer = ""
         thinking_complete = False
-
+
         # Add initial thinking message
         messages.append(
             ChatMessage(
@@ -54,27 +54,27 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
                 metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
             )
         )
-
+
         for chunk in response:
             parts = chunk.candidates[0].content.parts
             current_chunk = parts[0].text
-
+
             if len(parts) == 2 and not thinking_complete:
                 # Complete thought and start response
                 thought_buffer += current_chunk
                 print(f"\n=== Complete Thought ===\n{thought_buffer}")
-
+
                 messages[-1] = ChatMessage(
                     role="assistant",
                     content=thought_buffer,
                     metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
                 )
                 yield messages
-
+
                 # Start response
                 response_buffer = parts[1].text
                 print(f"\n=== Starting Response ===\n{response_buffer}")
-
+
                 messages.append(
                     ChatMessage(
                         role="assistant",
@@ -82,32 +82,32 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
                     )
                 )
                 thinking_complete = True
-
+
             elif thinking_complete:
                 # Stream response
                 response_buffer += current_chunk
                 print(f"\n=== Response Chunk ===\n{current_chunk}")
-
+
                 messages[-1] = ChatMessage(
                     role="assistant",
                     content=response_buffer
                 )
-
+
             else:
                 # Stream thinking
                 thought_buffer += current_chunk
                 print(f"\n=== Thinking Chunk ===\n{current_chunk}")
-
+
                 messages[-1] = ChatMessage(
                     role="assistant",
                     content=thought_buffer,
                     metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
                 )
-
+
             yield messages
-
+
         print(f"\n=== Final Response ===\n{response_buffer}")
-
+
     except Exception as e:
         print(f"\n=== Error ===\n{str(e)}")
         messages.append(
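The streaming logic above hinges on one convention of the thinking model's output: while the model is still reasoning, every streamed chunk carries a single text part, and the first chunk whose parts list holds two entries marks the hand-off from thought to answer. Below is a minimal standalone sketch of that pattern outside Gradio; the model id and the API-key handling are assumptions, since neither appears in this diff.

import google.generativeai as genai

genai.configure(api_key="YOUR_API_KEY")  # placeholder; key handling not shown in this diff
model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp")  # assumed model id

chat = model.start_chat(history=[])
response = chat.send_message("Why is the sky blue?", stream=True)

thinking_complete = False
for chunk in response:
    parts = chunk.candidates[0].content.parts
    if len(parts) == 2 and not thinking_complete:
        # Boundary chunk: parts[0] finishes the thought, parts[1] starts the answer
        print("[thought]", parts[0].text)
        print("[answer]", parts[1].text, end="")
        thinking_complete = True
    elif thinking_complete:
        print(parts[0].text, end="")  # keep streaming the visible answer
    else:
        print("[thought]", parts[0].text)  # still streaming the thought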
@@ -122,12 +122,34 @@ def user_message(msg: str, history: list) -> tuple[str, list]:
     """Adds user message to chat history"""
     history.append(ChatMessage(role="user", content=msg))
     return "", history
-
+
 
 # Create the Gradio interface
 with gr.Blocks(theme=gr.themes.Citrus(), fill_height=True) as demo:
-
-
+    gr.Markdown(
+        """
+        # Gemini 2.0 Flash 'Thinking' Chatbot 💭
+
+        This chatbot demonstrates the experimental 'thinking' capability of the **Gemini 2.0 Flash** model.
+        You can observe the model's thought process as it generates responses, displayed with the "⚙️ Thinking" prefix.
+
+        **Key Features:**
+
+        * Powered by Google's **Gemini 2.0 Flash** model.
+        * Shows the model's **thoughts** before the final answer (experimental feature).
+        * Supports **conversation history** for multi-turn chats.
+        * Uses **streaming** for a more interactive experience.
+
+        **Instructions:**
+
+        1. Type your message in the input box below.
+        2. Press Enter or click Submit to send.
+        3. Observe the chatbot's "Thinking" process followed by the final response.
+        4. Use the "Clear Chat" button to start a new conversation.
+
+        *Please note*: The 'thinking' feature is experimental and the quality of thoughts may vary.
+        """
+    )
 
     chatbot = gr.Chatbot(
         type="messages",
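The "⚙️ Thinking" entries built throughout the handler depend on how gr.Chatbot(type="messages") treats metadata: a ChatMessage whose metadata carries a "title" is rendered as a collapsible thought bubble rather than a plain reply. A minimal sketch of that rendering behavior in isolation:

import gradio as gr
from gradio import ChatMessage

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        type="messages",
        value=[
            ChatMessage(role="user", content="Why is the sky blue?"),
            ChatMessage(
                role="assistant",
                content="Considering Rayleigh scattering...",
                metadata={"title": "⚙️ Thinking"},  # rendered as a collapsible section
            ),
            ChatMessage(role="assistant", content="Shorter wavelengths scatter more."),
        ],
    )

demo.launch()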
@@ -149,7 +171,7 @@ with gr.Blocks(theme=gr.themes.Citrus(), fill_height=True) as demo:
 
     # Set up event handlers
     msg_store = gr.State("")  # Store for preserving user message
-
+
     input_box.submit(
         lambda msg: (msg, msg, ""),  # Store message and clear input
         inputs=[input_box],
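The final hunk cuts off inside the input_box.submit(...) call, so the outputs list and any chained steps are not visible here. A hedged sketch of how this store-then-stream wiring typically completes in Gradio; the output targets and the .then() chain are plausible completions, not the file's confirmed code:

input_box.submit(
    lambda msg: (msg, msg, ""),                 # store message and clear input
    inputs=[input_box],
    outputs=[msg_store, input_box, input_box],  # assumed: one target per returned value
    queue=False,
).then(
    user_message,                               # append the user turn to the history
    inputs=[msg_store, chatbot],
    outputs=[input_box, chatbot],
    queue=False,
).then(
    stream_gemini_response,                     # stream thoughts, then the final answer
    inputs=[msg_store, chatbot],
    outputs=chatbot,
)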
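stream_gemini_response also leans on format_chat_history, which is defined earlier in app.py and never appears in this diff. A hypothetical reconstruction, assuming Gemini's history format of {"role": "user"|"model", "parts": [text]} and that metadata-tagged thinking messages must be filtered out before being sent back to the model:

def format_chat_history(messages: list) -> list:
    """Hypothetical reconstruction of the helper referenced in the diff."""
    formatted = []
    for msg in messages:
        # Skip "Thinking" entries; only real user/assistant turns go back to Gemini
        if getattr(msg, "metadata", None):
            continue
        formatted.append({
            "role": "user" if msg.role == "user" else "model",  # Gemini expects "model"
            "parts": [msg.content],
        })
    return formatted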