Spaces:

mgbam
/

PhilosBeta-Advanced-Chat

Runtime error

App Files Files Community

mgbam committed on Jan 13

Commit

e69fb50

verified ·

1 Parent(s): 7bc132c

Create app.py

Browse files

Files changed (1) hide show

app.py +283 -0

app.py ADDED Viewed

	@@ -0,0 +1,283 @@

+import gradio as gr
+import torch
+from transformers import (
+    AutoTokenizer,
+    AutoModelForCausalLM,
+    TextIteratorStreamer,
+)
+import threading
+import time
+# -----------------------------------------------------------------------------
+# 1. MODEL LOADING
+# -----------------------------------------------------------------------------
+# In this advanced example, we'll instantiate the model directly (instead of using pipeline).
+# We'll do streaming outputs via TextIteratorStreamer.
# Preferred checkpoint; the except branch below swaps in "gpt2" if it cannot be loaded.
MODEL_NAME = "microsoft/phi-4"  # Replace with an actual HF model if phi-4 is unavailable
# Device the fallback model is placed on and that prompts are moved to before generation.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME, torch_dtype=torch.float16, device_map="auto"
    )
except Exception as load_error:
    # Catch Exception (not a bare except) so Ctrl-C / SystemExit still stop the
    # process, and report the cause so the silent switch to gpt2 is visible in logs.
    print(f"Could not load {MODEL_NAME!r} ({load_error!r}); falling back to 'gpt2'.")
    # Fallback if model is not found or large. Here we default to a smaller model
    MODEL_NAME = "gpt2"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(DEVICE)
# The app only runs inference, so put the module in evaluation mode.
model.eval()
+# -----------------------------------------------------------------------------
+# 2. CONVERSATION / PROMPTS
+# -----------------------------------------------------------------------------
+# We'll keep track of conversation using a list of dictionaries:
+# [
+#   {"role": "system",    "content": "..."},
+#   {"role": "developer", "content": "..."},
+#   {"role": "user",      "content": "User message"},
+#   {"role": "assistant", "content": "Assistant answer"},
+#   ...
+# ]
+#
+# We’ll also build in a mock retrieval system that merges knowledge snippets
+# into the final prompt if the user chooses to do so.
# Initial text for the "System Prompt" box; sent as the system message.
DEFAULT_SYSTEM_PROMPT = " ".join(
    [
        "You are Philos, an advanced AI system created by ACC (Algorithmic Computer-generated Consciousness).",
        "Answer user queries accurately, thoroughly, and helpfully. Keep your responses relevant and correct.",
    ]
)
# Initial text for the "Developer Prompt" box; sent as the developer message.
DEFAULT_DEVELOPER_PROMPT = " ".join(
    [
        "Ensure that you respond in a style that is professional, clear, and approachable.",
        "Include reasoning steps if needed, but keep them concise.",
    ]
)
# A small dictionary to emulate knowledge retrieval.
# In real scenarios, you might use an actual vector DB + retrieval method.
# Keys are the trigger phrases looked for in the user's message; values are the
# snippets returned when the phrase is present.
MOCK_KB = {
    "python": "Python is a high-level, interpreted programming language famous for its readability and flexibility.",
    "accelerate library": "The accelerate library by HF helps in distributed training and inference.",
    "phi-4 architecture": "phi-4 is a 14B-parameter, decoder-only Transformer with a 16K context window.",
}


def retrieve_knowledge(user_query):
    """
    Return the MOCK_KB snippets whose keyword occurs in the query.

    Matching is a naive, case-insensitive substring test. Snippets come back
    in MOCK_KB insertion order.

    Args:
        user_query: The user's message. ``None`` or an empty string is
            treated as "nothing to match".

    Returns:
        A list of snippet strings; empty when no keyword matches.
    """
    if not user_query:
        return []
    # Lower-case the query once instead of once per keyword.
    query = user_query.lower()
    return [
        snippet
        for keyword, snippet in MOCK_KB.items()
        if keyword.lower() in query
    ]
+# -----------------------------------------------------------------------------
+# 3. HELPER: Build the prompt from conversation
+# -----------------------------------------------------------------------------
def build_prompt(conversation):
    """
    Flatten a conversation into the plain-text prompt fed to the model.

    Each message becomes a bracketed role header on its own line followed by
    the message content on the next line. A final bare "[Assistant]" header is
    appended so the model continues in the assistant's voice.

    Args:
        conversation: Iterable of dicts with "role" and "content" keys.

    Returns:
        The assembled prompt string.
    """
    headers = {
        "system": "[System]",
        "developer": "[Developer]",
        "user": "[User]",
    }
    sections = []
    for turn in conversation:
        # Any role other than system/developer/user is rendered as an assistant turn.
        header = headers.get(turn["role"], "[Assistant]")
        sections.append(f"{header}\n{turn['content']}\n")
    # We end with an assistant role so model can continue
    sections.append("[Assistant]\n")
    return "".join(sections)
+# -----------------------------------------------------------------------------
+# 4. STREAMING GENERATION
+# -----------------------------------------------------------------------------
def generate_tokens_stream(prompt, temperature=0.7, top_p=0.9, max_new_tokens=128):
    """
    Generate a completion for ``prompt`` and yield the reply as it grows.

    Generation runs in a background thread that feeds a TextIteratorStreamer;
    this generator consumes the streamer and yields the accumulated reply text
    after every new chunk.

    Args:
        prompt: Full text prompt to continue.
        temperature: Sampling temperature. A value of 0 (or less) switches to
            greedy decoding instead of sampling.
        top_p: Nucleus-sampling threshold; only used when sampling.
        max_new_tokens: Upper bound on the number of generated tokens.

    Yields:
        The reply text generated so far (cumulative, not per-chunk deltas).

    Raises:
        Exception: Re-raises whatever ``model.generate`` raised in the worker
            thread, once the stream has been drained.
    """
    # skip_prompt=True keeps the prompt itself out of the stream; without it the
    # whole prompt would be echoed back at the start of every reply.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    encoded = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    generation_kwargs = dict(
        input_ids=encoded["input_ids"],
        # Pass the mask explicitly: pad and EOS share an id here, so it cannot be inferred.
        attention_mask=encoded.get("attention_mask"),
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,
    )
    if temperature and temperature > 0:
        generation_kwargs.update(do_sample=True, temperature=temperature, top_p=top_p)
    else:
        # Sampling needs a strictly positive temperature; 0 means greedy decoding.
        generation_kwargs["do_sample"] = False

    failures = []

    def _run_generation():
        try:
            model.generate(**generation_kwargs)
        except Exception as exc:
            failures.append(exc)
            # Unblock the consumer loop below; otherwise it would wait forever
            # for an end-of-stream signal that generate() never got to send.
            streamer.end()

    # We'll run generation in a background thread, streaming tokens
    thread = threading.Thread(target=_run_generation)
    thread.start()
    # Stream tokens
    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text
    thread.join()
    if failures:
        raise failures[0]
+# -----------------------------------------------------------------------------
+# 5. MAIN CHAT FUNCTION
+# -----------------------------------------------------------------------------
def advanced_chat(user_msg, conversation, system_prompt, dev_prompt, retrieve_flg, temperature, top_p):
    """
    Run one chat turn and stream the assistant's reply.

    Steps:
    - Rebuild the conversation with the current system/developer prompts first
    - Optionally append retrieved knowledge snippets to the developer prompt
    - Build the final prompt and stream the reply
    - Once generation has finished, write the whole exchange back into
      ``conversation`` in place so the caller's history includes this turn

    Args:
        user_msg: The user's new message. Blank or ``None`` yields a notice
            and leaves ``conversation`` untouched.
        conversation: List of role/content dicts from earlier turns; updated
            in place when the turn completes. ``None`` is treated as empty.
        system_prompt: Text of the system message for this turn.
        dev_prompt: Text of the developer message for this turn.
        retrieve_flg: When truthy, matching knowledge snippets are merged into
            the developer message.
        temperature: Sampling temperature forwarded to generation.
        top_p: Nucleus-sampling threshold forwarded to generation.

    Yields:
        The assistant reply generated so far (cumulative text).
    """
    # If user message is empty
    if not user_msg or not user_msg.strip():
        yield "Please enter a message."
        return
    if conversation is None:
        conversation = []
    # 1) Fresh system/dev messages for this turn
    system_message = {"role": "system", "content": system_prompt}
    developer_message = {"role": "developer", "content": dev_prompt}
    # 2) Retrieve knowledge if user toggled "Include knowledge retrieval"
    if retrieve_flg:
        knowledge_snippets = retrieve_knowledge(user_msg)
        if knowledge_snippets:
            # We can just append them to the developer content for simplicity
            knowledge_text = "\n".join("[Knowledge] " + s for s in knowledge_snippets)
            developer_message["content"] += f"\n\n[Additional Knowledge]\n{knowledge_text}"
    # 3) Drop stale system/dev entries, then put the fresh ones first and the
    #    new user message last
    earlier_turns = [msg for msg in conversation if msg["role"] not in ("system", "developer")]
    turns = [
        system_message,
        developer_message,
        *earlier_turns,
        {"role": "user", "content": user_msg},
    ]
    # 4) Build final prompt
    prompt = build_prompt(turns)
    # 5) Stream the assistant's response
    partial_response = ""
    for partial_text in generate_tokens_stream(prompt, temperature, top_p):
        partial_response = partial_text
        yield partial_text  # Send partial tokens to Gradio for display
    # 6) Generation is complete: record the assistant message and commit the
    #    exchange. Slice assignment mutates the caller's list; rebinding the
    #    local name would leave the caller's history unchanged.
    turns.append({"role": "assistant", "content": partial_response})
    conversation[:] = turns
+# -----------------------------------------------------------------------------
+# 6. BUILD GRADIO INTERFACE
+# -----------------------------------------------------------------------------
def build_ui():
    """
    Assemble the Gradio Blocks app for the chat demo.

    The layout has prompt/sampling controls on top, a Chatbot in the middle and
    a message box with a Send button below. Both the button and pressing Enter
    in the message box trigger the same streaming handler.

    Returns:
        The ``gr.Blocks`` app; the caller is responsible for launching it.
    """
    with gr.Blocks(title="PhilosBeta-Advanced", css="#chatbot{height:550px} .overflow-y-auto{max-height:550px}") as demo:
        gr.Markdown("# **PhilosBeta: Advanced Demo**")
        gr.Markdown(
            "An example of multi-turn conversation with streaming responses, "
            "optional retrieval, and custom system/developer prompts."
        )
        # State to store the conversation as a list of role/content dicts
        conversation_state = gr.State([])
        # TEXT ELEMENTS
        with gr.Row():
            with gr.Column():
                system_prompt_box = gr.Textbox(
                    label="System Prompt",
                    value=DEFAULT_SYSTEM_PROMPT,
                    lines=3
                )
                developer_prompt_box = gr.Textbox(
                    label="Developer Prompt",
                    value=DEFAULT_DEVELOPER_PROMPT,
                    lines=3
                )
            with gr.Column():
                retrieve_flag = gr.Checkbox(label="Include Knowledge Retrieval", value=False)
                temperature_slider = gr.Slider(0.0, 2.0, 0.7, step=0.1, label="Temperature")
                top_p_slider = gr.Slider(0.0, 1.0, 0.9, step=0.05, label="Top-p")
                gr.Markdown("Max new tokens = 128 (fixed in code).")
        # MAIN CHAT UI
        # height is passed as a constructor argument: the old `.style(...)`
        # method no longer exists in current Gradio releases. The "messages"
        # format takes the same role/content dicts the conversation state uses.
        chatbox = gr.Chatbot(
            label="Philos Conversation",
            elem_id="chatbot",
            height=500,
            type="messages",
        )
        user_input = gr.Textbox(
            label="Your Message",
            placeholder="Type here...",
            lines=3
        )
        send_btn = gr.Button("Send", variant="primary")
        # ---------------------------------------------------------------------
        # ACTION: Handle user input
        # ---------------------------------------------------------------------
        def user_send(
            user_text, conversation, sys_prompt, dev_prompt, retrieve_flg, temperature, top_p
        ):
            """
            Stream one chat turn into the Chatbot.

            This is a generator: every yield is a ``(chat_messages, state)``
            pair matching the two outputs wired below. The stored state only
            gains the new user/assistant pair once the reply is complete.
            """
            # Only user/assistant turns are displayed and stored; advanced_chat
            # re-adds the system/developer prompts on every call.
            history = [
                msg for msg in (conversation or [])
                if msg["role"] in ("user", "assistant")
            ]
            user_text = user_text or ""
            is_blank = not user_text.strip()
            shown = list(history)
            if not is_blank:
                shown.append({"role": "user", "content": user_text})
            reply = ""
            # advanced_chat gets its own copy so the history kept here stays the
            # single source of truth whether or not it mutates its argument.
            for reply in advanced_chat(
                user_msg=user_text,
                conversation=list(history),
                system_prompt=sys_prompt,
                dev_prompt=dev_prompt,
                retrieve_flg=retrieve_flg,
                temperature=temperature,
                top_p=top_p
            ):
                yield shown + [{"role": "assistant", "content": reply}], history
            if is_blank:
                # The "please enter a message" notice was shown but is not stored.
                return
            history = shown + [{"role": "assistant", "content": reply}]
            yield history, history
        # Gradio streams generator handlers: each yielded pair updates the
        # Chatbot and the conversation state. Outputs must be components, so
        # the Chatbot itself is the target.
        chat_inputs = [
            user_input,
            conversation_state,
            system_prompt_box,
            developer_prompt_box,
            retrieve_flag,
            temperature_slider,
            top_p_slider,
        ]
        chat_outputs = [chatbox, conversation_state]
        # The Send button and pressing Enter share the same handler and wiring.
        for register in (send_btn.click, user_input.submit):
            register(fn=user_send, inputs=chat_inputs, outputs=chat_outputs)
    return demo
+# -----------------------------------------------------------------------------
+# 7. LAUNCH
+# -----------------------------------------------------------------------------
if __name__ == "__main__":
    # Build the Blocks app and start serving it.
    build_ui().launch()