ruggsea committed
Commit a4e396f · 1 Parent(s): 43523a0
Files changed (1): app.py +8 -10
app.py CHANGED
@@ -44,27 +44,22 @@ def generate(
     top_p: float = 0.9,
     top_k: int = 50,
     repetition_penalty: float = 1.1,
-) -> Iterator[str]:
-    # Ensure we have valid chat history
+) -> list[tuple[str, str]]:
     if chat_history is None:
         chat_history = []
 
-    # Build conversation context
     conversation = []
     if system_prompt:
         conversation.append({"role": "system", "content": system_prompt})
 
-    # Add all previous exchanges
     for user, assistant in chat_history:
         conversation.extend([
             {"role": "user", "content": str(user).strip()},
             {"role": "assistant", "content": str(assistant).strip()}
         ])
 
-    # Add current message
     conversation.append({"role": "user", "content": str(message).strip()})
 
-    # Generate response
     try:
         input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
         if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
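Note: the hunk stops at the MAX_INPUT_TOKEN_LENGTH check, so the overflow handling itself is not shown. Below is a minimal sketch of the prompt-building step for reference, assuming a Hugging Face tokenizer that ships a chat template; the model name and the keep-the-newest-tokens truncation are illustrative assumptions, not this app's confirmed behavior.

# Minimal sketch of building a prompt with apply_chat_template, assuming
# a tokenizer with a chat template. The left-truncation is one common way
# to handle the length check; the app's own handling is not shown above.
from transformers import AutoTokenizer

MAX_INPUT_TOKEN_LENGTH = 4096  # assumed limit, for illustration only

tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")  # placeholder model

conversation = [
    {"role": "system", "content": "You are a helpful philosophy tutor."},
    {"role": "user", "content": "What is the problem of consciousness?"},
]

input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
    # Keep the most recent tokens so the current question survives trimming.
    input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]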
@@ -91,11 +86,14 @@ def generate(
         outputs = []
         for text in streamer:
             outputs.append(text)
-            yield "".join(outputs)
+            partial_output = "".join(outputs)
+            chat_history = chat_history + [(message, partial_output)]
+            yield chat_history
 
     except Exception as e:
         gr.Warning(f"Error during generation: {str(e)}")
-        yield "I apologize, but I encountered an error. Please try again."
+        chat_history = chat_history + [(message, "I apologize, but I encountered an error. Please try again.")]
+        yield chat_history
 
 def create_demo() -> gr.Blocks:
     with gr.Blocks(css="style.css") as demo:
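Note: after this change, generate yields the entire conversation as a list of (user, assistant) tuples on every streamed chunk, which is the contract a tuple-format gr.Chatbot expects from a streaming callback: each yield repaints the whole history, with the last pair growing as tokens arrive. A self-contained sketch of that contract, with a hard-coded token list standing in for the app's streamer (presumably a transformers TextIteratorStreamer):

# Minimal sketch of Gradio's streaming-chat contract: a generator wired to
# a gr.Chatbot yields the full history each step. The fake token list below
# stands in for the real model stream.
import time
import gradio as gr

def generate(message, chat_history):
    chat_history = chat_history or []
    outputs = []
    for token in ["Being", " is", " said", " in", " many", " ways."]:  # stand-in stream
        outputs.append(token)
        time.sleep(0.1)
        # Yield a fresh list so Gradio sees a new value to render; the base
        # history is deliberately not rebound inside the loop.
        yield chat_history + [(message, "".join(outputs))]

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    msg.submit(generate, inputs=[msg, chatbot], outputs=chatbot)

demo.launch()

One detail worth flagging: the sketch yields chat_history + [...] without rebinding chat_history, whereas the hunk above reassigns it on every iteration, which appends an extra (message, partial_output) pair per streamed chunk.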
@@ -173,8 +171,8 @@ def create_demo() -> gr.Blocks:
                 ["How does Kant's Categorical Imperative work?"],
                 ["What is the problem of consciousness in philosophy of mind?"],
             ],
-            inputs=msg,
-            fn=generate,
+            inputs=[msg],
+            fn=lambda x: generate(x, [], system_prompt.value, max_new_tokens.value, temperature.value, top_p.value, top_k.value, repetition_penalty.value),
             outputs=chatbot,
             cache_examples=True,
             api_name=False
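Note: with cache_examples=True, gr.Examples needs fn and outputs because it runs fn on every example at startup and replays the stored result on click. Since an example only populates msg, the lambda pins generate's remaining parameters to the components' default .value attributes. A minimal sketch of the same wiring, with a stand-in reply function:

# Minimal sketch of cached examples feeding a Chatbot; cached_reply stands
# in for the wrapped generate() and returns one finished exchange.
import gradio as gr

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()

    def cached_reply(question):
        return [(question, "(a cached answer would appear here)")]

    gr.Examples(
        examples=[
            ["How does Kant's Categorical Imperative work?"],
            ["What is the problem of consciousness in philosophy of mind?"],
        ],
        inputs=[msg],
        fn=cached_reply,
        outputs=chatbot,
        cache_examples=True,
    )

demo.launch()

One caveat: the lambda is an ordinary function that returns a generator object, so depending on the Gradio version the cache may capture that object rather than the streamed history; wrapping generate in a plain function that drains the generator and returns the final history is a safer pattern.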
 