Tijmen2 committed · verified
Commit 7efaceb · 1 Parent(s): b39668d

Update app.py

Files changed (1)
  1. app.py +23 -4
app.py CHANGED
@@ -36,13 +36,31 @@ GREETING_MESSAGES = [
     "The universe awaits! I'm AstroSage. What astronomical wonders shall we discuss?",
 ]
 
+def format_message(role: str, content: str) -> str:
+    """Format a single message according to Llama-3 chat template."""
+    return f"<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>"
+
 def generate_text(prompt: str, history: list, max_new_tokens=512, temperature=0.7, top_p=0.95, top_k=50):
     """
-    Generate a response using the transformer model.
+    Generate a response using the transformer model with proper Llama-3 chat formatting.
     """
-    # Combine history into the prompt
-    formatted_history = "\n".join([f"{msg['role']}: {msg['content']}" for msg in history])
-    prompt_with_history = f"{formatted_history}\nUser: {prompt}\nAssistant:"
+    # Start with begin_of_text token
+    formatted_messages = ["<|begin_of_text|>"]
+
+    # Add formatted history
+    for msg in history:
+        formatted_message = format_message(msg['role'], msg['content'])
+        formatted_messages.append(formatted_message)
+
+    # Add the current prompt
+    formatted_message = format_message('user', prompt)
+    formatted_messages.append(formatted_message)
+
+    # Add the start of assistant's response
+    formatted_messages.append("<|start_header_id|>assistant<|end_header_id|>\n\n")
+
+    # Combine all messages
+    prompt_with_history = "\n".join(formatted_messages)
 
     # Encode the prompt
     inputs = tokenizer([prompt_with_history], return_tensors="pt", truncation=True).to(DEVICE)
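The helper added above hand-builds the Llama-3 prompt string. For comparison, a minimal sketch of the same result via transformers' built-in `tokenizer.apply_chat_template` (assuming the tokenizer loaded in app.py bundles the Llama-3 chat template; the messages are illustrative):

```python
# Sketch only: `tokenizer` is assumed to be the AutoTokenizer already loaded
# in app.py, with the Llama-3 chat template bundled in its tokenizer config.
messages = [
    {"role": "user", "content": "What is a pulsar?"},             # prior history
    {"role": "assistant", "content": "A rotating neutron star."},
    {"role": "user", "content": "How fast do they spin?"},        # current prompt
]

# Produces the same <|begin_of_text|><|start_header_id|>... string as the
# code above; add_generation_prompt=True appends the trailing assistant header.
prompt_with_history = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
```

One difference worth noting: `apply_chat_template` concatenates the blocks directly, while the `"\n".join(...)` in the commit inserts an extra newline between them, a slight deviation from the canonical template.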
@@ -56,6 +74,7 @@ def generate_text(prompt: str, history: list, max_new_tokens=512, temperature=0.
         skip_prompt=True,
         skip_special_tokens=True
     )
+
     generation_kwargs = dict(
         **inputs,
         streamer=streamer,
 
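The `generation_kwargs` built here feed the usual `TextIteratorStreamer` pattern: `model.generate` has to run in a worker thread while the caller iterates the streamer. A minimal sketch of that consumption side (the `stream_response` wrapper is an assumption for illustration, not part of this commit):

```python
from threading import Thread

def stream_response(model, streamer, generation_kwargs):
    """Yield the growing response while model.generate runs in a worker thread."""
    # model.generate blocks until generation finishes, so it must run
    # off-thread for the streamer to be consumable incrementally.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    # TextIteratorStreamer yields decoded text chunks as tokens are produced
    # (skip_prompt=True above keeps the input prompt out of the stream).
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial  # e.g. hand incremental text to the Gradio chat UI

    thread.join()
```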