Update app.py
app.py CHANGED
@@ -6,11 +6,9 @@ from huggingface_hub import login
 import re
 import os
 
-# Load Hugging Face token
 HF_TOKEN = os.getenv("HF_TOKEN")
 login(token=HF_TOKEN)
 
-# Define models
 MODELS = {
     "athena-1": {
         "name": "🦁 Atlas-Flash",
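A note on the token handling this hunk keeps: `os.getenv("HF_TOKEN")` returns `None` when the Space has no `HF_TOKEN` secret, and `login(token=None)` will then fail or try to prompt interactively at startup. A minimal defensive sketch, reusing the names above:

```python
# Sketch: only authenticate when the HF_TOKEN secret is actually set.
import os
from huggingface_hub import login

HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)  # needed only for gated or private model repos
```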
@@ -22,9 +20,9 @@ MODELS = {
     },
 }
 
-
-USER_PFP = "user.png"
-AI_PFP = "ai_pfp.png"
+
+USER_PFP = "user.png"
+AI_PFP = "ai_pfp.png"
 
 class AtlasInferenceApp:
     def __init__(self):
@@ -61,17 +59,17 @@ class AtlasInferenceApp:
 
         model_path = MODELS[model_key]["sizes"][model_size]
 
-
+
        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
-            device_map="cpu",
-            torch_dtype=torch.float32,
+            device_map="cpu",
+            torch_dtype=torch.float32,
            trust_remote_code=True,
            low_cpu_mem_usage=True
        )
 
-
+
        st.session_state.current_model.update({
            "tokenizer": tokenizer,
            "model": model,
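For context on the loading flags in this hunk: `device_map="cpu"` keeps every layer on the CPU, `torch_dtype=torch.float32` stores weights at 4 bytes per parameter, and `low_cpu_mem_usage=True` loads the checkpoint without materializing a second full copy of the weights in RAM. A quick footprint check, assuming `model` is the instance loaded above:

```python
# Rough RAM estimate for a float32 CPU load: 4 bytes per parameter.
num_params = sum(p.numel() for p in model.parameters())
print(f"{num_params / 1e9:.2f}B params ≈ {num_params * 4 / 1e9:.1f} GB in float32")
```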
@@ -89,7 +87,7 @@ class AtlasInferenceApp:
             return "⚠️ Please select and load a model first"
 
         try:
-
+
             system_instruction = "You are Atlas, a helpful AI assistant trained to help the user. You are a Deepseek R1 fine-tune."
             prompt = f"{system_instruction}\n\n### Instruction:\n{message}\n\n### Response:"
 
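The prompt uses the Alpaca-style `### Instruction:` / `### Response:` layout. Because a causal LM's decoded output echoes the prompt, the reply is whatever follows the last `### Response:` marker, which is what the `split("### Response:")` in the generation hunk below relies on. A self-contained round-trip, where `decoded` is a stand-in for real model output:

```python
# Round-trip of the Alpaca-style prompt; `decoded` is fabricated for illustration.
system_instruction = "You are Atlas, a helpful AI assistant trained to help the user."
message = "What is the capital of France?"  # example user message
prompt = f"{system_instruction}\n\n### Instruction:\n{message}\n\n### Response:"

decoded = prompt + " Paris."                        # stand-in for tokenizer.decode(...)
reply = decoded.split("### Response:")[-1].strip()  # -> "Paris."
print(reply)
```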
@@ -101,8 +99,11 @@ class AtlasInferenceApp:
                 padding=True
             )
 
+
+            response_container = st.empty()
+            full_response = ""
             with torch.no_grad():
-                outputs = st.session_state.current_model["model"].generate(
+                for chunk in st.session_state.current_model["model"].generate(
                     input_ids=inputs.input_ids,
                     attention_mask=inputs.attention_mask,
                     max_new_tokens=max_tokens,
@@ -112,9 +113,13 @@ class AtlasInferenceApp:
                     do_sample=True,
                     pad_token_id=st.session_state.current_model["tokenizer"].pad_token_id,
                     eos_token_id=st.session_state.current_model["tokenizer"].eos_token_id,
-                )
-                response = st.session_state.current_model["tokenizer"].decode(outputs[0], skip_special_tokens=True)
-            return response.split("### Response:")[-1].strip()
+                    streamer=None,  # Use a custom streamer for real-time updates
+                ):
+                    chunk_text = st.session_state.current_model["tokenizer"].decode(chunk, skip_special_tokens=True)
+                    full_response += chunk_text
+                    response_container.markdown(full_response)
+
+            return full_response.split("### Response:")[-1].strip()
         except Exception as e:
             return f"⚠️ Generation Error: {str(e)}"
         finally:
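A caution on the streaming loop this commit introduces: `generate()` is not a generator. It returns a complete tensor of token ids only after generation finishes, so `for chunk in ...generate(...)` iterates over finished sequences (one per batch entry), not over tokens as they are produced, and `streamer=None` is a no-op placeholder. The supported streaming pattern in transformers is `TextIteratorStreamer` with `generate()` running in a background thread. A sketch, reusing `inputs` and `max_tokens` from the diff and writing `model` / `tokenizer` for the objects kept in `st.session_state.current_model`:

```python
# Incremental streaming sketch; model, tokenizer, inputs, and max_tokens are
# assumed to be in scope as in the surrounding code.
from threading import Thread

import streamlit as st
from transformers import TextIteratorStreamer

streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generation_kwargs = dict(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=max_tokens,
    do_sample=True,
    streamer=streamer,
)
# generate() blocks until done, so run it in a worker thread and drain the
# streamer on the main thread to update the page as text arrives.
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

response_container = st.empty()
full_response = ""
for text_chunk in streamer:  # yields decoded text incrementally
    full_response += text_chunk
    response_container.markdown(full_response)
thread.join()
```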
@@ -154,7 +159,6 @@ class AtlasInferenceApp:
 
         st.markdown("*⚠️ CAUTION: Atlas is an experimental model and this is just a preview. Responses may not be expected. Please double-check sensitive information!*")
 
-        # Display chat history
         for message in st.session_state.chat_history:
             with st.chat_message(
                 message["role"],
@@ -162,7 +166,6 @@ class AtlasInferenceApp:
             ):
                 st.markdown(message["content"])
 
-        # Input box for user messages
         if prompt := st.chat_input("Message Atlas..."):
             st.session_state.chat_history.append({"role": "user", "content": prompt})
             with st.chat_message("user", avatar=USER_PFP):
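The final hunk ends mid-turn; for reference, the usual Streamlit shape of the rest of the turn renders the user message, generates the assistant reply, and appends it to history so it survives the next rerun. A sketch in which `app.respond` is a hypothetical stand-in for this class's generation method:

```python
# Hypothetical completion of the chat turn; `app.respond` is not from the diff.
if prompt := st.chat_input("Message Atlas..."):
    st.session_state.chat_history.append({"role": "user", "content": prompt})
    with st.chat_message("user", avatar=USER_PFP):
        st.markdown(prompt)
    with st.chat_message("assistant", avatar=AI_PFP):
        reply = app.respond(prompt)  # generate and render the reply
        st.markdown(reply)
    st.session_state.chat_history.append({"role": "assistant", "content": reply})
```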