Update app.py
app.py CHANGED
```diff
@@ -9,7 +9,14 @@ from huggingface_hub import login
 st.set_page_config(page_title="LLaMA Chatbot", page_icon="🦙")
 status_placeholder = st.empty()
 
-#
+# Check GPU
+if torch.cuda.is_available():
+    st.sidebar.success("✅ CUDA is available")
+    st.sidebar.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
+else:
+    st.sidebar.warning("⚠️ CUDA is not available. Using CPU.")
+
+# Authentication
 try:
     hf_token = os.environ.get("HF_TOKEN")
     if not hf_token:
@@ -26,7 +33,7 @@ st.title("🦙 LLaMA Chatbot")
 @st.cache_resource
 def load_model():
     try:
-        model_path = "Alaaeldin/Llama-demo"
+        model_path = "Alaaeldin/Llama-demo"
 
         with st.spinner("🔄 Loading tokenizer..."):
             tokenizer = AutoTokenizer.from_pretrained(
@@ -41,7 +48,6 @@ def load_model():
             model_path,
             torch_dtype=torch.float16,
             device_map="auto",
-            load_in_8bit=True,
             token=hf_token,
             trust_remote_code=True
         )
@@ -59,7 +65,7 @@ if "messages" not in st.session_state:
 # Load model
 model, tokenizer = load_model()
 
-#
+# Chat interface
 if model and tokenizer:
     st.success("✨ Ready to chat! Enter your message below.")
 
@@ -72,6 +78,36 @@ if model and tokenizer:
 if prompt := st.chat_input("Your message"):
     # Add user message to chat history
     st.session_state.messages.append({"role": "user", "content": prompt})
+
     # Display user message
     with st.chat_message("user"):
-        st.markdown(prompt)
+        st.markdown(prompt)
+
+    # Generate response
+    with st.chat_message("assistant"):
+        with st.spinner("🤔 Thinking..."):
+            # Prepare input
+            inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+            # Generate response
+            with torch.no_grad():
+                outputs = model.generate(
+                    inputs["input_ids"],
+                    max_length=200,
+                    num_return_sequences=1,
+                    temperature=0.7,
+                    do_sample=True,
+                    pad_token_id=tokenizer.eos_token_id
+                )
+
+            # Decode response
+            response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+            # Display response
+            st.markdown(response)
+
+            # Add assistant response to chat history
+            st.session_state.messages.append({"role": "assistant", "content": response})
+
+else:
+    st.error("⚠️ Model loading failed. Please check the error messages above.")
```