Spaces:

Alaaeldin
/

llama-app

Sleeping

App Files Files Community

Alaaeldin committed on Jan 19

Commit

8a0a858

verified ·

1 Parent(s): 9998464

Create app.py

Browse files

Files changed (1) hide show

app.py +132 -0

app.py ADDED Viewed

	@@ -0,0 +1,132 @@

+import streamlit as st
+import torch
+import os
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from peft import PeftModel, PeftConfig
+from huggingface_hub import login, HfApi
+# Set page config for better display
+st.set_page_config(page_title="LLaMA Chatbot", page_icon="🦙")
+status_placeholder = st.empty()
+# Check GPU
+if torch.cuda.is_available():
+    st.sidebar.success("✅ CUDA is available")
+    st.sidebar.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
+else:
+    st.sidebar.warning("⚠️ CUDA is not available. Using CPU.")
+# Debug token access and authentication
+try:
+    # Try to get token
+    hf_token = os.environ.get("HF_TOKEN")
+    if not hf_token:
+        hf_token = st.secrets.get("HF_TOKEN")
+    if hf_token:
+        st.info(f"Token found! First 4 characters: {hf_token[:4]}...")
+        # Test token validity
+        api = HfApi()
+        try:
+            user_info = api.whoami(token=hf_token)
+            st.success(f"Token validated! Associated with user: {user_info.name}")
+        except Exception as e:
+            st.error(f"Token validation failed: {str(e)}")
+            st.stop()
+        # Try login
+        login(token=hf_token)
+        status_placeholder.success("🔑 Successfully logged in to Hugging Face!")
+    else:
+        st.error("No token found in any location")
+        st.stop()
+except Exception as e:
+    st.error(f"🚫 Error with HF token: {str(e)}")
+    st.stop()
+st.title("🦙 LLaMA Chatbot")
+# Model loading with detailed status updates
+@st.cache_resource
+def load_model():
+    try:
+        model_path = "Alaaeldin/Llama-demo"
+        with st.spinner("🔄 Loading tokenizer..."):
+            tokenizer = AutoTokenizer.from_pretrained(
+                model_path,
+                token=hf_token,
+                trust_remote_code=True
+            )
+            st.success("✅ Tokenizer loaded!")
+        with st.spinner("🔄 Loading model... This might take a few minutes..."):
+            model = AutoModelForCausalLM.from_pretrained(
+                model_path,
+                torch_dtype=torch.float16,
+                device_map="auto",
+                token=hf_token,
+                trust_remote_code=True
+            )
+            st.success("✅ Model loaded!")
+        return model, tokenizer
+    except Exception as e:
+        st.error(f"❌ Error loading model: {str(e)}")
+        return None, None
+# Initialize chat history
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+# Load model
+model, tokenizer = load_model()
+# Chat interface
+if model and tokenizer:
+    st.success("✨ Ready to chat! Enter your message below.")
+    # Display chat history
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
+    # Chat input
+    if prompt := st.chat_input("Your message"):
+        # Add user message to chat history
+        st.session_state.messages.append({"role": "user", "content": prompt})
+        # Display user message
+        with st.chat_message("user"):
+            st.markdown(prompt)
+        # Generate response
+        with st.chat_message("assistant"):
+            with st.spinner("🤔 Thinking..."):
+                # Prepare input
+                inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+                # Generate response
+                with torch.no_grad():
+                    outputs = model.generate(
+                        inputs["input_ids"],
+                        max_length=200,
+                        num_return_sequences=1,
+                        temperature=0.7,
+                        do_sample=True,
+                        pad_token_id=tokenizer.eos_token_id
+                    )
+                # Decode response
+                response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+                # Display response
+                st.markdown(response)
+                # Add assistant response to chat history
+                st.session_state.messages.append({"role": "assistant", "content": response})
+else:
+    st.error("⚠️ Model loading failed. Please check the error messages above.")