import streamlit as st
import torch
import os
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig
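# Note: PeftModel/PeftConfig are not used below; they would only be needed if a
# LoRA/PEFT adapter were loaded on top of the base model.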
from huggingface_hub import login, HfApi
# Set page config for better display
st.set_page_config(page_title="LLaMA Chatbot", page_icon="🦙")
status_placeholder = st.empty()
# Check GPU
if torch.cuda.is_available():
    st.sidebar.success("✅ CUDA is available")
    st.sidebar.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    st.sidebar.warning("⚠️ CUDA is not available. Using CPU.")
# Debug token access and authentication
try:
    # Try to get the token from the environment first, then Streamlit secrets
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        hf_token = st.secrets.get("HF_TOKEN")

    if hf_token:
        st.info(f"Token found! First 4 characters: {hf_token[:4]}...")

        # Test token validity; whoami() returns a dict describing the account
        api = HfApi()
        try:
            user_info = api.whoami(token=hf_token)
            st.success(f"Token validated! Associated with user: {user_info['name']}")
        except Exception as e:
            st.error(f"Token validation failed: {str(e)}")
            st.stop()

        # Log in so subsequent Hub calls are authenticated
        login(token=hf_token)
        status_placeholder.success("🔑 Successfully logged in to Hugging Face!")
    else:
        st.error("No token found in any location")
        st.stop()
except Exception as e:
    st.error(f"🚫 Error with HF token: {str(e)}")
    st.stop()
st.title("🦙 LLaMA Chatbot")
# Model loading with detailed status updates
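# st.cache_resource keeps the loaded model and tokenizer in memory across
# Streamlit reruns, so they are only loaded once per process.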
@st.cache_resource
def load_model():
    try:
        model_path = "Alaaeldin/Llama-demo"
        with st.spinner("🔄 Loading tokenizer..."):
            tokenizer = AutoTokenizer.from_pretrained(
                model_path,
                token=hf_token,
                trust_remote_code=True
            )
        st.success("✅ Tokenizer loaded!")

        with st.spinner("🔄 Loading model... This might take a few minutes..."):
            model = AutoModelForCausalLM.from_pretrained(
                model_path,
                torch_dtype=torch.float16,
                device_map="auto",
                token=hf_token,
                trust_remote_code=True
            )
        st.success("✅ Model loaded!")
        return model, tokenizer
    except Exception as e:
        st.error(f"❌ Error loading model: {str(e)}")
        return None, None
# Initialize chat history
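# st.session_state persists across script reruns, so the conversation history
# survives each user interaction.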
if "messages" not in st.session_state:
st.session_state.messages = []
# Load model
model, tokenizer = load_model()
# Chat interface
if model and tokenizer:
    st.success("✨ Ready to chat! Enter your message below.")

    # Display chat history
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Chat input
    if prompt := st.chat_input("Your message"):
        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": prompt})

        # Display user message
        with st.chat_message("user"):
            st.markdown(prompt)

        # Generate response
        with st.chat_message("assistant"):
            with st.spinner("🤔 Thinking..."):
                # Prepare input
                inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

                # Generate response
                with torch.no_grad():
                    outputs = model.generate(
                        inputs["input_ids"],
                        attention_mask=inputs["attention_mask"],
                        max_length=200,
                        num_return_sequences=1,
                        temperature=0.7,
                        do_sample=True,
                        pad_token_id=tokenizer.eos_token_id
                    )

                # Decode only the newly generated tokens so the reply does not
                # echo the prompt back to the user
                response = tokenizer.decode(
                    outputs[0][inputs["input_ids"].shape[-1]:],
                    skip_special_tokens=True
                )

                # Display response
                st.markdown(response)

                # Add assistant response to chat history
                st.session_state.messages.append({"role": "assistant", "content": response})
else:
    st.error("⚠️ Model loading failed. Please check the error messages above.")