Update app.py
app.py CHANGED
```diff
@@ -3,7 +3,7 @@ import torch
 import os
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel, PeftConfig
-from huggingface_hub import login, HfApi
+from huggingface_hub import login
 
 # Set page config for better display
 st.set_page_config(page_title="LLaMA Chatbot", page_icon="🦙")
@@ -16,7 +16,7 @@ if torch.cuda.is_available():
 else:
     st.sidebar.warning("⚠️ CUDA is not available. Using CPU.")
 
-#
+# Authentication with simplified token handling
 try:
     # Try to get token
     hf_token = os.environ.get("HF_TOKEN")
```
```diff
@@ -25,17 +25,6 @@ try:
 
     if hf_token:
         st.info(f"Token found! First 4 characters: {hf_token[:4]}...")
-
-        # Test token validity
-        api = HfApi()
-        try:
-            user_info = api.whoami(token=hf_token)
-            st.success(f"Token validated! Associated with user: {user_info.name}")
-        except Exception as e:
-            st.error(f"Token validation failed: {str(e)}")
-            st.stop()
-
-        # Try login
         login(token=hf_token)
         status_placeholder.success("🔑 Successfully logged in to Hugging Face!")
     else:
```
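This hunk removes the `HfApi.whoami()` sanity check and relies on `login()` alone. One note for anyone restoring that check: `whoami()` returns a plain dict, so the removed code's `user_info.name` attribute access would itself have raised. A minimal sketch of a working validation step, assuming the token is supplied via the `HF_TOKEN` environment variable:

```python
import os
from huggingface_hub import HfApi

# Sketch only: verify an HF token before logging in with it.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    api = HfApi()
    try:
        # whoami() returns a dict of account metadata, so the
        # username is under the "name" key, not a .name attribute.
        user_info = api.whoami(token=hf_token)
        print(f"Token valid for user: {user_info['name']}")
    except Exception as e:
        print(f"Token validation failed: {e}")
```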
```diff
@@ -94,7 +83,7 @@ if model and tokenizer:
             st.markdown(message["content"])
 
     # Chat input
-    if prompt := st.chat_input("
+    if prompt := st.chat_input("Speak thy mind..."):
         # Add user message to chat history
         st.session_state.messages.append({"role": "user", "content": prompt})
 
```
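The fixed line uses Streamlit's chat idiom: `st.chat_input()` returns `None` until the user submits a message, so the walrus assignment doubles as the guard for the whole turn. A condensed, self-contained sketch of the pattern (placeholder text taken from the diff):

```python
import streamlit as st

# Streamlit reruns the script on every interaction, so the transcript
# must live in session_state to survive across runs.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay the stored transcript.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# chat_input() returns None until the user submits, gating the turn.
if prompt := st.chat_input("Speak thy mind..."):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)
```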
```diff
@@ -104,29 +93,32 @@ if model and tokenizer:
 
         # Generate response
         with st.chat_message("assistant"):
-            with st.spinner("🤔
-            [old lines 108–129 were removed here; their content is not shown in this view]
+            with st.spinner("🤔 Composing a verse..."):
+                try:
+                    # Prepare input
+                    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+                    # Generate response
+                    with torch.no_grad():
+                        outputs = model.generate(
+                            inputs["input_ids"],
+                            max_length=200,
+                            num_return_sequences=1,
+                            temperature=0.7,
+                            do_sample=True,
+                            pad_token_id=tokenizer.eos_token_id
+                        )
+
+                    # Decode response
+                    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+                    # Display response
+                    st.markdown(response)
+
+                    # Add assistant response to chat history
+                    st.session_state.messages.append({"role": "assistant", "content": response})
+                except Exception as e:
+                    st.error(f"Error generating response: {str(e)}")
 
 else:
     st.error("⚠️ Model loading failed. Please check the error messages above.")
```