Alaaeldin committed
Commit 8a0a858 · verified
1 Parent(s): 9998464

Create app.py

Files changed (1)
  1. app.py +132 -0
app.py ADDED
@@ -0,0 +1,132 @@
+ import streamlit as st
+ import torch
+ import os
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ from peft import PeftModel, PeftConfig
+ from huggingface_hub import login, HfApi
+
+ # Set page config for better display
+ st.set_page_config(page_title="LLaMA Chatbot", page_icon="🦙")
+ status_placeholder = st.empty()
+
+ # Check GPU
+ if torch.cuda.is_available():
+     st.sidebar.success("✅ CUDA is available")
+     st.sidebar.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
+ else:
+     st.sidebar.warning("⚠️ CUDA is not available. Using CPU.")
+
+ # Debug token access and authentication
+ try:
+     # Try the environment first, then Streamlit secrets
+     hf_token = os.environ.get("HF_TOKEN")
+     if not hf_token:
+         hf_token = st.secrets.get("HF_TOKEN")
+
+     if hf_token:
+         st.info(f"Token found! First 4 characters: {hf_token[:4]}...")
+
+         # Test token validity
+         api = HfApi()
+         try:
+             # whoami() returns a dict, so index it rather than using attribute access
+             user_info = api.whoami(token=hf_token)
+             st.success(f"Token validated! Associated with user: {user_info['name']}")
+         except Exception as e:
+             st.error(f"Token validation failed: {str(e)}")
+             st.stop()
+
+         # Try login
+         login(token=hf_token)
+         status_placeholder.success("🔑 Successfully logged in to Hugging Face!")
+     else:
+         st.error("No token found in any location")
+         st.stop()
+
+ except Exception as e:
+     st.error(f"🚫 Error with HF token: {str(e)}")
+     st.stop()
+
+ st.title("🦙 LLaMA Chatbot")
+
+ # Model loading with detailed status updates
+ @st.cache_resource
+ def load_model():
+     try:
+         model_path = "Alaaeldin/Llama-demo"
+
+         with st.spinner("🔄 Loading tokenizer..."):
+             tokenizer = AutoTokenizer.from_pretrained(
+                 model_path,
+                 token=hf_token,
+                 trust_remote_code=True
+             )
+         st.success("✅ Tokenizer loaded!")
+
+         with st.spinner("🔄 Loading model... This might take a few minutes..."):
+             model = AutoModelForCausalLM.from_pretrained(
+                 model_path,
+                 torch_dtype=torch.float16,
+                 device_map="auto",
+                 token=hf_token,
+                 trust_remote_code=True
+             )
+         st.success("✅ Model loaded!")
+
+         return model, tokenizer
+     except Exception as e:
+         st.error(f"❌ Error loading model: {str(e)}")
+         return None, None
+
+ # Initialize chat history
+ if "messages" not in st.session_state:
+     st.session_state.messages = []
+
+ # Load model
+ model, tokenizer = load_model()
+
+ # Chat interface
+ if model and tokenizer:
+     st.success("✨ Ready to chat! Enter your message below.")
+
+     # Display chat history
+     for message in st.session_state.messages:
+         with st.chat_message(message["role"]):
+             st.markdown(message["content"])
+
+     # Chat input
+     if prompt := st.chat_input("Your message"):
+         # Add user message to chat history
+         st.session_state.messages.append({"role": "user", "content": prompt})
+
+         # Display user message
+         with st.chat_message("user"):
+             st.markdown(prompt)
+
+         # Generate response
+         with st.chat_message("assistant"):
+             with st.spinner("🤔 Thinking..."):
+                 # Prepare input; the tokenizer also returns an attention_mask
+                 inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+                 # Generate response; max_new_tokens bounds the reply length,
+                 # unlike max_length, which also counts the prompt tokens
+                 with torch.no_grad():
+                     outputs = model.generate(
+                         **inputs,
+                         max_new_tokens=200,
+                         num_return_sequences=1,
+                         temperature=0.7,
+                         do_sample=True,
+                         pad_token_id=tokenizer.eos_token_id
+                     )
+
+                 # Decode only the newly generated tokens, not the echoed prompt
+                 response = tokenizer.decode(
+                     outputs[0][inputs["input_ids"].shape[-1]:],
+                     skip_special_tokens=True
+                 )
+
+                 # Display response
+                 st.markdown(response)
+
+                 # Add assistant response to chat history
+                 st.session_state.messages.append({"role": "assistant", "content": response})
+
+ else:
+     st.error("⚠️ Model loading failed. Please check the error messages above.")
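
Note: the app looks for HF_TOKEN first in the environment and then in Streamlit secrets, so one of the two must be set before launch. A minimal local setup sketch (the token value is a placeholder):

    # .streamlit/secrets.toml (Streamlit's secrets file)
    HF_TOKEN = "hf_your_token_here"

    # or export it in the shell and start the app
    export HF_TOKEN=hf_your_token_here
    streamlit run app.py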
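The peft imports (PeftModel, PeftConfig) are not used in this version of app.py. If the intent is to serve a LoRA fine-tune on top of the base model, a minimal sketch of wrapping the loaded model inside load_model() (the adapter repo name here is hypothetical):

    # hypothetical adapter repo; wraps the already-loaded base model
    model = PeftModel.from_pretrained(model, "Alaaeldin/llama-lora-adapter", token=hf_token)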