Alaaeldin committed on
Commit 5ba8b96 · verified · 1 Parent(s): fd8f252

Update app.py

Files changed (1)
  1. app.py +41 -5
app.py CHANGED
@@ -9,7 +9,14 @@ from huggingface_hub import login
 st.set_page_config(page_title="LLaMA Chatbot", page_icon="🦙")
 status_placeholder = st.empty()
 
-# Authentication (keeping the working code)
+# Check GPU
+if torch.cuda.is_available():
+    st.sidebar.success("✅ CUDA is available")
+    st.sidebar.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
+else:
+    st.sidebar.warning("⚠️ CUDA is not available. Using CPU.")
+
+# Authentication
 try:
     hf_token = os.environ.get("HF_TOKEN")
     if not hf_token:
@@ -26,7 +33,7 @@ st.title("🦙 LLaMA Chatbot")
 @st.cache_resource
 def load_model():
     try:
-        model_path = "Alaaeldin/Llama-demo"  # Updated model path
+        model_path = "Alaaeldin/Llama-demo"
 
         with st.spinner("🔄 Loading tokenizer..."):
             tokenizer = AutoTokenizer.from_pretrained(
@@ -41,7 +48,6 @@ def load_model():
                 model_path,
                 torch_dtype=torch.float16,
                 device_map="auto",
-                load_in_8bit=True,
                 token=hf_token,
                 trust_remote_code=True
             )
@@ -59,7 +65,7 @@ if "messages" not in st.session_state:
 # Load model
 model, tokenizer = load_model()
 
-# Display chat interface
+# Chat interface
 if model and tokenizer:
     st.success("✨ Ready to chat! Enter your message below.")
 
@@ -72,6 +78,36 @@ if model and tokenizer:
     if prompt := st.chat_input("Your message"):
         # Add user message to chat history
         st.session_state.messages.append({"role": "user", "content": prompt})
+
         # Display user message
         with st.chat_message("user"):
-            st.markdown(prompt)
+            st.markdown(prompt)
+
+        # Generate response
+        with st.chat_message("assistant"):
+            with st.spinner("🤔 Thinking..."):
+                # Prepare input
+                inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+                # Generate response
+                with torch.no_grad():
+                    outputs = model.generate(
+                        inputs["input_ids"],
+                        max_length=200,
+                        num_return_sequences=1,
+                        temperature=0.7,
+                        do_sample=True,
+                        pad_token_id=tokenizer.eos_token_id
+                    )
+
+                # Decode response
+                response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+                # Display response
+                st.markdown(response)
+
+                # Add assistant response to chat history
+                st.session_state.messages.append({"role": "assistant", "content": response})
+
+else:
+    st.error("⚠️ Model loading failed. Please check the error messages above.")
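
For quick testing outside Streamlit, the load/generate/decode flow that this commit wires into the chat UI can also be run as a standalone script. The following is a minimal sketch, assuming HF_TOKEN is set in the environment and the Alaaeldin/Llama-demo checkpoint is accessible to that token; the prompt string is an illustrative placeholder, not part of the app.

# Standalone sketch of app.py's model flow (assumes HF_TOKEN is set;
# "Hello, llama!" is a made-up example prompt).
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

hf_token = os.environ.get("HF_TOKEN")
model_path = "Alaaeldin/Llama-demo"

tokenizer = AutoTokenizer.from_pretrained(
    model_path, token=hf_token, trust_remote_code=True
)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,  # half-precision weights to reduce memory
    device_map="auto",          # place weights on GPU when one is available
    token=hf_token,
    trust_remote_code=True,
)

prompt = "Hello, llama!"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(
        inputs["input_ids"],
        max_length=200,                       # caps prompt + completion together
        num_return_sequences=1,
        temperature=0.7,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,  # silences the missing-pad-token warning
    )
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Note that max_length=200 bounds the prompt and the completion combined, so long prompts leave little room for the reply; transformers' generate also accepts max_new_tokens when only the completion length should be capped.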