Update app.py
app.py
CHANGED
@@ -22,7 +22,7 @@ st.title("🚀 WizNerd Insp 🚀")
 # Sidebar configuration
 with st.sidebar:
     st.header("Authentication 🔒")
-    hf_token = st.text_input("Hugging Face Token",
+    hf_token = st.text_input("Hugging Face Token", type="password",
                              help="Get your token from https://huggingface.co/settings/tokens")
 
 if not hf_token:
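Review note: the change here adds `type="password"` so the token is masked in the sidebar widget. A minimal sketch of the full gating pattern, with the body of `if not hf_token:` (not shown in this hunk) assumed to warn and stop:

```python
import streamlit as st

with st.sidebar:
    st.header("Authentication 🔒")
    # type="password" masks the token in the browser; it still travels
    # with every rerun, so never echo or log it.
    hf_token = st.text_input(
        "Hugging Face Token",
        type="password",
        help="Get your token from https://huggingface.co/settings/tokens",
    )

if not hf_token:
    st.warning("Enter a Hugging Face token to continue.")  # assumed body
    st.stop()  # halt the script until the user supplies a token
```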
@@ -90,7 +90,7 @@ def generate_response(prompt, file_context):
     Answer:"""
 
     # Create streamer
-    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
+    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
     # Tokenize input
     inputs = tokenizer(
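Review note: `skip_special_tokens=True` is forwarded to `tokenizer.decode`, so markers such as end-of-sequence tokens no longer leak into the chat stream. For anyone unfamiliar with the threading pattern app.py relies on, a self-contained sketch (the model name and prompt are placeholders, not from this repo):

```python
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Placeholder model purely for illustration; app.py loads its own.
model_id = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# skip_prompt=True drops the echoed input; skip_special_tokens=True is
# passed through to decode so special tokens never reach the UI.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

inputs = tokenizer("Pipeline inspection checklist:", return_tensors="pt")
generation_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=50)

# generate() blocks, so it runs on a worker thread while this thread
# consumes decoded text chunks as they become available.
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()
for chunk in streamer:
    print(chunk, end="", flush=True)
thread.join()
```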
@@ -107,7 +107,9 @@ def generate_response(prompt, file_context):
         max_new_tokens=1024,
         temperature=0.7,
         top_p=0.9,
-        repetition_penalty=1.1
+        repetition_penalty=1.1,
+        do_sample=True,
+        use_cache=True  # Enable caching for faster generation
     )
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
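Review note: `do_sample=True` is the substantive fix in this hunk. `transformers` defaults to greedy decoding, which ignores `temperature` and `top_p` (newer versions warn about exactly this), so those knobs only take effect now. `use_cache=True` is already the default and is merely explicit. Continuing the sketch above with this hunk's settings:

```python
generation_kwargs = dict(
    **inputs,
    streamer=streamer,
    max_new_tokens=1024,
    temperature=0.7,         # softens the token distribution; sampling-only
    top_p=0.9,               # nucleus sampling: smallest set covering 90% mass
    repetition_penalty=1.1,  # >1.0 penalizes tokens already generated
    do_sample=True,          # switch from greedy decoding to sampling
    use_cache=True,          # reuse past key/values during decoding (default)
)
```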
@@ -160,12 +162,10 @@ if prompt := st.chat_input("Ask your inspection question..."):
             with st.chat_message("assistant", avatar="🤖"):
                 streamer = generate_response(prompt, file_context)
                 response = st.write_stream(streamer)
+                # Remove <think> tokens if present
+                response = response.replace("<think>", "").replace("</think>", "").strip()
                 st.session_state.messages.append({"role": "assistant", "content": response})
-        except:
-
-            with st.chat_message("assistant"):
-                streamer = generate_response(prompt, file_context)
-                response = st.write_stream(streamer)
-                st.session_state.messages.append({"role": "assistant", "content": response})
+        except Exception as e:
+            st.error(f"⚡ Generation error: {str(e)}")
     else:
         st.error("🤖 Model not loaded - check your token and connection!")
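Review note: swapping the bare `except:` (which silently re-ran the whole generation) for `except Exception as e` with a visible error is a clear improvement. One caveat on the `<think>` cleanup, though: `str.replace` deletes only the tag strings, so any reasoning text between them still reaches the chat history. If the goal is to hide that text entirely, a regex over the whole block is the usual approach; `strip_think` below is a hypothetical helper, not part of this commit:

```python
import re

def strip_think(text: str) -> str:
    # Drop each <think>...</think> block wholesale, including its contents,
    # then trim leftover whitespace.
    return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()

print(strip_think("<think>check ASME tables</think>Use a calibrated UT probe."))
# -> Use a calibrated UT probe.
```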