amiguel committed
Commit a941d96 · verified · 1 Parent(s): 27b07a6

Update app.py

Files changed (1): app.py +9 -9
app.py CHANGED
@@ -22,7 +22,7 @@ st.title("🚀 WizNerd Insp 🚀")
 # Sidebar configuration
 with st.sidebar:
     st.header("Authentication 🔒")
-    hf_token = st.text_input("HuggingFace Token", type="password",
+    hf_token = st.text_input("Hugging Face Token", type="password",
                              help="Get your token from https://huggingface.co/settings/tokens")
 
     if not hf_token:
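For context, the token collected in this sidebar is presumably what authenticates the model download later in app.py, which this diff does not touch. A minimal sketch of that wiring under transformers, with MODEL_ID as a stand-in since the diff never names the checkpoint:

    from transformers import AutoModelForCausalLM, AutoTokenizer

    MODEL_ID = "org/model"  # placeholder; the actual checkpoint is not shown in this diff

    # Passing token= lets from_pretrained fetch gated or private repos
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, token=hf_token)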
@@ -90,7 +90,7 @@ def generate_response(prompt, file_context):
     Answer:"""
 
     # Create streamer
-    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
+    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
     # Tokenize input
     inputs = tokenizer(
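With skip_special_tokens=True, markers such as the end-of-sequence token are decoded away instead of leaking into the streamed text. For readers unfamiliar with TextIteratorStreamer, a minimal sketch of the pattern this function relies on (model.generate runs on a worker thread while the caller drains the streamer, which is what st.write_stream does further down):

    from threading import Thread
    from transformers import TextIteratorStreamer

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # generate() blocks, so run it in the background while iterating the streamer
    thread = Thread(target=model.generate,
                    kwargs={**inputs, "streamer": streamer, "max_new_tokens": 1024})
    thread.start()
    for chunk in streamer:  # yields newly decoded text as tokens arrive
        print(chunk, end="", flush=True)
    thread.join()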
@@ -107,7 +107,9 @@ def generate_response(prompt, file_context):
         max_new_tokens=1024,
         temperature=0.7,
         top_p=0.9,
-        repetition_penalty=1.1
+        repetition_penalty=1.1,
+        do_sample=True,
+        use_cache=True  # Enable caching for faster generation
     )
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
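A detail that makes this hunk more than cosmetic: under the transformers default do_sample=False, generation is greedy and temperature/top_p are ignored (newer versions warn about exactly this), so adding do_sample=True is what actually activates the two sampling knobs above it. use_cache=True matches the library default. An annotated view of the resulting kwargs, as a sketch with inputs and streamer as defined earlier in the function:

    generation_kwargs = dict(
        **inputs,                # prompt tensors from tokenizer(..., return_tensors="pt")
        streamer=streamer,
        max_new_tokens=1024,
        do_sample=True,          # switch from greedy decoding to sampling
        temperature=0.7,         # only honored when do_sample=True
        top_p=0.9,               # nucleus sampling: keep the top 90% probability mass
        repetition_penalty=1.1,  # mildly discourage verbatim repetition
        use_cache=True,          # reuse past key/values; already the default
    )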
@@ -160,12 +162,10 @@ if prompt := st.chat_input("Ask your inspection question..."):
         with st.chat_message("assistant", avatar="🤖"):
             streamer = generate_response(prompt, file_context)
             response = st.write_stream(streamer)
+            # Remove <think> tokens if present
+            response = response.replace("<think>", "").replace("</think>", "").strip()
             st.session_state.messages.append({"role": "assistant", "content": response})
-    except:
-        # Fallback if avatar fails
-        with st.chat_message("assistant"):
-            streamer = generate_response(prompt, file_context)
-            response = st.write_stream(streamer)
-            st.session_state.messages.append({"role": "assistant", "content": response})
+    except Exception as e:
+        st.error(f"⚡ Generation error: {str(e)}")
 else:
     st.error("🤖 Model not loaded - check your token and connection!")
 