domenicrosati committed on
Commit
82fe24c
·
1 Parent(s): 8457196

try fallback

Browse files
Files changed (1) hide show
  1. app.py +23 -6
app.py CHANGED
@@ -150,12 +150,15 @@ st.markdown("""
150
  """, unsafe_allow_html=True)
151
 
152
  with st.expander("Settings (strictness, context limit, top hits)"):
 
 
 
153
  confidence_threshold = st.slider('Confidence threshold for answering questions? This number represents how confident the model should be in the answers it gives. The number is out of 100%', 0, 100, 1)
154
  use_reranking = st.radio(
155
  "Use Reranking? Reranking will rerank the top hits using semantic similarity of document and query.",
156
  ('yes', 'no'))
157
  top_hits_limit = st.slider('Top hits? How many documents to use for reranking. Larger is slower but higher quality', 10, 300, 100)
158
- context_lim = st.slider('Context limit? How many documents to use for answering from. Larger is slower but higher quality', 10, 300, 25)
159
 
160
  # def paraphrase(text, max_length=128):
161
  # input_ids = queryexp_tokenizer.encode(text, return_tensors="pt", add_special_tokens=True)
@@ -171,15 +174,29 @@ def run_query(query):
171
  # If you are not getting good results try one of:
172
  # * {query_exp}
173
  # """)
 
 
174
  limit = top_hits_limit or 100
175
  context_limit = context_lim or 10
176
  contexts_strict, orig_docs_strict = search(query, limit=limit, strict=True)
177
- contexts_lenient, orig_docs_lenient = search(query, limit=limit, strict=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
- contexts = list(
180
- set(contexts_strict + contexts_lenient)
181
- )
182
- orig_docs = orig_docs_strict + orig_docs_lenient
183
 
184
  if len(contexts) == 0 or not ''.join(contexts).strip():
185
  return st.markdown("""
 
150
  """, unsafe_allow_html=True)
151
 
152
  with st.expander("Settings (strictness, context limit, top hits)"):
153
+ strict_lenient_mix = st.radio(
154
+ "Type of strict+lenient combination: Fallback or Mix? If fallback, strict is run first then if the results are less than context_lim we also search lenient. Mix will search them both and let reranking sort em out",
155
+ ('fallback', 'mix'))
156
  confidence_threshold = st.slider('Confidence threshold for answering questions? This number represents how confident the model should be in the answers it gives. The number is out of 100%', 0, 100, 1)
157
  use_reranking = st.radio(
158
  "Use Reranking? Reranking will rerank the top hits using semantic similarity of document and query.",
159
  ('yes', 'no'))
160
  top_hits_limit = st.slider('Top hits? How many documents to use for reranking. Larger is slower but higher quality', 10, 300, 100)
161
+ context_lim = st.slider('Context limit? How many documents to use for answering from. Larger is slower but higher quality', 10, 300, 10)
162
 
163
  # def paraphrase(text, max_length=128):
164
  # input_ids = queryexp_tokenizer.encode(text, return_tensors="pt", add_special_tokens=True)
 
174
  # If you are not getting good results try one of:
175
  # * {query_exp}
176
  # """)
177
+
178
+ # could also try fallback if there are no good answers by score...
179
  limit = top_hits_limit or 100
180
  context_limit = context_lim or 10
181
  contexts_strict, orig_docs_strict = search(query, limit=limit, strict=True)
182
+ if strict_lenient_mix == 'fallback' and len(contexts_strict) < context_limit:
183
+ contexts_lenient, orig_docs_lenient = search(query, limit=limit, strict=False)
184
+ contexts = list(
185
+ set(contexts_strict + contexts_lenient)
186
+ )
187
+ orig_docs = orig_docs_strict + orig_docs_lenient
188
+ elif strict_lenient_mix == 'mix':
189
+ contexts_lenient, orig_docs_lenient = search(query, limit=limit, strict=False)
190
+ contexts = list(
191
+ set(contexts_strict + contexts_lenient)
192
+ )
193
+ orig_docs = orig_docs_strict + orig_docs_lenient
194
+ else:
195
+ contexts = list(
196
+ set(contexts_strict)
197
+ )
198
+ orig_docs = orig_docs_strict
199
 
 
 
 
 
200
 
201
  if len(contexts) == 0 or not ''.join(contexts).strip():
202
  return st.markdown("""