Update app.py
Browse files
app.py
CHANGED
@@ -99,7 +99,7 @@ def preprocess_plain_text(text, window_size=3):
|
|
99 |
if len(paragraph.strip()) > 0:
|
100 |
paragraphs.append(sent_tokenize(paragraph.strip()))
|
101 |
|
102 |
-
window_size =
|
103 |
passages = []
|
104 |
for paragraph in paragraphs:
|
105 |
for start_idx in range(0, len(paragraph), window_size):
|
@@ -152,6 +152,7 @@ def display_as_table(model, top_k=2, score='score'):
|
|
152 |
# Streamlit App
|
153 |
|
154 |
st.title("Search with Retrieve & Rerank")
|
|
|
155 |
|
156 |
|
157 |
# This function will search all wikipedia articles for passages that answer the query
|
@@ -225,12 +226,12 @@ search_query = st.text_input("Please Enter your search query here",
|
|
225 |
if validators.url(url_text):
|
226 |
# if input is URL
|
227 |
title, text = extract_text_from_url(url_text)
|
228 |
-
passages = preprocess_plain_text(text, window_size=
|
229 |
|
230 |
elif upload_doc:
|
231 |
|
232 |
text, pdf_title = extract_text_from_file(upload_doc)
|
233 |
-
passages = preprocess_plain_text(text, window_size=
|
234 |
|
235 |
col1, col2 = st.columns(2)
|
236 |
|
|
|
99 |
if len(paragraph.strip()) > 0:
|
100 |
paragraphs.append(sent_tokenize(paragraph.strip()))
|
101 |
|
102 |
+
window_size = 3
|
103 |
passages = []
|
104 |
for paragraph in paragraphs:
|
105 |
for start_idx in range(0, len(paragraph), window_size):
|
|
|
152 |
# Streamlit App
|
153 |
|
154 |
st.title("Search with Retrieve & Rerank")
|
155 |
+
window_size = 3
|
156 |
|
157 |
|
158 |
# This function will search all wikipedia articles for passages that answer the query
|
|
|
226 |
if validators.url(url_text):
|
227 |
# if input is URL
|
228 |
title, text = extract_text_from_url(url_text)
|
229 |
+
passages = preprocess_plain_text(text, window_size=3)
|
230 |
|
231 |
elif upload_doc:
|
232 |
|
233 |
text, pdf_title = extract_text_from_file(upload_doc)
|
234 |
+
passages = preprocess_plain_text(text, window_size=3)
|
235 |
|
236 |
col1, col2 = st.columns(2)
|
237 |
|