Jai12345 committed on
Commit
dcd92b3
·
1 Parent(s): b535db4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -11
app.py CHANGED
@@ -87,10 +87,10 @@ def preprocess_plain_text(text, window_size=3):
87
  # #break multi-headlines into a line each
88
  chunks = [phrase.strip() for line in lines for phrase in line.split(" ")]
89
 
90
- # # drop blank lines
91
  text = '\n'.join(chunk for chunk in chunks if chunk)
92
 
93
- ## We split this article into paragraphs and then every paragraph into sentences
94
  paragraphs = []
95
  for paragraph in text.replace('\n', ' ').split("\n\n"):
96
  if len(paragraph.strip()) > 0:
@@ -106,15 +106,13 @@ def preprocess_plain_text(text, window_size=3):
106
  return passages
107
 
108
 
109
- def bi_encode(bi_enc, passages):
110
  global bi_encoder
111
  # We use the Bi-Encoder to encode all passages, so that we can use it with semantic search
112
- bi_encoder = SentenceTransformer(bi_enc)
113
 
114
- # quantize the model
115
- # bi_encoder = quantize_dynamic(model, {Linear, Embedding})
116
 
117
- # Compute the embeddings using the multi-process pool
118
  with st.spinner('Encoding passages into a vector space...'):
119
  corpus_embeddings = bi_encoder.encode(passages, convert_to_tensor=True, show_progress_bar=True)
120
 
@@ -142,8 +140,7 @@ def display_as_table(model, score='score'):
142
  st.title("Search Your Query Here")
143
  window_size = 3
144
 
145
- bi_encoder_type="multi-qa-mpnet-base-dot-v1"
146
- # This function will search all wikipedia articles for passages that answer the query
147
  def search_func(query):
148
  global bi_encoder, cross_encoder
149
 
@@ -157,7 +154,7 @@ def search_func(query):
157
 
158
  st.write(f"Document Header: {pdf_title}")
159
 
160
- # Encode the query using the bi-encoder and find potentially relevant passages
161
  question_embedding = bi_encoder.encode(query, convert_to_tensor=True)
162
  question_embedding = question_embedding.cpu()
163
  hits = util.semantic_search(question_embedding, corpus_embeddings, top_k=2, score_function=util.dot_score)
@@ -224,7 +221,7 @@ if search:
224
  with st.spinner(
225
  text=f"Loading..........................."
226
  ):
227
- bi_encoder, corpus_embeddings = bi_encode(bi_encoder_type, passages)
228
  cross_encoder = cross_encode()
229
 
230
  with st.spinner(
 
87
  # #break multi-headlines into a line each
88
  chunks = [phrase.strip() for line in lines for phrase in line.split(" ")]
89
 
90
+ # drop blank lines
91
  text = '\n'.join(chunk for chunk in chunks if chunk)
92
 
93
+ # We split this article into paragraphs and then every paragraph into sentences
94
  paragraphs = []
95
  for paragraph in text.replace('\n', ' ').split("\n\n"):
96
  if len(paragraph.strip()) > 0:
 
106
  return passages
107
 
108
 
109
+ def bi_encode(passages):
110
  global bi_encoder
111
  # We use the Bi-Encoder to encode all passages, so that we can use it with semantic search
112
+ bi_encoder = SentenceTransformer("multi-qa-mpnet-base-dot-v1")
113
 
 
 
114
 
115
+ # Compute the embeddings
116
  with st.spinner('Encoding passages into a vector space...'):
117
  corpus_embeddings = bi_encoder.encode(passages, convert_to_tensor=True, show_progress_bar=True)
118
 
 
140
  st.title("Search Your Query Here")
141
  window_size = 3
142
 
143
+ # This will search articles for passages to answer the query
 
144
  def search_func(query):
145
  global bi_encoder, cross_encoder
146
 
 
154
 
155
  st.write(f"Document Header: {pdf_title}")
156
 
157
+ # Encode the query using the bi-encoder and find relevant answers
158
  question_embedding = bi_encoder.encode(query, convert_to_tensor=True)
159
  question_embedding = question_embedding.cpu()
160
  hits = util.semantic_search(question_embedding, corpus_embeddings, top_k=2, score_function=util.dot_score)
 
221
  with st.spinner(
222
  text=f"Loading..........................."
223
  ):
224
+ bi_encoder, corpus_embeddings = bi_encode(passages)
225
  cross_encoder = cross_encode()
226
 
227
  with st.spinner(