Update app.py
app.py CHANGED
@@ -87,10 +87,10 @@ def preprocess_plain_text(text, window_size=3):
     # #break multi-headlines into a line each
     chunks = [phrase.strip() for line in lines for phrase in line.split(" ")]
 
-    #
+    # drop blank lines
    text = '\n'.join(chunk for chunk in chunks if chunk)
 
-
+    # We split this article into paragraphs and then every paragraph into sentences
    paragraphs = []
    for paragraph in text.replace('\n', ' ').split("\n\n"):
        if len(paragraph.strip()) > 0:
@@ -106,15 +106,13 @@ def preprocess_plain_text(text, window_size=3):
     return passages
 
 
-def bi_encode(
+def bi_encode(passages):
     global bi_encoder
     # We use the Bi-Encoder to encode all passages, so that we can use it with sematic search
-    bi_encoder = SentenceTransformer(
+    bi_encoder = SentenceTransformer("multi-qa-mpnet-base-dot-v1")
 
-    # quantize the model
-    # bi_encoder = quantize_dynamic(model, {Linear, Embedding})
 
-    # Compute the embeddings
+    # Compute the embeddings
     with st.spinner('Encoding passages into a vector space...'):
         corpus_embeddings = bi_encoder.encode(passages, convert_to_tensor=True, show_progress_bar=True)
 
@@ -142,8 +140,7 @@ def display_as_table(model, score='score'):
 st.title("Search Your Query Here")
 window_size = 3
 
-
-# This function will search all wikipedia articles for passages that answer the query
+# This will search articles for passages to answer the query
 def search_func(query):
     global bi_encoder, cross_encoder
 
@@ -157,7 +154,7 @@ def search_func(query):
 
     st.write(f"Document Header: {pdf_title}")
 
-    # Encode the query using the bi-encoder and find
+    # Encode the query using the bi-encoder and find relevant answers
     question_embedding = bi_encoder.encode(query, convert_to_tensor=True)
     question_embedding = question_embedding.cpu()
     hits = util.semantic_search(question_embedding, corpus_embeddings, top_k=2, score_function=util.dot_score)
@@ -224,7 +221,7 @@ if search:
     with st.spinner(
             text=f"Loading..........................."
     ):
-        bi_encoder, corpus_embeddings = bi_encode(
+        bi_encoder, corpus_embeddings = bi_encode(passages)
         cross_encoder = cross_encode()
 
     with st.spinner(
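The remaining hunks wire bi_encode(passages) into the app's retrieve-and-rerank flow: the bi-encoder embeds all passages once, util.semantic_search retrieves the top candidates for a query, and a cross-encoder re-scores them. Below is a minimal self-contained sketch of that flow, assuming the standard sentence-transformers pattern; the cross-encoder checkpoint is an assumption (cross_encode() is not shown in this diff), and the passages and query are illustrative.

from sentence_transformers import SentenceTransformer, CrossEncoder, util

passages = [
    "The bi-encoder embeds every passage once, up front.",
    "The cross-encoder re-scores only the retrieved candidates.",
]

# Bi-encoder: embed all passages into one tensor, as bi_encode(passages) now does.
bi_encoder = SentenceTransformer("multi-qa-mpnet-base-dot-v1")
corpus_embeddings = bi_encoder.encode(passages, convert_to_tensor=True)

# Encode the query and retrieve the top-k passages by dot-product score.
query = "Which model re-scores the candidates?"
question_embedding = bi_encoder.encode(query, convert_to_tensor=True)
hits = util.semantic_search(question_embedding, corpus_embeddings,
                            top_k=2, score_function=util.dot_score)[0]

# Cross-encoder re-ranks the retrieved pairs for higher precision.
# The checkpoint name here is an assumed stand-in for whatever cross_encode() loads.
cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
scores = cross_encoder.predict([[query, passages[hit["corpus_id"]]] for hit in hits])
for hit, score in sorted(zip(hits, scores), key=lambda pair: pair[1], reverse=True):
    print(round(float(score), 3), passages[hit["corpus_id"]])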