Spaces:

not-lain
/

PDF-Search-Engine

Running

not-lain committed on Dec 14, 2023

Commit

6ae3bc4

1 Parent(s): 320f164

fixed layout

Files changed (1) hide show

app.py CHANGED Viewed

@@ -50,12 +50,12 @@ def process_pdfs(parent_dir: Union[str,list]):
             # 512 is related to the positional encoding "facebook/dpr-ctx_encoder-single-nq-base" model
             file_name = file_path.split("/")[-1]
             if len(txt) < 512 :
-                new_data = {"title":f"{file_name}-page-{i}","text":txt}
-                df = df.append(new_data,ignore_index=True)
             else :
                 while len(txt) > 512 :
-                    new_data = {"title":f"{file_name}-pg{i}","text":txt[:512]}
-                    df = df.append(new_data,ignore_index=True)
                     txt = txt[512:]
         # closing the pdf file object
@@ -101,15 +101,16 @@ def predict(query,file_paths, k=3):
     return out
 with gr.Blocks() as demo :
-    with gr.Column():
-        gr.Markdown("## PDF Search Engine")
-        files = gr.Files(label="Upload PDFs",type="filepath",file_count="multiple")
-        query = gr.Text(label="query")
-        with gr.Accordion():
-            k = gr.Number(label="number of results",value=3)
-        button = gr.Button("search")
-    with gr.Column():
-        output = gr.Textbox(label="output")
     button.click(predict, [query,files,k],outputs=output)
 demo.launch()

             # 512 is related to the positional encoding "facebook/dpr-ctx_encoder-single-nq-base" model
             file_name = file_path.split("/")[-1]
             if len(txt) < 512 :
+                new_data = pd.DataFrame([[f"{file_name}-page-{i}",txt]],columns=["title","text"])
+                df = pd.concat([df,new_data],ignore_index=True)
             else :
                 while len(txt) > 512 :
+                    new_data = pd.DataFrame([[f"{file_name}-page-{i}",txt[:512]]],columns=["title","text"])
+                    df = pd.concat([df,new_data],ignore_index=True)
                     txt = txt[512:]
         # closing the pdf file object
     return out
 with gr.Blocks() as demo :
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("## PDF Search Engine")
+            files = gr.Files(label="Upload PDFs",type="filepath",file_count="multiple")
+            query = gr.Text(label="query")
+            with gr.Accordion("number of references",open=False):
+                k = gr.Number(value=3)
+            button = gr.Button("search")
+        with gr.Column():
+            output = gr.Textbox(label="output")
     button.click(predict, [query,files,k],outputs=output)
 demo.launch()