Spaces:
Sleeping
Sleeping
fixed layout
Browse files
app.py
CHANGED
@@ -50,12 +50,12 @@ def process_pdfs(parent_dir: Union[str,list]):
|
|
50 |
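# Text is split into chunks of at most 512 characters; 512 mirrors the maximum input length (positional-encoding limit, counted in tokens rather than characters) of the "facebook/dpr-ctx_encoder-single-nq-base" model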
|
51 |
file_name = file_path.split("/")[-1]
|
52 |
if len(txt) < 512 :
|
53 |
-
new_data =
|
54 |
-
df =
|
55 |
else :
|
56 |
while len(txt) > 512 :
|
57 |
-
new_data =
|
58 |
-
df =
|
59 |
txt = txt[512:]
|
60 |
|
61 |
# closing the pdf file object
|
@@ -101,15 +101,16 @@ def predict(query,file_paths, k=3):
|
|
101 |
return out
|
102 |
|
103 |
with gr.Blocks() as demo :
|
104 |
-
with gr.
|
105 |
-
gr.
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
|
|
113 |
button.click(predict, [query,files,k],outputs=output)
|
114 |
|
115 |
demo.launch()
|
|
|
50 |
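# Text is split into chunks of at most 512 characters; 512 mirrors the maximum input length (positional-encoding limit, counted in tokens rather than characters) of the "facebook/dpr-ctx_encoder-single-nq-base" model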
|
51 |
file_name = file_path.split("/")[-1]
|
52 |
if len(txt) < 512 :
|
53 |
+
new_data = pd.DataFrame([[f"{file_name}-page-{i}",txt]],columns=["title","text"])
|
54 |
+
df = pd.concat([df,new_data],ignore_index=True)
|
55 |
else :
|
56 |
while len(txt) > 512 :
|
57 |
+
new_data = pd.DataFrame([[f"{file_name}-page-{i}",txt[:512]]],columns=["title","text"])
|
58 |
+
df = pd.concat([df,new_data],ignore_index=True)
|
59 |
txt = txt[512:]
|
60 |
|
61 |
# closing the pdf file object
|
|
|
101 |
return out
|
102 |
|
103 |
# Gradio UI for the PDF search engine: one row with two columns, inputs on
# the left and the search result on the right.
# NOTE(review): the nesting below was reconstructed from a diff view that had
# lost its indentation -- confirm it against the real app.py layout.
with gr.Blocks() as demo:
    with gr.Row():
        # Left column: upload, query and search controls.
        with gr.Column():
            gr.Markdown("## PDF Search Engine")
            files = gr.Files(
                label="Upload PDFs",
                type="filepath",
                file_count="multiple",
            )
            query = gr.Text(label="query")
            # Collapsed by default so the top-k setting stays out of the way.
            with gr.Accordion("number of references", open=False):
                # precision=0 rounds the field to an integer, so `k` reaches
                # predict() as an int rather than a float or fractional value.
                k = gr.Number(value=3, precision=0)
            button = gr.Button("search")
        # Right column: textual search result.
        with gr.Column():
            output = gr.Textbox(label="output")
    # Event listeners have to be registered inside the Blocks context.
    button.click(predict, [query, files, k], outputs=output)

demo.launch()
|