Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -77,20 +77,44 @@ def extract_text(file):
|
|
77 |
|
78 |
return text
|
79 |
|
80 |
-
def
|
81 |
|
82 |
chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
|
83 |
|
84 |
text_embeddings = np.array([get_text_embedding(chunk) for chunk in chunks])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
|
86 |
-
return
|
87 |
|
88 |
with gr.Blocks() as demo:
|
89 |
file_input = gr.File(label="Upload a PDF file")
|
|
|
90 |
text_output = gr.Textbox(label="Extracted Text")
|
91 |
-
|
|
|
|
|
|
|
92 |
file_input.upload(extract_text, inputs=file_input, outputs=text_output)
|
93 |
-
text_output.change(
|
|
|
94 |
|
95 |
|
96 |
if __name__ == "__main__":
|
|
|
77 |
|
78 |
return text
|
79 |
|
80 |
+
def qa(text: str, question: str) -> str:
    """Build a retrieval-augmented prompt for *question* from *text*.

    The text is cut into ``chunk_size``-character chunks, each chunk is
    embedded with ``get_text_embedding`` and stored in a FAISS flat L2
    index. The chunks nearest to the embedded question (at most two) are
    placed in the prompt as context.

    Args:
        text: Document text to search for context.
        question: The user's query.

    Returns:
        The prompt string, or an empty string when either *text* or
        *question* is empty (there is nothing to retrieve or to ask).
    """
    # Empty text yields no chunks, and an empty embeddings array has no
    # second dimension to read the vector size from.
    if not text or not question:
        return ""

    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

    # FAISS operates on float32 vectors, so fix the dtype explicitly.
    text_embeddings = np.array(
        [get_text_embedding(chunk) for chunk in chunks], dtype="float32"
    )

    dimension = text_embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(text_embeddings)

    question_embeddings = np.array(
        [get_text_embedding(question)], dtype="float32"
    )

    # Asking for more neighbours than there are indexed vectors makes FAISS
    # return -1 labels, which would silently index chunks from the end.
    k = min(2, len(chunks))
    _, neighbours = index.search(question_embeddings, k)
    retrieved_chunks = [chunks[i] for i in neighbours[0].tolist()]

    # Insert the chunk text itself rather than the repr of a Python list.
    context = "\n".join(retrieved_chunks)

    prompt = f"""
Context information is below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {question}
Answer:
"""

    return prompt
|
106 |
|
107 |
# UI: upload a PDF, show its extracted text, and show the retrieval prompt
# that qa() builds for the question.
with gr.Blocks() as demo:
    file_input = gr.File(label="Upload a PDF file")
    question_input = gr.Textbox(label="Question")
    text_output = gr.Textbox(label="Extracted Text")
    promp_output = gr.Textbox(label="prompt")

    # Uploading a file fills the "Extracted Text" box via extract_text.
    file_input.upload(extract_text, inputs=file_input, outputs=text_output)

    qa_inputs = [text_output, question_input]
    # Rebuild the prompt whenever the extracted text changes...
    text_output.change(qa, inputs=qa_inputs, outputs=promp_output)
    # ...and also when the user submits a question, so a question entered
    # after the upload is not ignored.
    question_input.submit(qa, inputs=qa_inputs, outputs=promp_output)
|
117 |
+
|
118 |
|
119 |
|
120 |
if __name__ == "__main__":
|