Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -48,7 +48,7 @@ embeddings = HuggingFaceEmbeddings(
|
|
48 |
encode_kwargs=encode_kwargs
|
49 |
)
|
50 |
|
51 |
-
|
52 |
|
53 |
# creating a pdf reader object
|
54 |
|
@@ -60,10 +60,29 @@ def get_text_embedding(text):
|
|
60 |
|
61 |
return embeddings.embed_query(text)
|
62 |
|
|
|
|
|
63 |
doc_path = hf_hub_download(repo_id="xavierbarbier/rag_ngap", filename="resource/embeddings_ngap.faiss", repo_type="space")
|
64 |
|
65 |
index = faiss.read_index(doc_path)
|
66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
def qa(question):
|
68 |
|
69 |
|
|
|
48 |
encode_kwargs=encode_kwargs
|
49 |
)
|
50 |
|
51 |
+
|
52 |
|
53 |
# creating a pdf reader object
|
54 |
|
|
|
60 |
|
61 |
return embeddings.embed_query(text)
|
62 |
|
63 |
+
|
64 |
+
# FAISS index
|
65 |
doc_path = hf_hub_download(repo_id="xavierbarbier/rag_ngap", filename="resource/embeddings_ngap.faiss", repo_type="space")
|
66 |
|
67 |
index = faiss.read_index(doc_path)
|
68 |
|
69 |
+
# Chunks
|
70 |
+
doc_path = hf_hub_download(repo_id="xavierbarbier/rag_ngap", filename="resource/NGAP 01042024.pdf", repo_type="space")
|
71 |
+
|
72 |
+
reader = PdfReader(doc_path)
|
73 |
+
|
74 |
+
text = []
|
75 |
+
for p in np.arange(0, len(reader.pages), 1):
|
76 |
+
page = reader.pages[int(p)]
|
77 |
+
|
78 |
+
# extracting text from page
|
79 |
+
text.append(page.extract_text())
|
80 |
+
|
81 |
+
text = ' '.join(text)
|
82 |
+
|
83 |
+
chunk_size = 2048
|
84 |
+
chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
|
85 |
+
|
86 |
def qa(question):
|
87 |
|
88 |
|