xavierbarbier committed on
Commit
61f44e7
·
verified ·
1 Parent(s): 447319c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -1
app.py CHANGED
@@ -48,7 +48,7 @@ embeddings = HuggingFaceEmbeddings(
48
  encode_kwargs=encode_kwargs
49
  )
50
 
51
- chunk_size = 500
52
 
53
  # creating a pdf reader object
54
 
@@ -60,10 +60,29 @@ def get_text_embedding(text):
60
 
61
  return embeddings.embed_query(text)
62
 
 
 
63
  doc_path = hf_hub_download(repo_id="xavierbarbier/rag_ngap", filename="resource/embeddings_ngap.faiss", repo_type="space")
64
 
65
  index = faiss.read_index(doc_path)
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  def qa(question):
68
 
69
 
 
48
  encode_kwargs=encode_kwargs
49
  )
50
 
51
+
52
 
53
  # creating a pdf reader object
54
 
 
60
 
61
  return embeddings.embed_query(text)
62
 
63
+
64
+ # FAISS index
65
  doc_path = hf_hub_download(repo_id="xavierbarbier/rag_ngap", filename="resource/embeddings_ngap.faiss", repo_type="space")
66
 
67
  index = faiss.read_index(doc_path)
68
 
69
+ # Chunks
70
+ doc_path = hf_hub_download(repo_id="xavierbarbier/rag_ngap", filename="resource/NGAP 01042024.pdf", repo_type="space")
71
+
72
+ reader = PdfReader(doc_path)
73
+
74
+ text = []
75
+ for p in np.arange(0, len(reader.pages), 1):
76
+ page = reader.pages[int(p)]
77
+
78
+ # extracting text from page
79
+ text.append(page.extract_text())
80
+
81
+ text = ' '.join(text)
82
+
83
+ chunk_size = 2048
84
+ chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
85
+
86
  def qa(question):
87
 
88