xavierbarbier committed · verified
Commit c88d72c · 1 Parent(s): 892786f

Update app.py

Files changed (1): app.py (+9 -30)
app.py CHANGED
@@ -51,53 +51,32 @@ chunk_size = 2048
 
 # creating a pdf reader object
 
-reader = PdfReader("./resource/NGAP 01042024.pdf")
-
-text = []
-for p in np.arange(0, len(reader.pages), 1):
-    page = reader.pages[int(p)]
-
-    # extracting text from page
-    text.append(page.extract_text())
-
-text = ' '.join(text)
-
-chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
-
-
-def get_text_embedding(text):
-
-    return embeddings.embed_query(text)
-
-text_embeddings = np.array([get_text_embedding(chunk) for chunk in chunks])
-
-d = text_embeddings.shape[1]
-index = faiss.IndexFlatL2(d)
-index.add(text_embeddings)
+vectordb = Chroma(
+    persist_directory="./resource/chroma/",
+    embedding_function=embeddings
+)
 
 print("Finish the model init process")
 
 def qa(question: str) -> str:
 
-
 
-    question_embeddings = np.array([get_text_embedding(question)])
 
-    D, I = index.search(question_embeddings, k=2)  # distance, index
-    retrieved_chunk = [chunks[i] for i in I.tolist()[0]]
+    docs = vectordb.max_marginal_relevance_search(question, k=1)
 
+
 
     prompt = f"""<s>[INST]
     Les informations contextuelles sont ci-dessous.
     ---------------------
-    {retrieved_chunk}
+    {docs[0].page_content}
     ---------------------
     [/INST]
     Compte tenu des informations contextuelles et non des connaissances préalables, répondez à la requête. </s>
     [INST] Requête: {question} [/INST]
     Réponse:
     """
-    outputs = model.generate(prompt=prompt, temp=0.5, top_k=40, top_p=1, max_tokens=max_new_tokens)
-    return "".join(outputs)
+    # outputs = model.generate(prompt=prompt, temp=0.5, top_k=40, top_p=1, max_tokens=max_new_tokens)
+    return prompt  # "".join(outputs)
 
 
 demo = gr.Interface(
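The new initialization loads a prebuilt index: nothing in this commit creates ./resource/chroma/, so the embeddings must have been computed and persisted in a separate ingestion step. A minimal sketch of what that step could look like, assuming the LangChain community wrappers (PyPDFLoader, RecursiveCharacterTextSplitter, Chroma) and using HuggingFaceEmbeddings as a stand-in for the app's `embeddings` object, which is defined outside this hunk:

# Hypothetical one-off ingestion script -- not part of this commit.
# Builds the persisted Chroma index that app.py now loads from ./resource/chroma/.
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Stand-in embedding model; app.py defines its own `embeddings` elsewhere.
embeddings = HuggingFaceEmbeddings()

# Load the same PDF the removed code read page by page with PdfReader.
pages = PyPDFLoader("./resource/NGAP 01042024.pdf").load()

# Mirror the removed fixed-size chunking (chunk_size = 2048).
splitter = RecursiveCharacterTextSplitter(chunk_size=2048, chunk_overlap=0)
docs = splitter.split_documents(pages)

# Embed every chunk and write the index to disk; when a persist_directory
# is given, recent chromadb versions persist automatically.
vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
    persist_directory="./resource/chroma/",
)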
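Retrieval changes too: the raw FAISS top-k L2 search is replaced by Chroma's maximal-marginal-relevance search, which re-ranks a pool of nearest neighbours to balance similarity against redundancy. With k=1, as in the commit, the diversity term cannot kick in; a sketch of the tunable form, using the fetch_k and lambda_mult parameters from the LangChain vector-store API:

# MMR fetches `fetch_k` candidates by similarity, then greedily keeps `k`,
# trading query relevance against redundancy via lambda_mult
# (1.0 = pure similarity, 0.0 = maximum diversity).
docs = vectordb.max_marginal_relevance_search(
    question,
    k=4,          # chunks actually returned
    fetch_k=20,   # candidate pool re-ranked by MMR
    lambda_mult=0.5,
)
context = "\n---\n".join(d.page_content for d in docs)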