Eric Marchand committed · Commit 590d088 · Parent(s): 102dd78

Bugfix: create_vector in HuggingFaceModel returns an np.ndarray

Files changed:
- src/model_huggingface.py (+12 -6)
- src/rag.py (+35 -26)
src/model_huggingface.py

```diff
@@ -38,12 +38,18 @@ class HuggingFaceModel(AModel):
             raise
 
     def create_vector(self, chunk:str)->list[float]:
-
-
-
-
-
-
+        try:
+            resp = self.model.feature_extraction(
+                text=chunk,
+                # normalize=True, # Only available on servers powered by Text-Embedding-Inference.
+                model=self.feature_name, # normalized ??
+            )
+            if isinstance(resp, np.ndarray):
+                return resp
+            else:
+                raise Exception("Error with embedding !")
+        except:
+            raise
 
     def create_vectors(self, chunks:list[str])->list[list[float]]:
         '''
```
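For reference, `feature_extraction` on `huggingface_hub`'s `InferenceClient` does return a `numpy.ndarray`, which is exactly what the commit title describes: the method now hands an array to callers despite its `list[float]` annotation. A minimal sketch of the same call outside the class, with an example model name (the repo's `self.feature_name` is not visible in this diff):

```python
from huggingface_hub import InferenceClient
import numpy as np

client = InferenceClient()

def embed(chunk: str) -> list[float]:
    # feature_extraction returns an np.ndarray, the behavior this commit
    # documents; the model id below is only an example, not the repo's.
    resp = client.feature_extraction(
        text=chunk,
        model="sentence-transformers/all-MiniLM-L6-v2",
    )
    if not isinstance(resp, np.ndarray):
        raise TypeError("Unexpected embedding payload")
    return resp.tolist()  # converting here keeps the list[float] contract
```

Calling `.tolist()` at the boundary, as above, would let the annotation stay accurate; returning the raw array also works with most vector stores, which is what the commit opts for.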
src/rag.py

```diff
@@ -79,11 +79,11 @@ class Rag:
         '''
         vectors:list = []
         tokens:int = 0
-
-
-
-
-
+        try:
+            vectors:list[list[float]] = self.model.create_vectors(chunks) # batch if the model allows it
+            return vectors
+        except:
+            raise
 
     def load_pdf(self, file_name:str)->str:
         ''' Loads the file 'file_name' and returns its content as text. '''
```
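The comment on the added line notes that `create_vectors` batches only if the model allows it. When the backend exposes no batch endpoint, a plain fallback is to loop over the single-chunk method; a sketch assuming the `create_vector` patched above:

```python
def create_vectors_fallback(model, chunks: list[str]) -> list[list[float]]:
    # One request per chunk: simple and correct, but O(n) round-trips,
    # so a real batch endpoint is preferable for large documents.
    # `model` is assumed to expose the create_vector method shown above.
    return [list(model.create_vector(chunk)) for chunk in chunks]
```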
```diff
@@ -142,8 +142,11 @@ class Rag:
         collection_name: The name of the collection the chunks should be added to.
             The collection is created if it does not exist.
         source: the source of the chunks (file name, url ...)
-        '''
-
+        '''
+        try:
+            vectors = self.create_vectors(chunks=chunks)
+        except:
+            raise
         self.emb_store.add_to_collection(
             collection_name=collection_name,
             source=source,
```
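`emb_store.add_to_collection` itself is outside this diff. Given the `./db/chroma_vectors` store directory in the test function below, the store plausibly wraps ChromaDB; a hypothetical sketch of such a store (the class and method names mirror the call site and are assumptions, while the Chroma calls are standard API):

```python
import chromadb

class EmbeddingStore:
    def __init__(self, store_dir: str = "./db/chroma_vectors") -> None:
        self.client = chromadb.PersistentClient(path=store_dir)

    def add_to_collection(self, collection_name: str, source: str,
                          chunks: list[str], vectors: list[list[float]]) -> None:
        # get_or_create_collection creates the collection if it does not
        # exist yet, matching the docstring above.
        col = self.client.get_or_create_collection(collection_name)
        col.add(
            ids=[f"{source}-{i}" for i in range(len(chunks))],
            documents=chunks,
            embeddings=vectors,
            metadatas=[{"source": source} for _ in chunks],
        )
```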
```diff
@@ -160,7 +163,10 @@ class Rag:
         Returns:
             The llm_model's answer
         '''
-
+        try:
+            return self.model.ask_llm(question=question)
+        except:
+            return "Error while communicating with model !"
 
     def ask_rag(self, question:str, collection_name:str)->tuple[str, str, list[str], list[str]]:
         '''
```
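`model.ask_llm` is not shown either; for a Hugging Face backend it would typically wrap a chat completion call. A hypothetical sketch, where the model id is an example rather than the repo's configuration:

```python
from huggingface_hub import InferenceClient

def ask_llm(question: str) -> str:
    # chat_completion is InferenceClient's standard text-generation entry
    # point; the model id below is illustrative only.
    client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")
    resp = client.chat_completion(
        messages=[{"role": "user", "content": question}],
        max_tokens=512,
    )
    return resp.choices[0].message.content
```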
```diff
@@ -180,24 +186,27 @@ class Rag:
             return "", "Error: No collection specified !", [], []
         if not collection_name in self.emb_store.get_collection_names():
             return "", "Error: {name} is no longer in the database !".format(name=collection_name), [], []
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        try:
+            # Turn the 'question' into a vector with emb_model
+            query_vector:list[float] = self.model.create_vector(question)
+            # Retrieve the store's chunks that are similar to the question
+            chunks, sources, ids = self.emb_store.get_similar_chunks(
+                query_vector=query_vector,
+                count=2,
+                collection_name=collection_name
+            )
+            # Build the final prompt from the prompt_template
+            prompt:str = self.prompt_template.format(
+                context="\n\n\n".join(chunks),
+                question=question
+            )
+            # Ask the llm_model to answer
+            resp:str = self.ask_llm(question=prompt)
+
+            return prompt, resp, sources, ids
+        except:
+            return "", "Error while communicating with model !", [], []
+
 
 def test_cours_TSTL()->None:
     # Test placed here during development
     STORE_DIR = "./db/chroma_vectors"
```
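The `prompt_template` used by `ask_rag` is also not part of the diff; the `format(context=..., question=...)` call only implies that it has those two placeholders. A minimal compatible template, whose wording is an assumption:

```python
# Assumed template shape: any text with {context} and {question} slots
# satisfies the format(...) call in ask_rag.
PROMPT_TEMPLATE = """Answer the question using only the context below.

Context:
{context}

Question: {question}
Answer:"""

print(PROMPT_TEMPLATE.format(context="chunk A\n\n\nchunk B",
                             question="What does the course cover?"))
```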