Eric Marchand committed · Commit 590d088 · Parent(s): 102dd78

Bugfix: create_vector in HuggingFaceModel returns an np.ndarray

Files changed:
- src/model_huggingface.py (+12 -6)
- src/rag.py (+35 -26)
src/model_huggingface.py

```diff
@@ -38,12 +38,18 @@ class HuggingFaceModel(AModel):
             raise
 
     def create_vector(self, chunk:str)->list[float]:
-
-
-
-
-
-
+        try:
+            resp = self.model.feature_extraction(
+                text=chunk,
+                # normalize=True, # Only available on servers powered by Text-Embedding-Inference.
+                model=self.feature_name, # normalized ??
+            )
+            if isinstance(resp, np.ndarray):
+                return resp
+            else:
+                raise Exception("Error with embedding !")
+        except:
+            raise
 
     def create_vectors(self, chunks:list[str])->list[list[float]]:
         '''
```
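For reference, `feature_extraction` on `huggingface_hub`'s `InferenceClient` does return a `numpy.ndarray`, which is exactly what the commit title describes: the method now hands an array to callers despite its `list[float]` annotation. A minimal sketch of the same call outside the class, with an example model name (the repo's `self.feature_name` is not visible in this diff):

```python
from huggingface_hub import InferenceClient
import numpy as np

client = InferenceClient()

def embed(chunk: str) -> list[float]:
    # feature_extraction returns an np.ndarray, the behavior this commit
    # documents; the model id below is only an example, not the repo's.
    resp = client.feature_extraction(
        text=chunk,
        model="sentence-transformers/all-MiniLM-L6-v2",
    )
    if not isinstance(resp, np.ndarray):
        raise TypeError("Unexpected embedding payload")
    return resp.tolist()  # converting here keeps the list[float] contract
```

Calling `.tolist()` at the boundary, as above, would let the annotation stay accurate; returning the raw array also works with most vector stores, which is what the commit opts for.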
src/rag.py

```diff
@@ -79,11 +79,11 @@ class Rag:
         '''
         vectors:list = []
         tokens:int = 0
-
-
-
-
-
+        try:
+            vectors:list[list[float]] = self.model.create_vectors(chunks) # batch if the model allows it
+            return vectors
+        except:
+            raise
 
     def load_pdf(self, file_name:str)->str:
         ''' Loads the file 'file_name' and returns its content as text. '''
```
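The comment on the added line notes that `create_vectors` batches only if the model allows it. When the backend exposes no batch endpoint, a plain fallback is to loop over the single-chunk method; a sketch assuming the `create_vector` patched above:

```python
def create_vectors_fallback(model, chunks: list[str]) -> list[list[float]]:
    # One request per chunk: simple and correct, but O(n) round-trips,
    # so a real batch endpoint is preferable for large documents.
    # `model` is assumed to expose the create_vector method shown above.
    return [list(model.create_vector(chunk)) for chunk in chunks]
```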
```diff
@@ -142,8 +142,11 @@ class Rag:
         collection_name: The name of the collection the chunks should be added to.
             The collection is created if it does not exist.
         source: the source of the chunks (file name, url ...)
-        '''
-
+        '''
+        try:
+            vectors = self.create_vectors(chunks=chunks)
+        except:
+            raise
         self.emb_store.add_to_collection(
             collection_name=collection_name,
             source=source,
```
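`emb_store.add_to_collection` itself is outside this diff. Given the `./db/chroma_vectors` store directory in the test function below, the store plausibly wraps ChromaDB; a hypothetical sketch of such a store (the class and method names mirror the call site and are assumptions, while the Chroma calls are standard API):

```python
import chromadb

class EmbeddingStore:
    def __init__(self, store_dir: str = "./db/chroma_vectors") -> None:
        self.client = chromadb.PersistentClient(path=store_dir)

    def add_to_collection(self, collection_name: str, source: str,
                          chunks: list[str], vectors: list[list[float]]) -> None:
        # get_or_create_collection creates the collection if it does not
        # exist yet, matching the docstring above.
        col = self.client.get_or_create_collection(collection_name)
        col.add(
            ids=[f"{source}-{i}" for i in range(len(chunks))],
            documents=chunks,
            embeddings=vectors,
            metadatas=[{"source": source} for _ in chunks],
        )
```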
```diff
@@ -160,7 +163,10 @@ class Rag:
         Returns:
             The llm_model's answer
         '''
-
+        try:
+            return self.model.ask_llm(question=question)
+        except:
+            return "Error while communicating with model !"
 
     def ask_rag(self, question:str, collection_name:str)->tuple[str, str, list[str], list[str]]:
         '''
```
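`model.ask_llm` is not shown either; for a Hugging Face backend it would typically wrap a chat completion call. A hypothetical sketch, where the model id is an example rather than the repo's configuration:

```python
from huggingface_hub import InferenceClient

def ask_llm(question: str) -> str:
    # chat_completion is InferenceClient's standard text-generation entry
    # point; the model id below is illustrative only.
    client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")
    resp = client.chat_completion(
        messages=[{"role": "user", "content": question}],
        max_tokens=512,
    )
    return resp.choices[0].message.content
```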
```diff
@@ -180,24 +186,27 @@ class Rag:
             return "", "Error: No collection specified !", [], []
         if not collection_name in self.emb_store.get_collection_names():
             return "", "Error: {name} is no longer in the database !".format(name=collection_name), [], []
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        try:
+            # Turn the 'question' into a vector with emb_model
+            query_vector:list[float] = self.model.create_vector(question)
+            # Retrieve the store's chunks that are similar to the question
+            chunks, sources, ids = self.emb_store.get_similar_chunks(
+                query_vector=query_vector,
+                count=2,
+                collection_name=collection_name
+            )
+            # Build the final prompt from the prompt_template
+            prompt:str = self.prompt_template.format(
+                context="\n\n\n".join(chunks),
+                question=question
+            )
+            # Ask the llm_model to answer
+            resp:str = self.ask_llm(question=prompt)
+
+            return prompt, resp, sources, ids
+        except:
+            return "", "Error while communicating with model !", [], []
+
 
 def test_cours_TSTL()->None:
     # Test placed here during development
     STORE_DIR = "./db/chroma_vectors"
```
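The `prompt_template` used by `ask_rag` is also not part of the diff; the `format(context=..., question=...)` call only implies that it has those two placeholders. A minimal compatible template, whose wording is an assumption:

```python
# Assumed template shape: any text with {context} and {question} slots
# satisfies the format(...) call in ask_rag.
PROMPT_TEMPLATE = """Answer the question using only the context below.

Context:
{context}

Question: {question}
Answer:"""

print(PROMPT_TEMPLATE.format(context="chunk A\n\n\nchunk B",
                             question="What does the course cover?"))
```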