TI_RAG_Demo_L3.1

Sleeping

App Files Files Community

syedmudassir16 committed on Sep 4, 2024

Commit

e085441

verified ·

1 Parent(s): 3cba93e

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -28

app.py CHANGED Viewed

@@ -14,7 +14,6 @@ import json
 import gradio as gr
 import re
 from threading import Thread
-from transformers.agents import Tool, HfEngine, ReactJsonAgent
 class DocumentRetrievalAndGeneration:
     def __init__(self, embedding_model_name, lm_model_id, data_folder):
@@ -23,7 +22,6 @@ class DocumentRetrievalAndGeneration:
         self.gpu_index = self.create_faiss_index()
         self.tokenizer, self.model = self.initialize_llm(lm_model_id)
         self.retriever_tool = self.create_retriever_tool()
-        self.agent = self.create_agent()
     def load_documents(self, folder_path):
         loader = DirectoryLoader(folder_path, loader_cls=TextLoader)
@@ -89,22 +87,11 @@ class DocumentRetrievalAndGeneration:
             return "Text generation process encountered an error"
     def create_retriever_tool(self):
-        class RetrieverTool(Tool):
-            name = "retriever"
-            description = "Retrieves documents from the knowledge base that are semantically similar to the input query."
-            inputs = {
-                "query": {
-                    "type": "text",
-                    "description": "The query to perform. Use affirmative form rather than a question.",
-                }
-            }
-            output_type = "text"
-            def __init__(self, parent, **kwargs):
-                super().__init__(**kwargs)
                 self.parent = parent
-            def forward(self, query: str) -> str:
                 similarityThreshold = 1
                 query_embedding = self.parent.embeddings.encode(query, convert_to_tensor=True).cpu().numpy()
                 distances, indices = self.parent.gpu_index.search(np.array([query_embedding]), k=3)
@@ -117,22 +104,23 @@ class DocumentRetrievalAndGeneration:
         return RetrieverTool(self)
-    def create_agent(self):
-        llm_engine = HfEngine("meta-llama/Meta-Llama-3.1-8B-Instruct")
-        return ReactJsonAgent(tools=[self.retriever_tool], llm_engine=llm_engine, max_iterations=4, verbose=2)
     def run_agentic_rag(self, question: str) -> str:
-        enhanced_question = f"""Using the information in your knowledge base, accessible with the 'retriever' tool,
-give a comprehensive answer to the question below.
 Respond only to the question asked, be concise and relevant.
-If you can't find information, try calling your retriever again with different arguments.
-Make sure to cover the question completely by calling the retriever tool several times with semantically different queries.
-Your queries should be in affirmative form, not questions.
-Question:
-{question}"""
-        return self.agent.run(enhanced_question)
     def query_and_generate_response(self, query):
         # Standard RAG

 import gradio as gr
 import re
 from threading import Thread
 class DocumentRetrievalAndGeneration:
     def __init__(self, embedding_model_name, lm_model_id, data_folder):
         self.gpu_index = self.create_faiss_index()
         self.tokenizer, self.model = self.initialize_llm(lm_model_id)
         self.retriever_tool = self.create_retriever_tool()
     def load_documents(self, folder_path):
         loader = DirectoryLoader(folder_path, loader_cls=TextLoader)
             return "Text generation process encountered an error"
     def create_retriever_tool(self):
+        class RetrieverTool:
+            def __init__(self, parent):
                 self.parent = parent
+            def run(self, query: str) -> str:
                 similarityThreshold = 1
                 query_embedding = self.parent.embeddings.encode(query, convert_to_tensor=True).cpu().numpy()
                 distances, indices = self.parent.gpu_index.search(np.array([query_embedding]), k=3)
         return RetrieverTool(self)
     def run_agentic_rag(self, question: str) -> str:
+        retriever_output = self.retriever_tool.run(question)
+        enhanced_prompt = f"""Using the following information retrieved from the knowledge base:
+{retriever_output}
+Give a comprehensive answer to the question below.
 Respond only to the question asked, be concise and relevant.
+If you can't find information, say "No relevant information found."
+Question: {question}
+Answer:"""
+        input_ids = self.tokenizer.encode(enhanced_prompt, return_tensors="pt").to(self.model.device)
+        return self.generate_response_with_timeout(input_ids)
     def query_and_generate_response(self, query):
         # Standard RAG