Spaces:

Rahatara
/

insta_rag

Sleeping

Rahatara committed on May 9, 2024

Commit

8f7a4d5

verified ·

1 Parent(s): 10d05a8

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -26,6 +26,8 @@ class MyApp:
         self.chat_history: list = []
         self.N: int = 0
         self.count: int = 0
     def __call__(self, file: str) -> Any:
         if self.count == 0:
@@ -35,29 +37,33 @@ class MyApp:
     def process_file(self, file: str):
         loader = PyMuPDFLoader(file.name)
-        documents = loader.load()
         pattern = r"/([^/]+)$"
         match = re.search(pattern, file.name)
         try:
-            file_name = match.group(1)
         except:
-            file_name = os.path.basename(file)
-        return documents, file_name
     def build_chain(self, file: str):
-        documents, file_name = self.process_file(file)
         embeddings = OpenAIEmbeddings(openai_api_key=self.OPENAI_API_KEY)
         pdfsearch = Chroma.from_documents(
-            documents,
             embeddings,
-            collection_name=file_name,
         )
-        chain = ConversationalRetrievalChain.from_llm(
             ChatOpenAI(temperature=0.0, openai_api_key=self.OPENAI_API_KEY),
             retriever=pdfsearch.as_retriever(search_kwargs={"k": 1}),
             return_source_documents=True,
         )
-        return chain
 def get_response(history, query, file):
     if not file:

         self.chat_history: list = []
         self.N: int = 0
         self.count: int = 0
+        self.documents = None
+        self.file_name = None
     def __call__(self, file: str) -> Any:
         if self.count == 0:
     def process_file(self, file: str):
         """Load an uploaded PDF and return a preview image of its first page.

         Stores the loaded documents in ``self.documents`` and the file's
         base name in ``self.file_name`` so a later ``build_chain`` call can
         index them.

         Args:
             file: Uploaded-file object. Despite the ``str`` annotation, the
                 code reads ``file.name``, so it must expose the PDF path
                 through a ``name`` attribute.

         Returns:
             The image built by ``Image.frombytes`` from page 0 of the PDF,
             rendered at 150 dpi.
         """
         path = file.name
         loader = PyMuPDFLoader(path)
         self.documents = loader.load()

         # Keep only the last "/"-separated component of the path. When the
         # pattern does not match (e.g. no "/" in the path), fall back to
         # basename of the *path* -- not of the file object, which would
         # raise TypeError.
         pattern = r"/([^/]+)$"
         match = re.search(pattern, path)
         if match is not None:
             self.file_name = match.group(1)
         else:
             self.file_name = os.path.basename(path)

         # Render the first page for display. The context manager closes the
         # PDF handle even if rendering fails; frombytes copies the pixel
         # data, so the image stays valid after the document is closed.
         with fitz.open(path) as doc:
             pix = doc[0].get_pixmap(dpi=150)
             image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
         return image
     def build_chain(self, file: str):
         """Build the vector store and the conversational retrieval chain.

         Embeds ``self.documents`` into a Chroma collection named after
         ``self.file_name`` and stores the resulting chain in ``self.chain``.

         Args:
             file: Uploaded-file object. Only used when no documents have
                 been loaded yet, in which case it is passed to
                 ``process_file`` first.

         Returns:
             The status string "Vector database built successfully!".

         Raises:
             ValueError: If no documents are loaded and ``file`` is empty.
         """
         # process_file() populates self.documents / self.file_name. If it
         # has not run yet, do it here rather than handing None to Chroma.
         if self.documents is None:
             if not file:
                 raise ValueError(
                     "No document loaded: call process_file() first or pass a file."
                 )
             self.process_file(file)

         embeddings = OpenAIEmbeddings(openai_api_key=self.OPENAI_API_KEY)
         # NOTE(review): the raw file name is used as the collection name;
         # confirm it satisfies Chroma's collection-name rules.
         pdfsearch = Chroma.from_documents(
             self.documents,
             embeddings,
             collection_name=self.file_name,
         )
         # k=1: only the single best-matching chunk is retrieved per query.
         self.chain = ConversationalRetrievalChain.from_llm(
             ChatOpenAI(temperature=0.0, openai_api_key=self.OPENAI_API_KEY),
             retriever=pdfsearch.as_retriever(search_kwargs={"k": 1}),
             return_source_documents=True,
         )
         return "Vector database built successfully!"
 def get_response(history, query, file):
     if not file: