Rahatara committed on
Commit
8f7a4d5
·
verified ·
1 Parent(s): 10d05a8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -9
app.py CHANGED
@@ -26,6 +26,8 @@ class MyApp:
26
  self.chat_history: list = []
27
  self.N: int = 0
28
  self.count: int = 0
 
 
29
 
30
  def __call__(self, file: str) -> Any:
31
  if self.count == 0:
@@ -35,29 +37,33 @@ class MyApp:
35
 
36
  def process_file(self, file: str):
37
  loader = PyMuPDFLoader(file.name)
38
- documents = loader.load()
39
  pattern = r"/([^/]+)$"
40
  match = re.search(pattern, file.name)
41
  try:
42
- file_name = match.group(1)
43
  except:
44
- file_name = os.path.basename(file)
45
- return documents, file_name
 
 
 
 
 
46
 
47
  def build_chain(self, file: str):
48
- documents, file_name = self.process_file(file)
49
  embeddings = OpenAIEmbeddings(openai_api_key=self.OPENAI_API_KEY)
50
  pdfsearch = Chroma.from_documents(
51
- documents,
52
  embeddings,
53
- collection_name=file_name,
54
  )
55
- chain = ConversationalRetrievalChain.from_llm(
56
  ChatOpenAI(temperature=0.0, openai_api_key=self.OPENAI_API_KEY),
57
  retriever=pdfsearch.as_retriever(search_kwargs={"k": 1}),
58
  return_source_documents=True,
59
  )
60
- return chain
61
 
62
  def get_response(history, query, file):
63
  if not file:
 
26
  self.chat_history: list = []
27
  self.N: int = 0
28
  self.count: int = 0
29
+ self.documents = None
30
+ self.file_name = None
31
 
32
  def __call__(self, file: str) -> Any:
33
  if self.count == 0:
 
37
 
38
  def process_file(self, file: str):
39
  loader = PyMuPDFLoader(file.name)
40
+ self.documents = loader.load()
41
  pattern = r"/([^/]+)$"
42
  match = re.search(pattern, file.name)
43
  try:
44
+ self.file_name = match.group(1)
45
  except:
46
+ self.file_name = os.path.basename(file)
47
+ # Render the first page for display
48
+ doc = fitz.open(file.name)
49
+ page = doc[0]
50
+ pix = page.get_pixmap(dpi=150)
51
+ image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
52
+ return image
53
 
54
  def build_chain(self, file: str):
 
55
  embeddings = OpenAIEmbeddings(openai_api_key=self.OPENAI_API_KEY)
56
  pdfsearch = Chroma.from_documents(
57
+ self.documents,
58
  embeddings,
59
+ collection_name=self.file_name,
60
  )
61
+ self.chain = ConversationalRetrievalChain.from_llm(
62
  ChatOpenAI(temperature=0.0, openai_api_key=self.OPENAI_API_KEY),
63
  retriever=pdfsearch.as_retriever(search_kwargs={"k": 1}),
64
  return_source_documents=True,
65
  )
66
+ return "Vector database built successfully!"
67
 
68
  def get_response(history, query, file):
69
  if not file: