Update app.py
Browse files
app.py
CHANGED
@@ -26,6 +26,8 @@ class MyApp:
|
|
26 |
self.chat_history: list = []
|
27 |
self.N: int = 0
|
28 |
self.count: int = 0
|
|
|
|
|
29 |
|
30 |
def __call__(self, file: str) -> Any:
|
31 |
if self.count == 0:
|
@@ -35,29 +37,33 @@ class MyApp:
|
|
35 |
|
36 |
def process_file(self, file: str):
|
37 |
loader = PyMuPDFLoader(file.name)
|
38 |
-
documents = loader.load()
|
39 |
pattern = r"/([^/]+)$"
|
40 |
match = re.search(pattern, file.name)
|
41 |
try:
|
42 |
-
file_name = match.group(1)
|
43 |
except:
|
44 |
-
file_name = os.path.basename(file)
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
def build_chain(self, file: str):
|
48 |
-
documents, file_name = self.process_file(file)
|
49 |
embeddings = OpenAIEmbeddings(openai_api_key=self.OPENAI_API_KEY)
|
50 |
pdfsearch = Chroma.from_documents(
|
51 |
-
documents,
|
52 |
embeddings,
|
53 |
-
collection_name=file_name,
|
54 |
)
|
55 |
-
chain = ConversationalRetrievalChain.from_llm(
|
56 |
ChatOpenAI(temperature=0.0, openai_api_key=self.OPENAI_API_KEY),
|
57 |
retriever=pdfsearch.as_retriever(search_kwargs={"k": 1}),
|
58 |
return_source_documents=True,
|
59 |
)
|
60 |
-
return
|
61 |
|
62 |
def get_response(history, query, file):
|
63 |
if not file:
|
|
|
26 |
self.chat_history: list = []
|
27 |
self.N: int = 0
|
28 |
self.count: int = 0
|
29 |
+
self.documents = None
|
30 |
+
self.file_name = None
|
31 |
|
32 |
def __call__(self, file: str) -> Any:
|
33 |
if self.count == 0:
|
|
|
37 |
|
38 |
def process_file(self, file: str):
    """Load the uploaded PDF and return an image of its first page.

    Stores the loaded documents on ``self.documents`` and the file's base
    name on ``self.file_name`` (both are read by ``build_chain``), then
    renders page 0 at 150 dpi.

    Args:
        file: The uploaded file. The path is taken from ``file.name`` when
            that attribute exists; otherwise ``file`` itself is used as
            the path.

    Returns:
        The first page as an RGB image built with ``Image.frombytes``.
    """
    # Resolve the path once instead of re-reading ``file.name`` at each use.
    path = getattr(file, "name", file)

    self.documents = PyMuPDFLoader(path).load()

    # ``os.path.basename`` yields the same result as the former
    # ``/([^/]+)$`` regex for slash-separated paths, and replaces a bare
    # ``except`` whose fallback passed the upload object (not its path)
    # to ``os.path.basename``.
    self.file_name = os.path.basename(path)

    # Render the first page for display. The context manager closes the
    # document even if rendering fails; the image is built inside the block
    # so the pixel data is copied before the document is released.
    with fitz.open(path) as doc:
        pix = doc[0].get_pixmap(dpi=150)
        image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
    return image
|
53 |
|
54 |
def build_chain(self, file: str):
    """Build the Chroma index and the conversational retrieval chain.

    Embeds ``self.documents`` into a Chroma collection named after
    ``self.file_name`` and stores the resulting chain on ``self.chain``.

    Args:
        file: The uploaded file. Only used when no documents have been
            loaded yet, in which case it is passed to ``process_file``.

    Returns:
        The status string ``"Vector database built successfully!"``.

    Raises:
        ValueError: If no documents are loaded and ``file`` is empty.
    """
    # ``self.documents`` starts out as ``None``; load it here rather than
    # handing ``None`` to Chroma when ``process_file`` has not run yet.
    if self.documents is None:
        if not file:
            raise ValueError(
                "No PDF has been processed and no file was provided."
            )
        self.process_file(file)

    embeddings = OpenAIEmbeddings(openai_api_key=self.OPENAI_API_KEY)
    # NOTE(review): the file name is used verbatim as the collection name;
    # confirm it satisfies Chroma's collection-name rules.
    pdfsearch = Chroma.from_documents(
        self.documents,
        embeddings,
        collection_name=self.file_name,
    )

    # k=1: the retriever is configured to return a single chunk per query.
    retriever = pdfsearch.as_retriever(search_kwargs={"k": 1})
    llm = ChatOpenAI(temperature=0.0, openai_api_key=self.OPENAI_API_KEY)
    self.chain = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=retriever,
        return_source_documents=True,
    )
    return "Vector database built successfully!"
|
67 |
|
68 |
def get_response(history, query, file):
|
69 |
if not file:
|