Spaces:
Runtime error
Runtime error
qorgh346
committed on
Commit
·
dd9ce97
1
Parent(s):
7af777c
update app.py
Browse files
app.py
CHANGED
@@ -10,16 +10,26 @@ from langchain.memory import ConversationBufferMemory
|
|
10 |
from langchain.chains import ConversationalRetrievalChain
|
11 |
from htmlTemplates import css, bot_template, user_template
|
12 |
from langchain.llms import HuggingFaceHub, LlamaCpp,CTransformers # For loading transformer models.
|
13 |
-
|
|
|
14 |
def get_pdf_text(pdf_docs):
|
15 |
-
text = ''
|
16 |
# pdf_file_ = open(pdf_docs,'rb')
|
17 |
# text = "example hofjin"
|
18 |
-
pdf_reader = PdfReader(pdf_docs)
|
19 |
-
for page in pdf_reader.pages:
|
20 |
-
text += page.extract_text()
|
21 |
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
|
25 |
def get_text_chunks(text):
|
@@ -151,7 +161,7 @@ def main():
|
|
151 |
if st.button("Process"):
|
152 |
with st.spinner("Processing"):
|
153 |
# get pdf text
|
154 |
-
|
155 |
|
156 |
for file in docs:
|
157 |
print('file - type : ', file.type)
|
@@ -160,7 +170,7 @@ def main():
|
|
160 |
raw_text += get_text_file(file)
|
161 |
elif file.type in ['application/octet-stream', 'application/pdf']:
|
162 |
#file is .pdf
|
163 |
-
|
164 |
elif file.type == 'text/csv':
|
165 |
#file is .csv
|
166 |
raw_text += get_csv_file(file)
|
|
|
10 |
from langchain.chains import ConversationalRetrievalChain
|
11 |
from htmlTemplates import css, bot_template, user_template
|
12 |
from langchain.llms import HuggingFaceHub, LlamaCpp,CTransformers # For loading transformer models.
|
13 |
+
from langchain.document_loaders import PyPDFLoader
|
14 |
+
from tempfile import NamedTemporaryFile
|
15 |
def get_pdf_text(pdf_docs):
    """Load one uploaded PDF through ``PyPDFLoader`` and return the result.

    The uploaded bytes are spooled to a temporary file on disk because
    ``PyPDFLoader`` is constructed from a file path rather than from an
    in-memory buffer.

    Args:
        pdf_docs: A single uploaded file object exposing ``getvalue()``
            that returns the raw PDF bytes (presumably a Streamlit
            ``UploadedFile`` -- confirm against the caller in ``main``).

    Returns:
        Whatever ``PyPDFLoader(path).load()`` returns for the file
        (per the LangChain docs, a list of ``Document`` objects).
        NOTE: despite the function name this is not a plain string.
    """
    import os

    # delete=False lets us close the handle *before* the loader reopens the
    # file by name. Reopening a still-open NamedTemporaryFile works on Unix
    # but fails on Windows, and closing also guarantees the bytes are flushed.
    with NamedTemporaryFile(delete=False) as temp_file:
        temp_file.write(pdf_docs.getvalue())
        temp_path = temp_file.name

    try:
        pdf_loader = PyPDFLoader(temp_path)
        print('pdf_loader = ', pdf_loader)
        pdf_doc = pdf_loader.load()
        print('pdf_doc = ',pdf_doc)
        return pdf_doc
    finally:
        # We opted out of automatic deletion above, so clean up explicitly,
        # even when loading raises.
        os.unlink(temp_path)
|
33 |
|
34 |
|
35 |
def get_text_chunks(text):
|
|
|
161 |
if st.button("Process"):
|
162 |
with st.spinner("Processing"):
|
163 |
# get pdf text
|
164 |
+
doc_list = []
|
165 |
|
166 |
for file in docs:
|
167 |
print('file - type : ', file.type)
|
|
|
170 |
raw_text += get_text_file(file)
|
171 |
elif file.type in ['application/octet-stream', 'application/pdf']:
|
172 |
#file is .pdf
|
173 |
+
doc_list.append(get_pdf_text(file))
|
174 |
elif file.type == 'text/csv':
|
175 |
#file is .csv
|
176 |
raw_text += get_csv_file(file)
|