fedor-ch committed on
Commit
b65cb58
·
1 Parent(s): 4bd3786

Add handling of multiple PDFs

Browse files
Files changed (1) hide show
  1. app.py +27 -10
app.py CHANGED
@@ -3,31 +3,48 @@ import os
3
  import time
4
 
5
  from langchain.document_loaders import OnlinePDFLoader
6
-
7
  from langchain.text_splitter import CharacterTextSplitter
8
-
9
-
10
  from langchain.llms import OpenAI
11
-
12
  from langchain.embeddings import OpenAIEmbeddings
 
 
 
13
 
14
 
15
- from langchain.vectorstores import Chroma
 
 
 
 
16
 
17
- from langchain.chains import ConversationalRetrievalChain
 
 
 
 
 
 
 
 
 
18
 
19
  def loading_pdf():
20
  return "Loading..."
21
 
22
- def pdf_changes(pdf_doc, open_ai_key):
 
23
  if openai_key is not None:
24
  os.environ['OPENAI_API_KEY'] = open_ai_key
25
- loader = OnlinePDFLoader(pdf_doc.name)
26
- documents = loader.load()
 
 
 
 
27
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
28
  texts = text_splitter.split_documents(documents)
29
  embeddings = OpenAIEmbeddings()
30
- db = Chroma.from_documents(texts, embeddings)
31
  retriever = db.as_retriever()
32
  global qa
33
  qa = ConversationalRetrievalChain.from_llm(
 
3
  import time
4
 
5
  from langchain.document_loaders import OnlinePDFLoader
 
6
  from langchain.text_splitter import CharacterTextSplitter
 
 
7
  from langchain.llms import OpenAI
 
8
  from langchain.embeddings import OpenAIEmbeddings
9
+ from langchain.vectorstores import Chroma
10
+ from langchain.chains import ConversationalRetrievalChain
11
+ from langchain import PromptTemplate
12
 
13
 
14
+ _template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
15
+ Chat History:
16
+ {chat_history}
17
+ Follow Up Input: {question}
18
+ Standalone question:"""
19
 
20
+ CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
21
+
22
+ template = """
23
+ You are given the following extracted parts of a long document and a question. Provide a short structured answer.
24
+ If you don't know the answer, look on the web. Don't try to make up an answer.
25
+ Question: {question}
26
+ =========
27
+ {context}
28
+ =========
29
+ Answer in Markdown:"""
30
 
31
  def loading_pdf():
32
  return "Loading..."
33
 
34
+
35
+ def pdf_changes(pdf_docs, open_ai_key):
36
  if openai_key is not None:
37
  os.environ['OPENAI_API_KEY'] = open_ai_key
38
+ documents = []
39
+ for file in pdf_docs:
40
+ loader = OnlinePDFLoader(file.name)
41
+ documents.extend(loader.load_and_split())
42
+ print(f'{file} loaded')
43
+
44
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
45
  texts = text_splitter.split_documents(documents)
46
  embeddings = OpenAIEmbeddings()
47
+ db = Chroma.from_documents(documents, embeddings)
48
  retriever = db.as_retriever()
49
  global qa
50
  qa = ConversationalRetrievalChain.from_llm(