antoniorached committed on
Commit
e585fa4
·
verified ·
1 Parent(s): b0edbc8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -0
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from langchain_openai import ChatOpenAI
3
+ from langchain_community.embeddings import HuggingFaceEmbeddings
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain_community.vectorstores import Chroma
6
+ from langchain_community.document_loaders import PyPDFLoader
7
+ from langchain.chains import ConversationalRetrievalChain
8
+ from langchain_community.chat_message_histories import ChatMessageHistory
9
+ from langchain.memory import ConversationBufferMemory
10
+ from langchain_core.prompts import PromptTemplate
11
+
12
+ # Access the OpenAI API key from the environment
13
+ open_ai_key = os.getenv("OPENAI_API_KEY")
14
+
15
+ llm = ChatOpenAI(api_key=open_ai_key)
16
+
17
+ template = """Use the following pieces of information to answer the user's question.
18
+ If you don't know the answer, just say that you don't know, don't try to make up an answer.
19
+
20
+ Context: {context}
21
+ Question: {question}
22
+
23
+ Only return the helpful answer below and nothing else.
24
+ Helpful answer:
25
+ """
26
+
27
+ prompt = PromptTemplate(template=template, input_variables=["context", "question"])
28
+
29
+
30
+ # Load and process the PDF (NOTE(review): `pdf_file` is never defined in this file - confirm where it is supplied)
31
+ loader = PyPDFLoader(pdf_file.name)
32
+ pdf_data = loader.load()
33
+
34
+ # Split the text into chunks
35
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
36
+ docs = text_splitter.split_documents(pdf_data)
37
+
38
+ # Embed the chunks and index them in a Chroma vector store
39
+ embeddings = HuggingFaceEmbeddings(model_name="embaas/sentence-transformers-multilingual-e5-base")
40
+ db = Chroma.from_documents(docs, embeddings)
41
+
42
+ # Initialize message history for conversation
43
+ message_history = ChatMessageHistory()
44
+
45
+ # Memory for conversational context
46
+ memory = ConversationBufferMemory(
47
+ memory_key="chat_history",
48
+ output_key="answer",
49
+ chat_memory=message_history,
50
+ return_messages=True,
51
+ )
52
+
53
+ # Create a conversational retrieval chain that retrieves from the Chroma store, keeps chat history in memory, and answers with the custom prompt
54
+ chain = ConversationalRetrievalChain.from_llm(
55
+ llm=llm,
56
+ chain_type="stuff",
57
+ retriever=db.as_retriever(),
58
+ memory=memory,
59
+ return_source_documents=False,
60
+ combine_docs_chain_kwargs={'prompt': prompt}
61
+ )
62
+
63
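+ # Run the chain on the question and read the answer (NOTE(review): `question` is never defined in this file - confirm where it is supplied)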
64
+ res = chain({"question": question})
65
+ answer = res["answer"]
66
+