dfasd committed (verified)
Commit 620b6be · Parent(s): 697a8cd

Update app.py

Files changed (1)
  1. app.py +48 -42
app.py CHANGED
@@ -9,7 +9,10 @@ from langchain_core.runnables import RunnablePassthrough
 from langchain_openai import ChatOpenAI
 from langchain import hub
 from langchain_core.output_parsers import StrOutputParser
-
+from langchain.chains import create_history_aware_retriever
+from langchain.prompts import PromptTemplate
+from langchain.chains.question_answering import load_qa_chain
+import pydantic
 # Load environment variables
 load_dotenv()
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
@@ -18,61 +21,64 @@ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len)
 embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
 llm = ChatOpenAI(model="gpt-4-1106-preview", api_key=OPENAI_API_KEY)
-vectordb_path = './vector_db'

-# Load and process documents
-uploaded_files = ['airbus.pdf', 'annualreport2223.pdf']
+vectordb_path = './vector_db'
 dbname = 'vector_db'
+uploaded_files = ['airbus.pdf', 'annualreport2223.pdf']
 vectorstore = None

-for file in uploaded_files:
-    loader = PyPDFLoader(file)
-    data = loader.load()
-    texts = text_splitter.split_documents(data)
-
-    if vectorstore is None:
-        vectorstore = Chroma.from_documents(documents=texts, embedding=embeddings, persist_directory=os.path.join(vectordb_path, dbname))
-    else:
-        vectorstore.add_documents(texts)
-
-vectorstore.persist()
-retriever = vectorstore.as_retriever()
-
-# Load prompt template
-prompt = hub.pull("rlm/rag-prompt")
-print(prompt)
-
-def format_docs(docs):
-    return "\n\n".join(doc.page_content for doc in docs)
-
-rag_chain = (
-    {"context": retriever | format_docs, "question": RunnablePassthrough()}
-    | prompt
-    | llm
-    | StrOutputParser()
-)
-
-# Gradio interface
-def rag_bot(query, chat_history):
-    response = rag_chain.invoke({"input": query, "chat_history": chat_history})
-    return response
-
-chatbot = gr.Chatbot(avatar_images=["user.jpg", "bot.png"], height=600)
-clear_but = gr.Button(value="Clear Chat")
+def create_vectordb():
+    for file in uploaded_files:
+        loader = PyPDFLoader(file)
+        data = loader.load()
+        texts = text_splitter.split_documents(data)
+
+        if vectorstore is None:
+            vectorstore = Chroma.from_documents(documents=texts, embedding=embeddings, persist_directory=os.path.join(vectordb_path, dbname))
+        else:
+            vectorstore.add_documents(texts)
+
+
+def rag_bot(query, chat_history):
+    print(f"Received query: {query}")
+
+    template = """Please answer to human's input based on context. If the input is not mentioned in context, output something like 'I don't know'.
+    Context: {context}
+    Human: {human_input}
+    Your Response as Chatbot:"""
+
+    prompt_s = PromptTemplate(
+        input_variables=["human_input", "context"],
+        template=template
+    )
+
+    # Initialize vector store
+    vectorstore = Chroma(persist_directory=os.path.join(vectordb_path), embedding_function=embeddings)
+
+    # prompt = hub.pull("langchain-ai/chat-langchain-rephrase")
+
+    docs = vectorstore.similarity_search(query)
+
+    try:
+        stuff_chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt_s)
+    except pydantic.ValidationError as e:
+        print(f"Validation error: {e}")
+
+    output = stuff_chain({"input_documents": docs, "human_input": query}, return_only_outputs=False)
+
+    final_answer = output["output_text"]
+    print(f"Final Answer ---> {final_answer}")
+
+    return final_answer

 def chat(query, chat_history):
     response = rag_bot(query, chat_history)
-    chat_history.append((query, response))
-    return chat_history, chat_history
-
-demo = gr.Interface(
-    fn=chat,
-    inputs=["text", "state"],
-    outputs=["chatbot", "state"],
-    title="RAG Chatbot Prototype",
-    description="A Chatbot using Retrieval-Augmented Generation (RAG) with PDF files.",
-    allow_flagging="never",
-)
+    # chat_history.append((query, response))
+    return response
+
+chatbot = gr.Chatbot(avatar_images=["user.jpg", "bot.png"], height=600)
+clear_but = gr.Button(value="Clear Chat")
+demo = gr.ChatInterface(fn=chat, title="RAG Chatbot Prototype", multimodal=False, retry_btn=None, undo_btn=None, clear_btn=clear_but, chatbot=chatbot)

 if __name__ == '__main__':
     demo.launch(debug=True, share=True)
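For context on what this commit changes: the pre-built LCEL rag_chain (retriever | prompt | llm | StrOutputParser) is dropped, and rag_bot() now reopens the persisted Chroma store on every call, runs similarity_search, and feeds the retrieved documents into a custom PromptTemplate through load_qa_chain(..., chain_type="stuff"). The condensed sketch below replays that flow outside the Gradio UI; it is illustrative, not part of the commit. It assumes OPENAI_API_KEY is set and the two PDFs from the repo are present; the query string and prompt wording are placeholders, and the import paths may need adjusting to whatever LangChain version app.py actually pins.

# Standalone sketch (illustrative, not from the commit) of the flow rag_bot() now uses:
# embed the PDFs into a persisted Chroma store, retrieve with similarity_search,
# then answer with a "stuff" QA chain built from a custom prompt.
import os

from dotenv import load_dotenv
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFLoader
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
llm = ChatOpenAI(model="gpt-4-1106-preview", api_key=OPENAI_API_KEY)
splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200)

# One-time indexing step, roughly equivalent to create_vectordb() in the commit.
chunks = []
for path in ["airbus.pdf", "annualreport2223.pdf"]:
    chunks.extend(splitter.split_documents(PyPDFLoader(path).load()))
store = Chroma.from_documents(chunks, embedding=embeddings, persist_directory="./vector_db")

# Per-query step, roughly equivalent to rag_bot(): retrieve, then stuff the hits into {context}.
prompt = PromptTemplate(
    input_variables=["human_input", "context"],
    template=(
        "Please answer the human's input based on the context. "
        "If the input is not covered by the context, say you don't know.\n"
        "Context: {context}\nHuman: {human_input}\nYour response:"
    ),
)
chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)
query = "What were the key highlights of the annual report?"  # placeholder query
docs = store.similarity_search(query)
print(chain({"input_documents": docs, "human_input": query})["output_text"])

One point worth noting when running the committed version as-is: as far as the diff shows, create_vectordb() is defined but never called, and it persists to './vector_db/vector_db' while rag_bot() reopens Chroma from './vector_db', so the on-disk store has to exist at the path rag_bot() expects before the chatbot can retrieve anything.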