"""Build a hybrid-retrieval RAG pipeline over local PDFs.

Loads every PDF under ``files/``, splits it into small chunks, indexes the
chunks in an in-memory Qdrant store (dense OpenAI embeddings + sparse BM25
vectors), and wires up an ensemble retriever plus a chat LLM for the
conversational chain defined further down.
"""

from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.vectorstores import Qdrant
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from qdrant_client import QdrantClient
from qdrant_client.http import models as qdrant_models
import os
from langchain.document_loaders import PyPDFLoader
import gradio as gr
from langchain_qdrant import QdrantVectorStore, FastEmbedSparse, RetrievalMode

# Fail fast with a clear message instead of a TypeError from os.environ
# assignment when the key is missing.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise EnvironmentError("OPENAI_API_KEY environment variable is not set")
os.environ["OPENAI_API_KEY"] = openai_api_key

# Load every PDF in the folder into a flat list of page documents.
pdf_folder_path = "files"
documents = []
for filename in os.listdir(pdf_folder_path):
    if filename.endswith(".pdf"):
        file_path = os.path.join(pdf_folder_path, filename)
        loader = PyPDFLoader(file_path)
        documents.extend(loader.load())

# Small chunks with a little overlap so BM25 and dense retrieval both get
# focused passages.
text_splitter = CharacterTextSplitter(chunk_size=128, chunk_overlap=5)
docs = text_splitter.split_documents(documents)

embeddings = OpenAIEmbeddings()

# NOTE(fix): the original code created a separate QdrantClient(":memory:")
# and a "langchain_collection" with size=3000 vectors on it. That client was
# never used — QdrantVectorStore.from_documents(location=":memory:") creates
# its own in-memory instance and its own collection — and 3000 did not even
# match the OpenAI embedding dimension (1536). The dead, mis-sized collection
# has been removed; from_documents below handles collection creation itself.

# Sparse BM25 embeddings enable Qdrant's hybrid (dense + sparse) retrieval.
sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")

vector_store = QdrantVectorStore.from_documents(
    docs,
    embedding=embeddings,
    sparse_embedding=sparse_embeddings,
    location=":memory:",
    collection_name="langchain_collection",
    retrieval_mode=RetrievalMode.HYBRID,
)

# Conversation memory shared with the chain defined below; return_messages
# keeps history as message objects as chat models expect.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Ensemble of dense (vector similarity) and keyword (BM25) retrieval,
# weighted evenly; each contributes its top-3 hits.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
keyword_retriever = BM25Retriever.from_documents(docs)
keyword_retriever.k = 3
ensemble_retriever = EnsembleRetriever(
    retrievers=[retriever, keyword_retriever], weights=[0.5, 0.5]
)

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.4)
# Conversational RAG chain: ensemble retriever + shared buffer memory.
conversational_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=ensemble_retriever,
    memory=memory,
    verbose=True,
)


def chat_with_ai(user_input, chat_history):
    """Run one chat turn through the chain.

    Appends the (question, answer) pair to the mutable ``chat_history`` list
    (a ``gr.State`` value) and returns it for the Chatbot widget, plus an
    empty string to clear the input textbox.
    """
    response = conversational_chain({"question": user_input})
    chat_history.append((user_input, response["answer"]))
    return chat_history, ""


def clear_history():
    """Reset the conversation everywhere it is stored.

    BUG FIX: the original only blanked the Chatbot widget — the chain's
    ConversationBufferMemory kept the old turns (so the model still "saw"
    deleted context), and the gr.State list was left populated, so old
    messages reappeared on the next question. Clear all three.
    """
    memory.clear()
    return [], "", []


def gradio_chatbot():
    """Build and return the Gradio Blocks UI for the chatbot."""
    with gr.Blocks() as demo:
        gr.Markdown("# Chat Interface for Langchain")
        chatbot = gr.Chatbot(label="Langchain Chatbot")
        user_input = gr.Textbox(
            placeholder="Ask a question...",
            label="Enter your question",
        )
        submit_button = gr.Button("Send")
        btn_clear = gr.Button("Delete Context")
        chat_history = gr.State([])

        # Button click and Enter-in-textbox trigger the same handler.
        submit_button.click(
            chat_with_ai,
            inputs=[user_input, chat_history],
            outputs=[chatbot, user_input],
        )
        user_input.submit(
            chat_with_ai,
            inputs=[user_input, chat_history],
            outputs=[chatbot, user_input],
        )
        # chat_history is included in the outputs so the state list is
        # actually reset, not just the visible transcript.
        btn_clear.click(
            fn=clear_history,
            outputs=[chatbot, user_input, chat_history],
        )
    return demo


gradio_chatbot().launch(debug=True)