"""Streamlit chatbot that answers questions about Indian entrance exams.

Pipeline: download a fixed set of exam articles, split them into chunks,
embed the chunks into a FAISS index, and answer user questions with a
RetrievalQA chain backed by a Hugging Face Hub model.  A Google (Serper)
search is offered as a fallback when the user is not satisfied.

Configuration (environment variables, must be set before launch):
    HUGGINGFACEHUB_API_TOKEN  -- Hugging Face Hub access token
    SERPER_API_KEY            -- serper.dev API key

Run with:  streamlit run <this file>
"""

import os
import pickle

import streamlit as st
from InstructorEmbedding import INSTRUCTOR  # noqa: F401  (unused in this file; kept from the original import list)
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.utilities import GoogleSerperAPIWrapper
from langchain.vectorstores import FAISS

# File the FAISS vector store is pickled to / loaded from.
INDEX_PATH = "db_instructEmbedd.pkl"

# Number of chunks the retriever hands to the QA chain per question.
RETRIEVER_TOP_K = 3

# Articles the chatbot is built from when no explicit URL list is given.
DEFAULT_URLS = (
    'https://zollege.in/exams/bitsat',
    'https://zollege.in/exams/cat',
    'https://zollege.in/exams/gate',
    'https://zollege.in/exams/neet',
    'https://zollege.in/exams/lsat',
    'https://zollege.in/exams/jee-advanced',
    'https://zollege.in/exams/aipmcet',
)

# Credentials that must be supplied through the environment.
_REQUIRED_ENV_VARS = ("HUGGINGFACEHUB_API_TOKEN", "SERPER_API_KEY")


class Chatbot:
    """Builds and serves a retrieval-augmented QA chatbot over exam articles."""

    def __init__(self):
        """Validate credentials and initialise an empty chatbot.

        SECURITY: API keys used to be embedded in this file in plain text.
        Treat those keys as leaked and rotate them.  Keys are now read from
        the environment only and are never written into source.

        Raises:
            EnvironmentError: if a required environment variable is unset
                or empty.
        """
        missing = [name for name in _REQUIRED_ENV_VARS if not os.environ.get(name)]
        if missing:
            raise EnvironmentError(
                "Missing required environment variable(s): "
                + ", ".join(missing)
                + ". Set them before starting the app."
            )
        # The original code also exported the token under this second name;
        # keep doing so in case something else in the process reads it.
        os.environ.setdefault(
            "Hugging_Face_API_KEY", os.environ["HUGGINGFACEHUB_API_TOKEN"]
        )
        # RetrievalQA chain; populated by create_qa_model().
        self.qa = None

    def load_data(self, urls=None):
        """Download and parse the source articles.

        Args:
            urls: optional iterable of article URLs; defaults to
                DEFAULT_URLS.

        Returns:
            The documents produced by ``UnstructuredURLLoader.load()``.
        """
        url_list = list(DEFAULT_URLS if urls is None else urls)
        loader = UnstructuredURLLoader(urls=url_list)
        return loader.load()

    def split_documents(self, data):
        """Split loaded documents into newline-separated ~500-char chunks.

        Args:
            data: documents as returned by load_data().

        Returns:
            The chunked documents.
        """
        text_splitter = CharacterTextSplitter(
            separator='\n', chunk_size=500, chunk_overlap=20
        )
        return text_splitter.split_documents(data)

    def create_embeddings(self, docs):
        """Embed *docs* into a FAISS index, persist it, and return a retriever.

        The vector store is pickled to INDEX_PATH so later runs can call
        load_embeddings() instead of re-embedding everything.

        Args:
            docs: chunked documents as returned by split_documents().

        Returns:
            A retriever over the new index returning RETRIEVER_TOP_K chunks.
        """
        instructor_embeddings = HuggingFaceInstructEmbeddings(
            model_name="sembeddings/model_gpt_trained"
        )
        vector_store = FAISS.from_documents(docs, instructor_embeddings)
        # NOTE(review): langchain's FAISS also offers save_local()/load_local();
        # pickle is kept so index files written by earlier runs stay loadable.
        with open(INDEX_PATH, "wb") as f:
            pickle.dump(vector_store, f)
        return vector_store.as_retriever(search_kwargs={"k": RETRIEVER_TOP_K})

    def load_embeddings(self):
        """Load the persisted FAISS index and return a retriever over it.

        Returns:
            A retriever over the stored index returning RETRIEVER_TOP_K chunks.

        Raises:
            FileNotFoundError: if INDEX_PATH does not exist yet.
        """
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file.  Only ever load an index file that this application wrote.
        with open(INDEX_PATH, "rb") as f:
            vector_store = pickle.load(f)
        return vector_store.as_retriever(search_kwargs={"k": RETRIEVER_TOP_K})

    def create_qa_model(self, retriever):
        """Build the RetrievalQA chain and remember it on the instance.

        Args:
            retriever: retriever from create_embeddings()/load_embeddings().

        Returns:
            The RetrievalQA chain (also stored as ``self.qa`` so that
            run_chatbot() can use it).
        """
        llm = HuggingFaceHub(
            repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.1}
        )
        self.qa = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
        )
        return self.qa

    def run_chatbot(self, prompt_label='> '):
        """Render the Streamlit UI and answer the user's question.

        Args:
            prompt_label: label shown on the question text box.

        Raises:
            RuntimeError: if create_qa_model() has not been called yet.
        """
        if self.qa is None:
            raise RuntimeError(
                "QA chain is not initialised; call create_qa_model() first."
            )

        st.title('Chatbot Trained on Indian Exam Articles')
        st.header("Hi!! How Can I Help You ?")
        query = st.text_input(prompt_label)

        # The text box is empty on first render; do not spend an LLM call
        # (or a web search) on an empty question.
        if not query or not query.strip():
            return

        result = self.qa({'query': query})
        st.write(result['result'])

        if st.button('Not Satisfied! Talk to our Expert Here..'):
            st.write(self.run_google_search(query))

    def run_google_search(self, query):
        """Return the Serper (Google) search result text for *query*."""
        search = GoogleSerperAPIWrapper()
        return search.run(query)


def main():
    """Entry point: prepare the retriever and serve the chat UI."""
    try:
        chatbot = Chatbot()
    except EnvironmentError as exc:
        st.error(str(exc))
        st.stop()

    # Streamlit re-executes this script on every interaction, so reuse the
    # persisted index instead of re-downloading and re-embedding each time.
    # Delete INDEX_PATH to force a rebuild.
    if os.path.exists(INDEX_PATH):
        retriever = chatbot.load_embeddings()
    else:
        data = chatbot.load_data()
        docs = chatbot.split_documents(data)
        retriever = chatbot.create_embeddings(docs)

    chatbot.create_qa_model(retriever)
    chatbot.run_chatbot(prompt_label='ENTER TEXT HERE ')


if __name__ == "__main__":
    main()