import os
import pickle

from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import CharacterTextSplitter
from InstructorEmbedding import INSTRUCTOR
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.chains import RetrievalQA
from langchain import HuggingFaceHub
from langchain.utilities import GoogleSerperAPIWrapper
import gradio as gr
import pandas as pd

# The pages to index are listed in the first column of the CSV
# (``row[0]`` of each itertuples() row is the DataFrame index).
df = pd.read_csv('linkss.csv')
url = [f"{row[1]}" for row in df.itertuples()]


class Chatbot:
    """Retrieval question-answering bot over the pages listed in ``url``.

    Construction sets the API credentials in the environment, downloads the
    pages, obtains a FAISS-backed retriever (from the on-disk pickle when it
    exists, otherwise by building and caching it) and wires that retriever
    to a Hugging Face Hub LLM through a ``RetrievalQA`` chain.
    """

    # Pickle file that caches the FAISS store between runs.
    _INDEX_PATH = "db_instructEmbedd.pkl"
    # Number of documents the retriever returns per query (``k``).
    _TOP_K = 3

    def __init__(self) -> None:
        """Configure credentials, load the data, and build the QA chain."""
        # SECURITY: these tokens are committed in source and must be treated
        # as leaked -- rotate them and supply fresh values through the
        # environment. ``setdefault`` lets externally provided credentials
        # take precedence; the literals remain only as a fallback so existing
        # deployments keep working until they are migrated.
        os.environ.setdefault(
            "Hugging_Face_API_KEY", "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"
        )
        os.environ.setdefault(
            "HUGGINGFACEHUB_API_TOKEN", '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'
        )
        os.environ.setdefault(
            "SERPER_API_KEY", "a69857e460dd51585e009a43743711b110b6beee"
        )
        self.load_data()
        self.load_embeddings()
        self.create_qa_model()

    def load_data(self) -> None:
        """Download every page in the module-level ``url`` list into ``self.data``."""
        urls = url
        loaders = UnstructuredURLLoader(urls=urls)
        self.data = loaders.load()

    def split_documents(self) -> None:
        """Split ``self.data`` on newlines into ``self.docs``.

        Chunks are 500 characters with a 20-character overlap. Requires
        ``load_data()`` to have run first.
        """
        text_splitter = CharacterTextSplitter(
            separator='\n', chunk_size=500, chunk_overlap=20
        )
        self.docs = text_splitter.split_documents(self.data)

    def create_embeddings(self) -> None:
        """Embed ``self.docs`` into a FAISS store, set the retriever, cache the store.

        Requires ``split_documents()`` to have run first. The store is
        pickled to ``_INDEX_PATH`` so later runs can skip the embedding step.
        """
        instructor_embeddings = HuggingFaceInstructEmbeddings(
            model_name="sembeddings/model_gpt_trained"
        )
        db_instructEmbedd = FAISS.from_documents(self.docs, instructor_embeddings)
        self.retriever = db_instructEmbedd.as_retriever(
            search_kwargs={"k": self._TOP_K}
        )
        with open(self._INDEX_PATH, "wb") as f:
            pickle.dump(db_instructEmbedd, f)

    def load_embeddings(self) -> None:
        """Set ``self.retriever`` from the cached store, building it if absent.

        When the pickle file does not exist yet (e.g. on a first run) the
        index is built via ``split_documents()`` and ``create_embeddings()``
        instead of failing with ``FileNotFoundError``.
        """
        try:
            with open(self._INDEX_PATH, "rb") as f:
                # NOTE(security): pickle.load executes arbitrary code from the
                # file. Only load an index this application wrote itself.
                store = pickle.load(f)
        except FileNotFoundError:
            # No cache yet: build it. create_embeddings() sets self.retriever
            # and writes the pickle for subsequent runs.
            if not hasattr(self, "data"):
                self.load_data()
            self.split_documents()
            self.create_embeddings()
            return
        self.retriever = store.as_retriever(search_kwargs={"k": self._TOP_K})

    def create_qa_model(self) -> None:
        """Create the ``RetrievalQA`` chain in ``self.qa``.

        Uses the ``google/flan-t5-xxl`` model on Hugging Face Hub with the
        "stuff" chain type and ``self.retriever``; source documents are
        included in the chain's result.
        """
        llm = HuggingFaceHub(
            repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.1}
        )
        self.qa = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=self.retriever,
            return_source_documents=True,
        )

    def chat(self, query: str) -> str:
        """Run *query* through the QA chain and return its ``'result'`` entry."""
        result = self.qa({'query': query})
        return result['result']
# One shared bot instance, created at import time and reused by the callback.
chatbot = Chatbot()


def chatbot_response(query):
    """Gradio callback: pass *query* to the shared bot and return its answer."""
    return chatbot.chat(query)


# Text-in / text-out web UI around the callback; launch() starts serving it.
iface = gr.Interface(
    fn=chatbot_response,
    inputs="text",
    outputs="text",
    title="Chatbot Trained on Indian Exam Articles",
)
iface.launch()