|
import os |
|
import pickle |
|
from langchain.document_loaders import UnstructuredURLLoader |
|
from langchain.text_splitter import CharacterTextSplitter |
|
from InstructorEmbedding import INSTRUCTOR |
|
from langchain.vectorstores import FAISS |
|
from langchain.embeddings import HuggingFaceInstructEmbeddings |
|
from langchain.chains import RetrievalQA |
|
from langchain import HuggingFaceHub |
|
from langchain.utilities import GoogleSerperAPIWrapper |
|
import gradio as gr |
|
|
|
class Chatbot:
    """Question-answering bot over pages fetched from www.iimrohtak.ac.in.

    Construction obtains a FAISS vector store (loaded from ``INDEX_PATH`` when
    that file exists, otherwise built from ``URLS`` and then cached there) and
    wires its retriever into a LangChain ``RetrievalQA`` chain backed by a
    Hugging Face Hub model.

    Credentials are never stored in the source: every variable named in
    ``REQUIRED_ENV_VARS`` must be present in the environment before the class
    is instantiated.
    """

    # File used to cache the pickled FAISS vector store between runs.
    INDEX_PATH = "db_instructEmbedd.pkl"

    # Number of chunks the retriever hands to the QA chain per query.
    TOP_K = 3

    # Environment variables that must hold valid API credentials.
    REQUIRED_ENV_VARS = ("HUGGINGFACEHUB_API_TOKEN", "SERPER_API_KEY")

    # Pages and PDFs to index, each listed once, in first-seen order.
    URLS = (
        'https://www.iimrohtak.ac.in/job.php',
        'https://www.iimrohtak.ac.in/mandatory-disclosure.php',
        'https://www.iimrohtak.ac.in/placement.php',
        'https://www.iimrohtak.ac.in/tender.php',
        'https://www.iimrohtak.ac.in/rti.php',
        'https://www.iimrohtak.ac.in/library.php',
        'https://www.iimrohtak.ac.in/index.php',
        'https://www.iimrohtak.ac.in/about.php',
        'https://www.iimrohtak.ac.in/vision.php',
        'https://www.iimrohtak.ac.in/mission.php',
        'https://www.iimrohtak.ac.in/board.php',
        'https://www.iimrohtak.ac.in/director-message.php',
        'https://www.iimrohtak.ac.in/accreditation.php',
        'https://www.iimrohtak.ac.in/logo.php',
        'https://www.iimrohtak.ac.in/nirf.php',
        'https://www.iimrohtak.ac.in/iim-rohtak-values.php',
        'https://www.iimrohtak.ac.in/council-for-strategic-affairs.php',
        'https://www.iimrohtak.ac.in/ranking.php',
        'https://www.iimrohtak.ac.in/annual-report.php',
        'https://www.iimrohtak.ac.in/event.php',
        'https://www.iimrohtak.ac.in/event-gallery.php',
        'https://www.iimrohtak.ac.in/initiative.php',
        'https://www.iimrohtak.ac.in/iimr-in-media.php',
        'https://www.iimrohtak.ac.in/internal-complaint-commitee.php',
        'https://www.iimrohtak.ac.in',
        'https://www.iimrohtak.ac.in/pgp.php',
        'https://www.iimrohtak.ac.in/pgp-first-year.php',
        'https://www.iimrohtak.ac.in/pgp-second-year.php',
        'https://www.iimrohtak.ac.in/objective-methodology.php',
        'https://www.iimrohtak.ac.in/programme-fee.php',
        'https://www.iimrohtak.ac.in/academic-calendar.php',
        'https://www.iimrohtak.ac.in/pgp-contact.php',
        'https://www.iimrohtak.ac.in/panel/assets/images/prospectus/16891449485032.pdf',
        'https://www.iimrohtak.ac.in/dpm.php',
        'https://www.iimrohtak.ac.in/dpm-admission.php',
        'https://www.iimrohtak.ac.in/areas-of-specialisation.php',
        'https://www.iimrohtak.ac.in/financial-assistance.php',
        'https://www.iimrohtak.ac.in/panel/assets/images/prospectus/16903487969776.pdf',
        'https://www.iimrohtak.ac.in/faqs-for-dpm.php',
        'https://www.iimrohtak.ac.in/dpm-student.php',
        'https://www.iimrohtak.ac.in/publication.php',
        'https://www.iimrohtak.ac.in/dpm-contact.php',
        'https://www.iimrohtak.ac.in/ipm.php',
        'https://www.iimrohtak.ac.in/faculty-speaks.php',
        'https://www.iimrohtak.ac.in/ipm-curriculum.php',
        'https://www.iimrohtak.ac.in/panel/assets/images/prospectus/16881248909525.pdf',
        'https://www.iimrohtak.ac.in/ipm-admission.php',
        'https://www.iimrohtak.ac.in/ipm-contact.php',
        'https://www.iimrohtak.ac.in/ipm-important-date.php',
        'https://www.iimrohtak.ac.in/ipl.php',
        'https://www.iimrohtak.ac.in/ipl-admission.php',
        'https://www.iimrohtak.ac.in/panel/assets/images/programme-fee/16881248477847.pdf',
        'https://www.iimrohtak.ac.in/ipl-faculty.php',
        'https://www.iimrohtak.ac.in/ipl-contactus.php',
        'https://www.iimrohtak.ac.in/ipl-advisory.php',
        'https://www.iimrohtak.ac.in/ipl-curriculum.php',
        'https://www.iimrohtak.ac.in/panel/assets/images/prospectus/16881265522765.pdf',
        'https://www.iimrohtak.ac.in/fpm-about.php',
        'https://www.iimrohtak.ac.in/panel/assets/images/prospectus/16881252023208.pdf',
        'https://www.iimrohtak.ac.in/fpm-admission.php',
        'https://www.iimrohtak.ac.in/fpm-course-work.php',
        'https://www.iimrohtak.ac.in/panel/assets/images/lor/16884755042121.pdf',
        'https://www.iimrohtak.ac.in/fpm-contact.php',
        'https://www.iimrohtak.ac.in/enquiry.php',
        'https://www.iimrohtak.ac.in/epgdsm-about.php',
        'https://www.iimrohtak.ac.in/epgdsm-director-message.php',
        'https://www.iimrohtak.ac.in/epgdsm-advisory.php',
        'https://www.iimrohtak.ac.in/epgdsm-program-curriculum.php',
        'https://www.iimrohtak.ac.in/epgdsm-faculty.php',
        'https://www.iimrohtak.ac.in/epgdsm-admission.php',
        'https://www.iimrohtak.ac.in/epgdsm-contact.php',
        'https://www.iimrohtak.ac.in/epgdsm-blog.php',
        'https://www.iimrohtak.ac.in/one-year-post-graduate-programme-in-management-for-executives.php',
        'https://www.iimrohtak.ac.in/executive-training.php',
        'https://www.iimrohtak.ac.in/faculty-by-area.php',
        'https://www.iimrohtak.ac.in/director.php',
        'https://www.iimrohtak.ac.in/dean.php',
        'https://www.iimrohtak.ac.in/dean-ree.php',
        'https://www.iimrohtak.ac.in/faculty-by-name.php',
        'https://www.iimrohtak.ac.in/visiting-faculty.php',
        'https://www.iimrohtak.ac.in/adjunct-faculty.php',
        'https://www.iimrohtak.ac.in/papers-presented-in-conferences.php',
        'https://www.iimrohtak.ac.in/broad-research-areas.php',
        'https://www.iimrohtak.ac.in/cases-other-publication.php',
        'https://www.iimrohtak.ac.in/emerging-economies-cases-journal.php',
        'https://www.iimrohtak.ac.in/newspaper-articles-interviews.php',
        'https://www.iimrohtak.ac.in/assets/images/Research Policy.pdf',
        'https://www.iimrohtak.ac.in/partnership.php',
        'https://www.iimrohtak.ac.in/institutional-relations-committee.php',
        'https://www.iimrohtak.ac.in/international-partnerships.php',
        'https://www.iimrohtak.ac.in/student-exchange.php',
        'https://www.iimrohtak.ac.in/collaborations-contact.php',
        'https://www.iimrohtak.ac.in/membership.php',
        'https://www.iimrohtak.ac.in/contact.php',
        'https://www.iimrohtak.ac.in/it-resource-centre.php',
        'https://www.iimrohtak.ac.in/privacy-policy.php',
        'https://www.iimrohtak.ac.in/panel/assets/images/admission-policy/admissionpolicy2023-25.pdf',
        'https://www.iimrohtak.ac.in/students-clubs-and-committees.php',
        'https://www.iimrohtak.ac.in/disclaimer.php',
    )

    # Raw documents fetched by load_data(); None until the pages are downloaded.
    data = None

    # Chunks produced by split_documents(); None until the pages are split.
    docs = None

    def __init__(self):
        """Validate credentials, obtain the vector index and build the QA chain.

        Raises:
            RuntimeError: If a variable listed in ``REQUIRED_ENV_VARS`` is
                unset or empty.
        """
        # Secrets must be supplied through the environment. API tokens that
        # are committed to source control are readable by anyone with access
        # to the repository and have to be treated as compromised.
        missing = [name for name in self.REQUIRED_ENV_VARS if not os.environ.get(name)]
        if missing:
            raise RuntimeError(
                "Missing required environment variable(s): " + ", ".join(missing)
            )
        # The token is also published under this second name, mirroring what
        # this constructor has always exported; an explicit value wins.
        os.environ.setdefault(
            "Hugging_Face_API_KEY", os.environ["HUGGINGFACEHUB_API_TOKEN"]
        )

        if os.path.exists(self.INDEX_PATH):
            # A cached index makes re-downloading every page pointless.
            self.load_embeddings()
        else:
            # First run: build the index from scratch and cache it.
            self.load_data()
            self.split_documents()
            self.create_embeddings()
        self.create_qa_model()

    def load_data(self):
        """Download every entry of ``URLS`` and store the documents in ``self.data``."""
        # Order-preserving de-duplication: a repeated URL costs an extra
        # download and puts identical chunks into the index.
        unique_urls = list(dict.fromkeys(self.URLS))
        loader = UnstructuredURLLoader(urls=unique_urls)
        self.data = loader.load()

    def split_documents(self):
        """Split ``self.data`` on newlines into chunks stored in ``self.docs``.

        The pages are downloaded first when ``self.data`` has not been
        populated yet, so the method can be called on its own.
        """
        if self.data is None:
            self.load_data()
        text_splitter = CharacterTextSplitter(separator='\n', chunk_size=500, chunk_overlap=20)
        self.docs = text_splitter.split_documents(self.data)

    def create_embeddings(self):
        """Embed ``self.docs`` into a FAISS store, set ``self.retriever`` and cache the store.

        The documents are split first when ``self.docs`` has not been
        populated yet. The store is pickled to ``INDEX_PATH``.
        """
        if self.docs is None:
            self.split_documents()
        instructor_embeddings = HuggingFaceInstructEmbeddings(model_name="sembeddings/model_gpt_trained")
        store = FAISS.from_documents(self.docs, instructor_embeddings)
        self.retriever = store.as_retriever(search_kwargs={"k": self.TOP_K})

        with open(self.INDEX_PATH, "wb") as f:
            pickle.dump(store, f)

    def load_embeddings(self):
        """Load the pickled vector store from ``INDEX_PATH`` and set ``self.retriever``.

        Raises:
            FileNotFoundError: If ``INDEX_PATH`` does not exist.
        """
        # SECURITY: unpickling runs arbitrary code embedded in the file. Only
        # load an index that this application wrote itself.
        with open(self.INDEX_PATH, "rb") as f:
            store = pickle.load(f)

        self.retriever = store.as_retriever(search_kwargs={"k": self.TOP_K})

    def create_qa_model(self):
        """Build the ``RetrievalQA`` chain over ``self.retriever`` and store it in ``self.qa``."""
        llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.1})
        self.qa = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=self.retriever,
            return_source_documents=True,
        )

    def search_google(self, query):
        """Run *query* through the Serper Google Search wrapper and return its output."""
        search = GoogleSerperAPIWrapper()
        return search.run(query)

    def chat(self, query):
        """Answer *query* from the indexed pages, falling back to a web search.

        Returns:
            The ``'result'`` entry produced by the retrieval chain, or the
            output of ``search_google`` when that entry is empty or blank.
        """
        result = self.qa({'query': query})
        answer = result['result']
        if answer and answer.strip():
            return answer
        # Only spend a web-search call when the index produced nothing usable.
        return self.search_google(query)
|
|
|
# One bot instance shared by every request; all of Chatbot's setup work runs
# right here, when the module is executed.
chatbot = Chatbot()
|
|
|
def chatbot_response(query):
    """Gradio callback: pass the user's text to the shared bot and return its reply."""
    return chatbot.chat(query)
|
|
|
# Wrap the callback in a single text-in / text-out Gradio interface and launch it.
iface = gr.Interface(
    fn=chatbot_response,
    inputs="text",
    outputs="text",
    title="Chatbot Trained on Indian Exam Articles",
)
iface.launch()
|
|