chatbot_3 / app.py
edjdhug3's picture
Update app.py
c0bfd55
raw
history blame
9.33 kB
import os
import pickle
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import CharacterTextSplitter
from InstructorEmbedding import INSTRUCTOR
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.chains import RetrievalQA
from langchain import HuggingFaceHub
from langchain.utilities import GoogleSerperAPIWrapper
import gradio as gr
class Chatbot:
    """Retrieval question-answering bot over pages of the IIM Rohtak website.

    Construction loads the source pages, obtains a FAISS retriever (from the
    pickled index on disk when present, otherwise by building it), and wires
    the retriever to a Hugging Face Hub LLM through a ``RetrievalQA`` chain.

    API credentials are read from the process environment and must never be
    committed to source control:

    * ``HUGGINGFACEHUB_API_TOKEN`` -- required, used by ``HuggingFaceHub``.
    * ``SERPER_API_KEY`` -- only needed by :meth:`search_google`.
    """

    # File used to cache the FAISS vector store between runs.
    INDEX_PATH = "db_instructEmbedd.pkl"

    # Pages and PDFs that make up the knowledge base.  Every URL appears once:
    # repeated entries would be fetched and indexed several times and crowd
    # the top-k retrieval results with identical chunks.
    SOURCE_URLS = (
        'https://www.iimrohtak.ac.in/job.php',
        'https://www.iimrohtak.ac.in/mandatory-disclosure.php',
        'https://www.iimrohtak.ac.in/placement.php',
        'https://www.iimrohtak.ac.in/tender.php',
        'https://www.iimrohtak.ac.in/rti.php',
        'https://www.iimrohtak.ac.in/library.php',
        'https://www.iimrohtak.ac.in/index.php',
        'https://www.iimrohtak.ac.in/about.php',
        'https://www.iimrohtak.ac.in/vision.php',
        'https://www.iimrohtak.ac.in/mission.php',
        'https://www.iimrohtak.ac.in/board.php',
        'https://www.iimrohtak.ac.in/director-message.php',
        'https://www.iimrohtak.ac.in/accreditation.php',
        'https://www.iimrohtak.ac.in/logo.php',
        'https://www.iimrohtak.ac.in/nirf.php',
        'https://www.iimrohtak.ac.in/iim-rohtak-values.php',
        'https://www.iimrohtak.ac.in/council-for-strategic-affairs.php',
        'https://www.iimrohtak.ac.in/ranking.php',
        'https://www.iimrohtak.ac.in/annual-report.php',
        'https://www.iimrohtak.ac.in/event.php',
        'https://www.iimrohtak.ac.in/event-gallery.php',
        'https://www.iimrohtak.ac.in/initiative.php',
        'https://www.iimrohtak.ac.in/iimr-in-media.php',
        'https://www.iimrohtak.ac.in/internal-complaint-commitee.php',
        'https://www.iimrohtak.ac.in',
        'https://www.iimrohtak.ac.in/pgp.php',
        'https://www.iimrohtak.ac.in/pgp-first-year.php',
        'https://www.iimrohtak.ac.in/pgp-second-year.php',
        'https://www.iimrohtak.ac.in/objective-methodology.php',
        'https://www.iimrohtak.ac.in/programme-fee.php',
        'https://www.iimrohtak.ac.in/academic-calendar.php',
        'https://www.iimrohtak.ac.in/pgp-contact.php',
        'https://www.iimrohtak.ac.in/panel/assets/images/prospectus/16891449485032.pdf',
        'https://www.iimrohtak.ac.in/dpm.php',
        'https://www.iimrohtak.ac.in/dpm-admission.php',
        'https://www.iimrohtak.ac.in/areas-of-specialisation.php',
        'https://www.iimrohtak.ac.in/financial-assistance.php',
        'https://www.iimrohtak.ac.in/panel/assets/images/prospectus/16903487969776.pdf',
        'https://www.iimrohtak.ac.in/faqs-for-dpm.php',
        'https://www.iimrohtak.ac.in/dpm-student.php',
        'https://www.iimrohtak.ac.in/publication.php',
        'https://www.iimrohtak.ac.in/dpm-contact.php',
        'https://www.iimrohtak.ac.in/ipm.php',
        'https://www.iimrohtak.ac.in/faculty-speaks.php',
        'https://www.iimrohtak.ac.in/ipm-curriculum.php',
        'https://www.iimrohtak.ac.in/panel/assets/images/prospectus/16881248909525.pdf',
        'https://www.iimrohtak.ac.in/ipm-admission.php',
        'https://www.iimrohtak.ac.in/ipm-contact.php',
        'https://www.iimrohtak.ac.in/ipm-important-date.php',
        'https://www.iimrohtak.ac.in/ipl.php',
        'https://www.iimrohtak.ac.in/ipl-admission.php',
        'https://www.iimrohtak.ac.in/panel/assets/images/programme-fee/16881248477847.pdf',
        'https://www.iimrohtak.ac.in/ipl-faculty.php',
        'https://www.iimrohtak.ac.in/ipl-contactus.php',
        'https://www.iimrohtak.ac.in/ipl-advisory.php',
        'https://www.iimrohtak.ac.in/ipl-curriculum.php',
        'https://www.iimrohtak.ac.in/panel/assets/images/prospectus/16881265522765.pdf',
        'https://www.iimrohtak.ac.in/fpm-about.php',
        'https://www.iimrohtak.ac.in/panel/assets/images/prospectus/16881252023208.pdf',
        'https://www.iimrohtak.ac.in/fpm-admission.php',
        'https://www.iimrohtak.ac.in/fpm-course-work.php',
        'https://www.iimrohtak.ac.in/panel/assets/images/lor/16884755042121.pdf',
        'https://www.iimrohtak.ac.in/fpm-contact.php',
        'https://www.iimrohtak.ac.in/enquiry.php',
        'https://www.iimrohtak.ac.in/epgdsm-about.php',
        'https://www.iimrohtak.ac.in/epgdsm-director-message.php',
        'https://www.iimrohtak.ac.in/epgdsm-advisory.php',
        'https://www.iimrohtak.ac.in/epgdsm-program-curriculum.php',
        'https://www.iimrohtak.ac.in/epgdsm-faculty.php',
        'https://www.iimrohtak.ac.in/epgdsm-admission.php',
        'https://www.iimrohtak.ac.in/epgdsm-contact.php',
        'https://www.iimrohtak.ac.in/epgdsm-blog.php',
        'https://www.iimrohtak.ac.in/one-year-post-graduate-programme-in-management-for-executives.php',
        'https://www.iimrohtak.ac.in/executive-training.php',
        'https://www.iimrohtak.ac.in/faculty-by-area.php',
        'https://www.iimrohtak.ac.in/director.php',
        'https://www.iimrohtak.ac.in/dean.php',
        'https://www.iimrohtak.ac.in/dean-ree.php',
        'https://www.iimrohtak.ac.in/faculty-by-name.php',
        'https://www.iimrohtak.ac.in/visiting-faculty.php',
        'https://www.iimrohtak.ac.in/adjunct-faculty.php',
        'https://www.iimrohtak.ac.in/papers-presented-in-conferences.php',
        'https://www.iimrohtak.ac.in/broad-research-areas.php',
        'https://www.iimrohtak.ac.in/cases-other-publication.php',
        'https://www.iimrohtak.ac.in/emerging-economies-cases-journal.php',
        'https://www.iimrohtak.ac.in/newspaper-articles-interviews.php',
        'https://www.iimrohtak.ac.in/assets/images/Research Policy.pdf',
        'https://www.iimrohtak.ac.in/partnership.php',
        'https://www.iimrohtak.ac.in/institutional-relations-committee.php',
        'https://www.iimrohtak.ac.in/international-partnerships.php',
        'https://www.iimrohtak.ac.in/student-exchange.php',
        'https://www.iimrohtak.ac.in/collaborations-contact.php',
        'https://www.iimrohtak.ac.in/membership.php',
        'https://www.iimrohtak.ac.in/contact.php',
        'https://www.iimrohtak.ac.in/it-resource-centre.php',
        'https://www.iimrohtak.ac.in/privacy-policy.php',
        'https://www.iimrohtak.ac.in/panel/assets/images/admission-policy/admissionpolicy2023-25.pdf',
        'https://www.iimrohtak.ac.in/students-clubs-and-committees.php',
        'https://www.iimrohtak.ac.in/disclaimer.php',
    )

    def __init__(self):
        """Load the documents, obtain a retriever and build the QA chain.

        Raises:
            RuntimeError: if ``HUGGINGFACEHUB_API_TOKEN`` is not set in the
                environment.
        """
        # Secrets come from the environment (e.g. the hosting platform's
        # secret store); they are deliberately not embedded in this file.
        token = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
        if not token:
            raise RuntimeError(
                "HUGGINGFACEHUB_API_TOKEN is not set; export it (and "
                "SERPER_API_KEY for web search) before starting the chatbot."
            )
        # Legacy alias kept for backward compatibility; nothing in this class
        # reads it.  NOTE(review): drop once confirmed unused elsewhere.
        os.environ.setdefault("Hugging_Face_API_KEY", token)

        # NOTE(review): the scraped documents are only consumed when the index
        # is rebuilt; loading is kept unconditional so ``self.data`` is always
        # available, at the price of a slower start-up.
        self.load_data()
        if os.path.exists(self.INDEX_PATH):
            self.load_embeddings()
        else:
            # No cached index yet: build it from the freshly loaded pages.
            self.split_documents()
            self.create_embeddings()
        self.create_qa_model()

    def load_data(self):
        """Fetch every page in ``SOURCE_URLS`` and store the documents in ``self.data``."""
        # dict.fromkeys drops any accidental repeats while preserving order.
        unique_urls = list(dict.fromkeys(self.SOURCE_URLS))
        loader = UnstructuredURLLoader(urls=unique_urls)
        self.data = loader.load()

    def split_documents(self):
        """Split ``self.data`` into newline-separated chunks stored in ``self.docs``."""
        text_splitter = CharacterTextSplitter(separator='\n', chunk_size=500, chunk_overlap=20)
        self.docs = text_splitter.split_documents(self.data)

    def create_embeddings(self):
        """Embed ``self.docs`` into a FAISS store, expose it as ``self.retriever`` and cache it.

        The store is pickled to ``INDEX_PATH`` so later start-ups can skip the
        embedding step.
        """
        instructor_embeddings = HuggingFaceInstructEmbeddings(model_name="sembeddings/model_gpt_trained")
        vector_store = FAISS.from_documents(self.docs, instructor_embeddings)
        self.retriever = vector_store.as_retriever(search_kwargs={"k": 3})
        with open(self.INDEX_PATH, "wb") as f:
            pickle.dump(vector_store, f)

    def load_embeddings(self):
        """Load the cached FAISS store from ``INDEX_PATH`` into ``self.retriever``.

        Raises:
            FileNotFoundError: if the cache file does not exist.
        """
        # SECURITY: unpickling executes arbitrary code embedded in the file.
        # Only ever load an index file this application produced itself.
        with open(self.INDEX_PATH, "rb") as f:
            vector_store = pickle.load(f)
        self.retriever = vector_store.as_retriever(search_kwargs={"k": 3})

    def create_qa_model(self):
        """Create the ``RetrievalQA`` chain (``self.qa``) on top of ``self.retriever``."""
        llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.1})
        self.qa = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=self.retriever,
            return_source_documents=True,
        )

    def search_google(self, query):
        """Run *query* through the Serper Google-search wrapper and return its output.

        Requires ``SERPER_API_KEY`` to be set in the environment.
        """
        search = GoogleSerperAPIWrapper()
        return search.run(query)

    def chat(self, query):
        """Answer *query* with the retrieval QA chain and return the answer text."""
        # No web search is issued here: its output was never part of the
        # answer, so calling it only added a billable request and an extra
        # failure point to every chat turn.
        result = self.qa({'query': query})
        return result['result']
# Single shared Chatbot instance, created when the module is loaded and
# reused by the Gradio callback below.
chatbot = Chatbot()


def chatbot_response(query):
    """Gradio callback: pass the user's text to the shared chatbot and return its reply."""
    return chatbot.chat(query)


# Plain text-in / text-out web UI around the callback.
iface = gr.Interface(
    fn=chatbot_response,
    inputs="text",
    outputs="text",
    title="Chatbot Trained on Indian Exam Articles",
)
iface.launch()