chatbot_trial_2 / app.py
edjdhug3's picture
app.py
a38b3b9
raw
history blame
2.36 kB
import os
import pickle
# SECURITY: a live Hugging Face access token used to be hard-coded on this
# line. Anything committed to a public repository must be treated as leaked:
# revoke that token and supply a new one through the deployment environment
# (for example a Space secret) instead of through source code.
#
# The variable is still always defined, as before; its value is now mirrored
# from HUGGINGFACEHUB_API_TOKEN when that is set, and is empty otherwise.
os.environ.setdefault(
    "Hugging_Face_API_KEY",
    os.environ.get("HUGGINGFACEHUB_API_TOKEN", ""),
)
# Exam overview pages that make up the chatbot's knowledge base.
# Every entry needs its own trailing comma: two adjacent string literals
# without one are silently concatenated into a single (invalid) URL.
urls = [
    'https://zollege.in/exams/bitsat',
    'https://zollege.in/exams/cat',
    'https://zollege.in/exams/gate',
    'https://zollege.in/exams/neet',
    'https://zollege.in/exams/lsat',
    'https://zollege.in/exams/jee-advanced',
    'https://zollege.in/exams/aipmcet',
]
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import CharacterTextSplitter

# Load the pages listed in `urls` into LangChain documents.
loaders = UnstructuredURLLoader(urls=urls)
data = loaders.load()

# Split the loaded documents on newlines (chunk_size=500, chunk_overlap=20)
# so that each piece can be embedded and retrieved on its own.
text_splitter = CharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=500,
    separator='\n',
)
docs = text_splitter.split_documents(data)
from InstructorEmbedding import INSTRUCTOR
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceInstructEmbeddings

# Embedding model used to vectorise the document chunks.
# Original author's note: "nice working with model =
# sentence-transformers/all-MiniLM-L6-v2".
instructor_embeddings = HuggingFaceInstructEmbeddings(
    model_name="sembeddings/model_gpt_trained"
)

# Build the FAISS vector store over all chunks in `docs`.
db_instructEmbedd = FAISS.from_documents(docs, instructor_embeddings)

# Keep writing a pickled copy of the store to disk, as the script always did.
with open("db_instructEmbedd.pkl", "wb") as f:
    pickle.dump(db_instructEmbedd, f)

# The store that was just built is used directly. Reading the pickle straight
# back in only produced a copy of the same object (and briefly bound
# `retriever` to a vector store rather than a retriever), so that round trip
# has been removed.
retriever = db_instructEmbedd.as_retriever(search_kwargs={"k": 3})
from langchain.chains import RetrievalQA
from langchain import HuggingFaceHub

# SECURITY: the Hugging Face token used to be hard-coded here. A token that
# has been committed must be revoked; the replacement is read from the
# environment. `Hugging_Face_API_KEY`, the other variable this script
# populates, is accepted as a fallback source.
_hf_token = (
    os.environ.get("HUGGINGFACEHUB_API_TOKEN")
    or os.environ.get("Hugging_Face_API_KEY")
)
if not _hf_token:
    raise RuntimeError(
        "No Hugging Face token configured: set the HUGGINGFACEHUB_API_TOKEN "
        "environment variable before starting the app."
    )
os.environ["HUGGINGFACEHUB_API_TOKEN"] = _hf_token

# Hosted LLM that writes the answers.
# Original author's note: repo_id = gpt2 also worked well.
llm = HuggingFaceHub(
    repo_id="google/flan-t5-xxl",
    model_kwargs={"temperature": 0.1},
)

# Retrieval-augmented QA chain: the chunks returned by `retriever` are passed
# to the LLM with chain_type="stuff", and the source documents are returned
# alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)
import pprint

import streamlit as st
from langchain.utilities import GoogleSerperAPIWrapper


def main():
    """Render the chat page and answer the question the user submitted.

    `st.chat_input` returns None until the user actually sends a message, so
    the QA chain (and the follow-up web search) only run once a question is
    available; before that only the static page elements are drawn.
    """
    st.title('Chatbot Trained on Indian Exam Articles')
    st.header("Hi!! How Can I Help You ?")

    query = st.chat_input('> ')
    result = None
    if query:
        result = qa({'query': query})
        st.write(result['result'])

    # NOTE(review): the button's return value is not used, so clicking it
    # currently does nothing beyond re-running the script.
    st.button('Not Satisfied! Talk to our Expert Here..')

    # SECURITY: the Serper API key used to be hard-coded here; a committed key
    # must be revoked. It is now expected in the SERPER_API_KEY environment
    # variable, and the web search is skipped when it is not configured.
    if result is not None and os.environ.get("SERPER_API_KEY"):
        search = GoogleSerperAPIWrapper()
        # NOTE(review): as in the original script, the search result is not
        # displayed or stored anywhere - confirm what it is meant to feed.
        search.run(result['query'])


if __name__ == "__main__":
    main()