# Hugging Face Space (status when captured: sleeping)
import os
import pickle

# SECURITY: a Hugging Face token used to be hard-coded on this line. Secrets
# must come from the environment (e.g. a Space secret), never from source;
# the previously committed token should be revoked.
# setdefault keeps the key present for any code that indexes os.environ
# directly, without overriding a value that is already configured. No code
# visible in this file reads this variable.
os.environ.setdefault(
    "Hugging_Face_API_KEY",
    os.environ.get("HUGGINGFACEHUB_API_TOKEN", ""),
)
# Exam pages that are scraped to build the chatbot's knowledge base.
# Every entry needs its own trailing comma: without one, Python silently
# concatenates adjacent string literals into a single (invalid) URL.
urls = [
    'https://zollege.in/exams/bitsat',
    'https://zollege.in/exams/cat',
    'https://zollege.in/exams/gate',
    'https://zollege.in/exams/neet',
    'https://zollege.in/exams/lsat',
    'https://zollege.in/exams/jee-advanced',
    'https://zollege.in/exams/aipmcet',
]
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import CharacterTextSplitter

# Download every page listed in `urls` and turn it into LangChain documents.
loaders = UnstructuredURLLoader(urls=urls)
data = loaders.load()

# Split the pages on newlines into small, slightly overlapping chunks so each
# chunk can be embedded and retrieved on its own.
text_splitter = CharacterTextSplitter(
    separator='\n',
    chunk_size=500,
    chunk_overlap=20,
)
docs = text_splitter.split_documents(data)
from InstructorEmbedding import INSTRUCTOR
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceInstructEmbeddings

# Embedding model used to index the chunks.
# Author's note: sentence-transformers/all-MiniLM-L6-v2 also worked well.
instructor_embeddings = HuggingFaceInstructEmbeddings(
    model_name="sembeddings/model_gpt_trained",
)

# Build the FAISS index over the document chunks.
db_instructEmbedd = FAISS.from_documents(docs, instructor_embeddings)

# Persist the index to disk, then read it back.
with open("db_instructEmbedd.pkl", "wb") as f:
    pickle.dump(db_instructEmbedd, f)

# NOTE(security): pickle.load can execute arbitrary code. This is acceptable
# only because the file was written by this same script a few lines above;
# never point this at a pickle obtained from elsewhere.
with open("db_instructEmbedd.pkl", "rb") as f:
    loaded_store = pickle.load(f)

# Retriever handed to the QA chain: returns the 3 closest chunks per query.
retriever = loaded_store.as_retriever(search_kwargs={"k": 3})
from langchain.chains import RetrievalQA
from langchain import HuggingFaceHub

# SECURITY: the Hub token used to be hard-coded here. It must now be supplied
# through the HUGGINGFACEHUB_API_TOKEN environment variable (e.g. a Space
# secret); the previously committed token should be revoked.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    raise RuntimeError(
        "HUGGINGFACEHUB_API_TOKEN is not set; export it (or add it as a "
        "Space secret) before starting the app."
    )

# Hosted LLM that writes the answers.
# Author's note: repo_id="gpt2" was also reported to work.
llm = HuggingFaceHub(
    repo_id="google/flan-t5-xxl",
    model_kwargs={"temperature": 0.1},
)

# "stuff" chain: the retrieved chunks are placed into a single prompt.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)
import pprint

import streamlit as st
from langchain.utilities import GoogleSerperAPIWrapper


def main():
    """Render the chat page: answer from the QA chain, with a web-search fallback.

    The question is read from the chat box and answered by the module-level
    ``qa`` chain. The last question/answer pair is kept in
    ``st.session_state`` because clicking the fallback button re-runs the
    script, and on that re-run the chat box no longer returns the question.
    """
    st.title('Chatbot Trained on Indian Exam Articles')
    st.header("Hi!! How Can I Help You ?")

    query = st.chat_input('> ')
    # chat_input yields None until the user submits something; calling the
    # chain with None would fail, so only query when there is a question.
    if query:
        result = qa({'query': query})
        st.session_state['last_query'] = result['query']
        st.session_state['last_answer'] = result['result']

    if 'last_answer' not in st.session_state:
        return  # nothing asked yet, so there is no answer or fallback to show

    st.write(st.session_state['last_answer'])

    # Run the (billable) web search only when the user explicitly asks for it,
    # and show its output instead of discarding it.
    if st.button('Not Satisfied! Talk to our Expert Here..'):
        # SECURITY: the Serper key used to be hard-coded here; it must now be
        # provided via the SERPER_API_KEY environment variable.
        if not os.environ.get("SERPER_API_KEY"):
            st.error("SERPER_API_KEY is not set; web search is unavailable.")
            return
        search = GoogleSerperAPIWrapper()
        st.write(search.run(st.session_state['last_query']))


if __name__ == "__main__":
    main()