import os
import pickle
os.environ["Hugging_Face_API_KEY"] = "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"

urls = [
    'https://zollege.in/exams/bitsat',
    'https://zollege.in/exams/cat',
    'https://zollege.in/exams/gate',
    'https://zollege.in/exams/neet',
    'https://zollege.in/exams/lsat',
    'https://zollege.in/exams/jee-advanced',
    'https://zollege.in/exams/aipmcet',
]

from langchain.document_loaders import UnstructuredURLLoader

# Fetch each exam page and parse it into LangChain Documents.
loader = UnstructuredURLLoader(urls=urls)
data = loader.load()
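# Note: UnstructuredURLLoader depends on the `unstructured` package for HTML
# parsing (pip install unstructured), so install it before running.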

from langchain.text_splitter import CharacterTextSplitter

# Split on newlines into ~500-character chunks with a 20-character overlap so
# context is not lost at chunk boundaries.
text_splitter = CharacterTextSplitter(separator='\n',
                                      chunk_size=500,
                                      chunk_overlap=20)

docs = text_splitter.split_documents(data)
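# Quick sanity check (illustrative addition, not part of the original
# pipeline): confirm the pages were fetched and split before indexing.
print(f"Loaded {len(data)} pages; produced {len(docs)} chunks")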


from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceInstructEmbeddings

# sentence-transformers/all-MiniLM-L6-v2 also works well as the model_name.
instructor_embeddings = HuggingFaceInstructEmbeddings(
    model_name="sembeddings/model_gpt_trained")

# Build the FAISS index over the chunks.
db_instructEmbedd = FAISS.from_documents(docs, instructor_embeddings)

# Persist the index so it does not have to be rebuilt on every run.
with open("db_instructEmbedd.pkl", "wb") as f:
    pickle.dump(db_instructEmbedd, f)

# Reload the vector store and expose it as a top-3 retriever.
with open("db_instructEmbedd.pkl", "rb") as f:
    vectorstore = pickle.load(f)

retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
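# Note: pickling the whole vector store can be fragile across library versions;
# the FAISS wrapper also offers db_instructEmbedd.save_local("faiss_index") and
# FAISS.load_local("faiss_index", instructor_embeddings) as a sturdier option.
#
# Illustrative retriever usage (the question string is made up):
#   hits = retriever.get_relevant_documents("When is the NEET exam held?")
#   for doc in hits:
#       print(doc.metadata.get("source"), doc.page_content[:80])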


os.environ["HUGGINGFACEHUB_API_TOKEN"] = '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'

from langchain.chains import RetrievalQA
from langchain import HuggingFaceHub

# repo_id="gpt2" also works as a lighter-weight alternative.
llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.1})

qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True)
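# Illustrative direct call, bypassing the UI (the question is made up):
#   result = qa({"query": "What is the exam pattern for BITSAT?"})
#   print(result["result"])
#   for doc in result["source_documents"]:
#       print(doc.metadata.get("source"))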


import streamlit as st

st.title('Chatbot Trained on Indian Exam Articles')
st.header("Hi!! How Can I Help You ?")

# st.chat_input returns None until the user submits, so guard before querying.
query = st.chat_input('> ')
result = None
if query:
    result = qa({'query': query})
    st.write(result['result'])
not_satisfied = st.button('Not Satisfied! Talk to our Expert Here..')

# SERPER_API_KEY is likewise read from the environment by the Serper wrapper:
#   export SERPER_API_KEY="..."

from langchain.utilities import GoogleSerperAPIWrapper

search = GoogleSerperAPIWrapper()

# Fallback: if the user pressed the button above, run the same question
# through a live Google (Serper) search and show that answer instead.
if not_satisfied and result is not None:
    st.write(search.run(result['query']))


# There is no main() entry point; Streamlit executes this script top to bottom.
# Launch with: streamlit run <this_file>.py