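"""Streamlit chatbot answering questions about Indian entrance exams.

Scrapes a fixed set of zollege.in exam articles, splits them into chunks,
indexes them in FAISS with instructor embeddings, and serves answers through
a LangChain RetrievalQA chain backed by google/flan-t5-xxl on the
HuggingFace Hub. A Serper-based Google search is available as a fallback.

Assumed dependencies (the import paths target the pre-0.1 langchain API):
streamlit, langchain, faiss-cpu, InstructorEmbedding, sentence-transformers,
unstructured.
"""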
import os
import pickle

import streamlit as st
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub
from langchain.utilities import GoogleSerperAPIWrapper

class Chatbot:
    def __init__(self):
        os.environ["Hugging_Face_API_KEY"] = "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"
        os.environ["HUGGINGFACEHUB_API_TOKEN"] = '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'
        os.environ["SERPER_API_KEY"] = "a69857e460dd51585e009a43743711b110b6beee"
        
    def load_data(self):
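        """Download the exam articles and return them as LangChain documents."""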
        urls = [
            'https://zollege.in/exams/bitsat',
            'https://zollege.in/exams/cat',
            'https://zollege.in/exams/gate',
            'https://zollege.in/exams/neet',
            'https://zollege.in/exams/lsat',
            'https://zollege.in/exams/jee-advanced',
            'https://zollege.in/exams/aipmcet'
        ]

        loader = UnstructuredURLLoader(urls=urls)
        data = loader.load()

        return data

    def split_documents(self, data):
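        """Split documents into ~500-character chunks with a 20-character overlap."""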
        text_splitter = CharacterTextSplitter(separator='\n', chunk_size=500, chunk_overlap=20)
        docs = text_splitter.split_documents(data)

        return docs

    def create_embeddings(self, docs):
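        """Embed the chunks, build a FAISS index, and cache it to disk.

        Pickling the store lets later runs call load_embeddings() instead of
        re-embedding everything. Whether a FAISS store pickles cleanly depends
        on the faiss/langchain versions; FAISS.save_local()/load_local() is
        the more robust persistence API.
        """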
        instructor_embeddings = HuggingFaceInstructEmbeddings(model_name="sembeddings/model_gpt_trained")
        db_instructEmbedd = FAISS.from_documents(docs, instructor_embeddings)
        retriever = db_instructEmbedd.as_retriever(search_kwargs={"k": 3})

        with open("db_instructEmbedd.pkl", "wb") as f:
            pickle.dump(db_instructEmbedd, f)

        return retriever

    def load_embeddings(self):
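        """Load the cached FAISS store and return a top-3 retriever."""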
        with open("db_instructEmbedd.pkl", "rb") as f:
            retriever = pickle.load(f)
        
        retriever = retriever.as_retriever(search_kwargs={"k": 3})
        return retriever

    def create_qa_model(self, retriever):
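        """Wire the retriever to a HuggingFace Hub LLM in a 'stuff' RetrievalQA chain."""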
        llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.1})
        qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True)
        self.qa = qa  # stored so run_chatbot() can reach the chain
        return qa

    def run_chatbot(self):
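        """Render the Streamlit UI and answer the user's query with the QA chain."""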
        st.title('Chatbot Trained on Indian Exam Articles')
        st.header("Hi! How can I help you?")

        query = st.text_input('> ')
        if query:  # skip the chain until the user actually types something
            result = self.qa({'query': query})
            st.write(result['result'])
        st.button('Not satisfied? Talk to our expert here.')

    def run_google_search(self, query):
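        """Fallback web search via the Serper.dev Google Search API."""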
        search = GoogleSerperAPIWrapper()
        return search.run(query)
        
if __name__ == "__main__":
    chatbot = Chatbot()

    # First run: scrape, chunk, embed, and index the articles. Once
    # db_instructEmbedd.pkl exists, chatbot.load_embeddings() could replace
    # these three steps.
    data = chatbot.load_data()
    docs = chatbot.split_documents(data)
    retriever = chatbot.create_embeddings(docs)

    chatbot.create_qa_model(retriever)
    chatbot.run_chatbot()