Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,81 +1,89 @@
|
|
1 |
import os
|
2 |
import pickle
|
3 |
-
os.environ["Hugging_Face_API_KEY"] = "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"
|
4 |
-
|
5 |
-
urls = [
|
6 |
-
'https://zollege.in/exams/bitsat',
|
7 |
-
'https://zollege.in/exams/cat',
|
8 |
-
'https://zollege.in/exams/gate',
|
9 |
-
'https://zollege.in/exams/neet'
|
10 |
-
'https://zollege.in/exams/lsat',
|
11 |
-
'https://zollege.in/exams/jee-advanced',
|
12 |
-
'https://zollege.in/exams/aipmcet']
|
13 |
-
|
14 |
from langchain.document_loaders import UnstructuredURLLoader
|
15 |
-
loaders = UnstructuredURLLoader(urls=urls)
|
16 |
-
data = loaders.load()
|
17 |
-
|
18 |
from langchain.text_splitter import CharacterTextSplitter
|
19 |
-
|
20 |
-
text_splitter = CharacterTextSplitter(separator='\n',
|
21 |
-
chunk_size=500,
|
22 |
-
chunk_overlap=20)
|
23 |
-
|
24 |
-
|
25 |
-
docs = text_splitter.split_documents(data)
|
26 |
-
|
27 |
-
|
28 |
from InstructorEmbedding import INSTRUCTOR
|
29 |
from langchain.vectorstores import FAISS
|
30 |
-
|
31 |
from langchain.embeddings import HuggingFaceInstructEmbeddings
|
32 |
-
|
33 |
-
instructor_embeddings = HuggingFaceInstructEmbeddings(model_name="sembeddings/model_gpt_trained"
|
34 |
-
) # nice working with model = sentence-transformers/all-MiniLM-L6-v2
|
35 |
-
|
36 |
-
db_instructEmbedd = FAISS.from_documents(docs, instructor_embeddings)
|
37 |
-
retriever = db_instructEmbedd.as_retriever(search_kwargs={"k": 3})
|
38 |
-
|
39 |
-
with open("db_instructEmbedd.pkl", "wb") as f:
|
40 |
-
pickle.dump(db_instructEmbedd, f)
|
41 |
-
|
42 |
-
with open("db_instructEmbedd.pkl", "rb") as f:
|
43 |
-
retriever = pickle.load(f)
|
44 |
-
|
45 |
-
retriever = retriever.as_retriever(search_kwargs={"k": 3})
|
46 |
-
|
47 |
-
|
48 |
-
os.environ["HUGGINGFACEHUB_API_TOKEN"] = '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'
|
49 |
-
|
50 |
from langchain.chains import RetrievalQA
|
51 |
from langchain import HuggingFaceHub
|
52 |
-
|
53 |
-
|
54 |
-
llm = llm=HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.1}) #repo_id = gpt2 works goodd
|
55 |
-
|
56 |
-
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True)
|
57 |
-
|
58 |
-
|
59 |
import streamlit as st
|
60 |
-
|
61 |
-
st.title('Chatbot Trained on Indian Exam Articles')
|
62 |
-
st.header("Hi!! How Can I Help You ?")
|
63 |
-
|
64 |
-
query = st.chat_input('> ')
|
65 |
-
result = qa({'query': query})
|
66 |
-
st.write(result['result'])
|
67 |
-
st.button('Not Satisfied! Talk to our Expert Here..')
|
68 |
-
|
69 |
-
import pprint
|
70 |
-
|
71 |
-
os.environ["SERPER_API_KEY"] = "a69857e460dd51585e009a43743711b110b6beee"
|
72 |
-
|
73 |
from langchain.utilities import GoogleSerperAPIWrapper
|
74 |
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
if __name__ == "__main__":
|
81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
import pickle
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
from langchain.document_loaders import UnstructuredURLLoader
|
|
|
|
|
|
|
4 |
from langchain.text_splitter import CharacterTextSplitter
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
from InstructorEmbedding import INSTRUCTOR
|
6 |
from langchain.vectorstores import FAISS
|
|
|
7 |
from langchain.embeddings import HuggingFaceInstructEmbeddings
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
from langchain.chains import RetrievalQA
|
9 |
from langchain import HuggingFaceHub
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
from langchain.utilities import GoogleSerperAPIWrapper
|
12 |
|
13 |
class Chatbot:
    """RAG chatbot over Indian exam articles.

    Pipeline: scrape article URLs -> split into chunks -> embed into a FAISS
    index -> retrieve top-k chunks -> answer with a HuggingFace-hosted LLM,
    fronted by a Streamlit UI.
    """

    def __init__(self):
        # SECURITY: these credentials were committed to source control and
        # must be rotated. setdefault keeps the app runnable on the Space
        # while letting a real environment value take precedence instead of
        # being clobbered (the original unconditionally overwrote them).
        os.environ.setdefault("Hugging_Face_API_KEY", "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")
        os.environ.setdefault("HUGGINGFACEHUB_API_TOKEN", "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")
        os.environ.setdefault("SERPER_API_KEY", "a69857e460dd51585e009a43743711b110b6beee")
        # fix: run_chatbot() reads self.qa, which was never assigned anywhere
        # in the original class (guaranteed AttributeError). It is populated
        # by create_qa_model().
        self.qa = None

    def load_data(self):
        """Download and parse every exam article page; returns LangChain Documents."""
        urls = [
            'https://zollege.in/exams/bitsat',
            'https://zollege.in/exams/cat',
            'https://zollege.in/exams/gate',
            'https://zollege.in/exams/neet',
            'https://zollege.in/exams/lsat',
            'https://zollege.in/exams/jee-advanced',
            'https://zollege.in/exams/aipmcet',
        ]

        loaders = UnstructuredURLLoader(urls=urls)
        return loaders.load()

    def split_documents(self, data):
        """Split documents into ~500-char chunks with 20-char overlap for embedding."""
        text_splitter = CharacterTextSplitter(separator='\n', chunk_size=500, chunk_overlap=20)
        return text_splitter.split_documents(data)

    def create_embeddings(self, docs):
        """Embed *docs* into a FAISS index, persist it to disk, and return a k=3 retriever."""
        instructor_embeddings = HuggingFaceInstructEmbeddings(model_name="sembeddings/model_gpt_trained")
        db_instructEmbedd = FAISS.from_documents(docs, instructor_embeddings)
        retriever = db_instructEmbedd.as_retriever(search_kwargs={"k": 3})

        with open("db_instructEmbedd.pkl", "wb") as f:
            pickle.dump(db_instructEmbedd, f)

        return retriever

    def load_embeddings(self):
        """Reload the persisted FAISS store and return a k=3 retriever.

        SECURITY: pickle.load must only be used on files this app wrote
        itself; never load a pickle from an untrusted source.
        """
        with open("db_instructEmbedd.pkl", "rb") as f:
            store = pickle.load(f)

        return store.as_retriever(search_kwargs={"k": 3})

    def create_qa_model(self, retriever):
        """Build the RetrievalQA chain over *retriever* and cache it on the instance."""
        llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.1})
        qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True)
        # fix: cache the chain so run_chatbot()'s self.qa reference works;
        # the return value is unchanged for existing callers.
        self.qa = qa
        return qa

    def run_chatbot(self):
        """Render the Streamlit UI and answer the user's query via self.qa."""
        st.title('Chatbot Trained on Indian Exam Articles')
        st.header("Hi!! How Can I Help You ?")

        query = st.text_input('> ')
        # fix: on the first render the input box is empty — don't send an
        # empty query through the QA chain.
        if query:
            result = self.qa({'query': query})
            st.write(result['result'])
        st.button('Not Satisfied! Talk to our Expert Here..')

    def run_google_search(self, query):
        """Run *query* through Google Serper and return the answer text."""
        search = GoogleSerperAPIWrapper()
        # fix: the original computed the result and silently discarded it.
        return search.run(query)
76 |
if __name__ == "__main__":
    # Build the full pipeline once at startup: scrape -> chunk -> embed -> QA chain.
    chatbot = Chatbot()
    data = chatbot.load_data()
    docs = chatbot.split_documents(data)
    retriever = chatbot.create_embeddings(docs)
    qa = chatbot.create_qa_model(retriever)

    st.title('Chatbot Trained on Indian Exam Articles')
    st.header("Hi!! How Can I Help You ?")

    query = st.text_input('> ')
    # fix: text_input yields "" on the first render — skip the chain call
    # until the user actually typed something, instead of querying with "".
    if query:
        result = qa({'query': query})
        st.write(result['result'])
    st.button('Not Satisfied! Talk to our Expert Here..')