edjdhug3 committed on
Commit
a38b3b9
·
1 Parent(s): 0a065b9
Files changed (1) hide show
  1. app.py +66 -67
app.py CHANGED
@@ -1,82 +1,81 @@
1
  import os
2
  import pickle
 
 
 
 
 
 
 
 
 
 
 
3
  from langchain.document_loaders import UnstructuredURLLoader
 
 
 
4
  from langchain.text_splitter import CharacterTextSplitter
 
 
 
 
 
 
 
 
 
5
  from InstructorEmbedding import INSTRUCTOR
6
  from langchain.vectorstores import FAISS
 
7
  from langchain.embeddings import HuggingFaceInstructEmbeddings
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  from langchain.chains import RetrievalQA
9
  from langchain import HuggingFaceHub
 
 
 
 
 
 
 
10
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  from langchain.utilities import GoogleSerperAPIWrapper
12
 
13
class Chatbot:
    """Retrieval-augmented chatbot over Zollege exam articles.

    Pipeline: scrape a fixed list of exam pages -> split them into
    overlapping chunks -> embed into a FAISS index -> answer user
    queries with a HuggingFaceHub LLM via a RetrievalQA chain, rendered
    in a Streamlit UI.
    """

    def __init__(self):
        # SECURITY: earlier revisions hard-coded real HuggingFace and Serper
        # API keys here. Those keys are leaked in version control and must be
        # rotated; supply them through the environment before launching.
        os.environ.setdefault("HUGGINGFACEHUB_API_TOKEN", "")
        os.environ.setdefault("SERPER_API_KEY", "")
        # Bug fix: the QA chain was previously injected from outside
        # (``chatbot.qa = ...``) with no declaration; initialize it so the
        # instance shape is explicit and attribute errors are predictable.
        self.qa = None

    def load_data(self):
        """Scrape the exam article pages and return the loaded documents."""
        urls = [
            'https://zollege.in/exams/bitsat',
            'https://zollege.in/exams/cat',
            'https://zollege.in/exams/gate',
            'https://zollege.in/exams/neet',
            'https://zollege.in/exams/lsat',
            'https://zollege.in/exams/jee-advanced',
            'https://zollege.in/exams/aipmcet',
        ]
        loader = UnstructuredURLLoader(urls=urls)
        return loader.load()

    def split_documents(self, data):
        """Split loaded documents into ~500-char chunks with 20-char overlap."""
        text_splitter = CharacterTextSplitter(
            separator='\n', chunk_size=500, chunk_overlap=20,
        )
        return text_splitter.split_documents(data)

    def create_embeddings(self, docs):
        """Embed chunks, persist the FAISS store to disk, return a k=3 retriever."""
        instructor_embeddings = HuggingFaceInstructEmbeddings(
            model_name="sembeddings/model_gpt_trained",
        )
        store = FAISS.from_documents(docs, instructor_embeddings)
        with open("db_instructEmbedd.pkl", "wb") as f:
            pickle.dump(store, f)
        return store.as_retriever(search_kwargs={"k": 3})

    def load_embeddings(self):
        """Reload the persisted FAISS store and return a k=3 retriever.

        SECURITY: ``pickle.load`` executes arbitrary code from the file —
        only ever load the file this app wrote itself.
        """
        with open("db_instructEmbedd.pkl", "rb") as f:
            store = pickle.load(f)
        return store.as_retriever(search_kwargs={"k": 3})

    def create_qa_model(self, retriever):
        """Build the RetrievalQA chain on top of a flan-t5-xxl hub model."""
        llm = HuggingFaceHub(
            repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.1},
        )
        return RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
        )

    def run_chatbot(self):
        """Render the Streamlit UI and answer the typed query."""
        st.title('Chatbot Trained on Indian Exam Articles')
        st.header("Hi!! How Can I Help You ?")

        query = st.text_input('')
        # Bug fix: text_input yields "" until the user types; the original
        # unconditionally ran the chain with an empty query on first render.
        if query:
            result = self.qa({'query': query})
            st.write(result['result'])

    def run_google_search(self, query):
        """Run a Google Serper search for *query*.

        Bug fix: the original discarded the search result; return it so
        callers can display the fallback answer.
        """
        search = GoogleSerperAPIWrapper()
        return search.run(query)
74
 
75
 
76
if __name__ == "__main__":
    # Build the full pipeline once, then hand control to the Streamlit UI.
    bot = Chatbot()
    raw_documents = bot.load_data()
    chunks = bot.split_documents(raw_documents)
    bot.qa = bot.create_qa_model(bot.create_embeddings(chunks))
    bot.run_chatbot()
 
1
"""Streamlit chatbot answering questions about Indian exam articles.

Pipeline: scrape Zollege exam pages -> split into overlapping chunks ->
embed into a FAISS index -> answer the user's query with a HuggingFaceHub
LLM via RetrievalQA, with a Google-Serper fallback search.
"""
import os
import pickle

import streamlit as st

from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.utilities import GoogleSerperAPIWrapper
from langchain.vectorstores import FAISS

# Kept from the original: importing INSTRUCTOR may be needed as a
# side effect by HuggingFaceInstructEmbeddings — TODO confirm.
from InstructorEmbedding import INSTRUCTOR  # noqa: F401

# SECURITY: earlier revisions hard-coded real HuggingFace and Serper API
# keys right here. Those keys are leaked in version control and must be
# rotated; provide HUGGINGFACEHUB_API_TOKEN and SERPER_API_KEY through the
# environment before launching the app.
os.environ.setdefault("HUGGINGFACEHUB_API_TOKEN", "")
os.environ.setdefault("SERPER_API_KEY", "")

URLS = [
    'https://zollege.in/exams/bitsat',
    'https://zollege.in/exams/cat',
    'https://zollege.in/exams/gate',
    # Bug fix: a missing comma after the next URL made Python concatenate
    # the 'neet' and 'lsat' literals into one invalid URL.
    'https://zollege.in/exams/neet',
    'https://zollege.in/exams/lsat',
    'https://zollege.in/exams/jee-advanced',
    'https://zollege.in/exams/aipmcet',
]


def build_retriever():
    """Scrape, split, embed and persist the article corpus; return a k=3 retriever."""
    data = UnstructuredURLLoader(urls=URLS).load()

    splitter = CharacterTextSplitter(separator='\n', chunk_size=500, chunk_overlap=20)
    docs = splitter.split_documents(data)

    # Works nicely with model = sentence-transformers/all-MiniLM-L6-v2 too.
    embeddings = HuggingFaceInstructEmbeddings(model_name="sembeddings/model_gpt_trained")
    store = FAISS.from_documents(docs, embeddings)

    # Persist the index for later reuse. (The original dumped the store and
    # immediately reloaded it via pickle — a pointless round-trip; beware that
    # pickle.load executes arbitrary code from untrusted files.)
    with open("db_instructEmbedd.pkl", "wb") as f:
        pickle.dump(store, f)

    return store.as_retriever(search_kwargs={"k": 3})


def main():
    """Streamlit entry point.

    Bug fix: the original script ran everything at import time and then
    called an undefined ``main()``, which raised NameError on every run.
    """
    retriever = build_retriever()

    # repo_id = "gpt2" works well too.
    llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.1})
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )

    st.title('Chatbot Trained on Indian Exam Articles')
    st.header("Hi!! How Can I Help You ?")

    query = st.chat_input('> ')
    # Bug fix: chat_input returns None until the user submits; the original
    # unconditionally ran the chain (and the Serper search) with query=None.
    if query:
        result = qa({'query': query})
        st.write(result['result'])
        st.button('Not Satisfied! Talk to our Expert Here..')

        # Fallback web search on the same query (result is currently
        # discarded, matching the original behavior — consider displaying it).
        search = GoogleSerperAPIWrapper()
        search.run(result['query'])


if __name__ == "__main__":
    main()