edjdhug3 committed on
Commit
71c3cac
·
1 Parent(s): f3bca26

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -56
app.py CHANGED
@@ -7,86 +7,63 @@ from langchain.vectorstores import FAISS
7
  from langchain.embeddings import HuggingFaceInstructEmbeddings
8
  from langchain.chains import RetrievalQA
9
  from langchain import HuggingFaceHub
10
- import streamlit as st
11
  from langchain.utilities import GoogleSerperAPIWrapper
 
 
 
 
 
 
 
 
12
 
13
  class Chatbot:
14
  def __init__(self):
15
  os.environ["Hugging_Face_API_KEY"] = "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"
16
  os.environ["HUGGINGFACEHUB_API_TOKEN"] = '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'
17
  os.environ["SERPER_API_KEY"] = "a69857e460dd51585e009a43743711b110b6beee"
18
-
 
 
 
 
19
  def load_data(self):
20
- urls = [
21
- 'https://zollege.in/exams/bitsat',
22
- 'https://zollege.in/exams/cat',
23
- 'https://zollege.in/exams/gate',
24
- 'https://zollege.in/exams/neet',
25
- 'https://zollege.in/exams/lsat',
26
- 'https://zollege.in/exams/jee-advanced',
27
- 'https://zollege.in/exams/aipmcet'
28
- ]
29
 
30
  loaders = UnstructuredURLLoader(urls=urls)
31
- data = loaders.load()
32
-
33
- return data
34
 
35
- def split_documents(self, data):
36
  text_splitter = CharacterTextSplitter(separator='\n', chunk_size=500, chunk_overlap=20)
37
- docs = text_splitter.split_documents(data)
38
 
39
- return docs
40
-
41
- def create_embeddings(self, docs):
42
  instructor_embeddings = HuggingFaceInstructEmbeddings(model_name="sembeddings/model_gpt_trained")
43
- db_instructEmbedd = FAISS.from_documents(docs, instructor_embeddings)
44
- retriever = db_instructEmbedd.as_retriever(search_kwargs={"k": 3})
45
 
46
  with open("db_instructEmbedd.pkl", "wb") as f:
47
  pickle.dump(db_instructEmbedd, f)
48
 
49
- return retriever
50
-
51
  def load_embeddings(self):
52
  with open("db_instructEmbedd.pkl", "rb") as f:
53
- retriever = pickle.load(f)
54
-
55
- retriever = retriever.as_retriever(search_kwargs={"k": 3})
56
- return retriever
57
 
58
- def create_qa_model(self, retriever):
59
- llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.1})
60
- qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True)
61
- return qa
62
 
63
- def run_chatbot(self):
64
- st.title('Chatbot Trained on Indian Exam Articles')
65
- st.header("Hi!! How Can I Help You ?")
66
 
67
- query = st.text_input('> ')
68
  result = self.qa({'query': query})
69
- st.write(result['result'])
70
- st.button('Not Satisfied! Talk to our Expert Here..')
71
 
72
- def run_google_search(self, query):
73
- search = GoogleSerperAPIWrapper()
74
- return search.run(query)
75
-
76
- if __name__ == "__main__":
77
- chatbot = Chatbot()
78
- data = chatbot.load_data()
79
- docs = chatbot.split_documents(data)
80
- retriever = chatbot.create_embeddings(docs)
81
- retrievers = chatbot.load_embeddings()
82
- qa = chatbot.create_qa_model(retrievers)
83
 
84
- st.title('Chatbot Trained on Indian Exam Articles')
85
- st.header("Hi!! How Can I Help You ?")
 
86
 
87
- query = st.text_input('ENTER TEXT HERE ')
88
- result = qa({'query': query})
89
- st.write(result['result'])
90
- if st.button('Not Satisfied! Talk to our Expert Here..'):
91
- st.write(run_google_search(query))
92
-
 
7
  from langchain.embeddings import HuggingFaceInstructEmbeddings
8
  from langchain.chains import RetrievalQA
9
  from langchain import HuggingFaceHub
 
10
  from langchain.utilities import GoogleSerperAPIWrapper
11
+ import gradio as gr
12
+
13
+ import pandas as pd
14
+ df = pd.read_csv('linkss.csv')
15
+ url = []
16
+ for i in df.itertuples():
17
+ url.append(f"{i[1]}")
18
+
19
 
20
class Chatbot:
    """Retrieval-augmented QA chatbot over the exam articles in ``url``.

    On construction it downloads the articles, restores (or builds) a
    FAISS retriever, and wires up a RetrievalQA chain; :meth:`chat`
    answers a single query string.
    """

    def __init__(self):
        # SECURITY(review): live credentials are hard-coded and checked
        # into source control — rotate these keys and read them from the
        # environment instead of overwriting it here.
        os.environ["Hugging_Face_API_KEY"] = "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"
        os.environ["HUGGINGFACEHUB_API_TOKEN"] = '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'
        os.environ["SERPER_API_KEY"] = "a69857e460dd51585e009a43743711b110b6beee"

        self.load_data()
        self.load_embeddings()
        self.create_qa_model()

    def load_data(self):
        """Fetch every article listed in the module-level ``url`` list into ``self.data``."""
        # ``url`` is built from linkss.csv at import time (module scope).
        urls = url
        loaders = UnstructuredURLLoader(urls=urls)
        self.data = loaders.load()

    def split_documents(self):
        """Chunk ``self.data`` into ~500-character documents with 20-char overlap."""
        text_splitter = CharacterTextSplitter(separator='\n', chunk_size=500, chunk_overlap=20)
        self.docs = text_splitter.split_documents(self.data)

    def create_embeddings(self):
        """Embed ``self.docs`` into a FAISS index, set the retriever, cache to disk."""
        instructor_embeddings = HuggingFaceInstructEmbeddings(model_name="sembeddings/model_gpt_trained")
        db_instructEmbedd = FAISS.from_documents(self.docs, instructor_embeddings)
        self.retriever = db_instructEmbedd.as_retriever(search_kwargs={"k": 3})

        with open("db_instructEmbedd.pkl", "wb") as f:
            pickle.dump(db_instructEmbedd, f)

    def load_embeddings(self):
        """Restore the FAISS retriever from disk, building it when absent.

        The original version crashed with ``FileNotFoundError`` on a fresh
        checkout because nothing ever called :meth:`create_embeddings`;
        fall back to building (and caching) the index in that case.
        """
        try:
            # SECURITY(review): unpickling executes arbitrary code — only
            # safe while db_instructEmbedd.pkl is produced locally above.
            with open("db_instructEmbedd.pkl", "rb") as f:
                store = pickle.load(f)
        except FileNotFoundError:
            self.split_documents()
            self.create_embeddings()  # sets self.retriever and writes the cache
            return

        self.retriever = store.as_retriever(search_kwargs={"k": 3})

    def create_qa_model(self):
        """Build the RetrievalQA chain (flan-t5-xxl over ``self.retriever``)."""
        llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.1})
        self.qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=self.retriever, return_source_documents=True)

    def chat(self, query):
        """Run ``query`` through the QA chain and return the plain-text answer."""
        result = self.qa({'query': query})
        return result['result']
 
61
 
62
# Instantiate once at import time so the Gradio handler below reuses a
# single chatbot (data download and retriever setup happen once, not per
# request). NOTE(review): failures here abort app startup.
chatbot = Chatbot()
 
 
 
 
 
 
 
 
 
 
63
 
64
def chatbot_response(query):
    """Gradio callback: forward the user's query to the shared chatbot."""
    return chatbot.chat(query)
67
 
68
# Minimal text-in/text-out UI; launch() starts the Gradio server (blocks
# the main thread until the app is stopped).
iface = gr.Interface(fn=chatbot_response, inputs="text", outputs="text", title="Chatbot Trained on Indian Exam Articles")
iface.launch()