Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,86 +7,63 @@ from langchain.vectorstores import FAISS
|
|
7 |
from langchain.embeddings import HuggingFaceInstructEmbeddings
|
8 |
from langchain.chains import RetrievalQA
|
9 |
from langchain import HuggingFaceHub
|
10 |
-
import streamlit as st
|
11 |
from langchain.utilities import GoogleSerperAPIWrapper
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
class Chatbot:
    """Retrieval-augmented QA chatbot over Zollege exam articles (Streamlit UI).

    Pipeline: scrape article URLs -> split into chunks -> embed into a FAISS
    index (persisted to db_instructEmbedd.pkl) -> answer queries through a
    RetrievalQA chain backed by flan-t5-xxl on the Hugging Face Hub.
    """

    def __init__(self):
        # SECURITY: credentials are hard-coded and committed to source control.
        # Rotate these keys and load them from the environment / a secrets
        # store instead of embedding them here.
        os.environ["Hugging_Face_API_KEY"] = "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"
        os.environ["HUGGINGFACEHUB_API_TOKEN"] = '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'
        os.environ["SERPER_API_KEY"] = "a69857e460dd51585e009a43743711b110b6beee"

    def load_data(self):
        """Scrape each exam article page and return the raw documents."""
        # NOTE(review): the original list literal was garbled in the diff
        # (missing opening bracket); reconstructed from the visible entries.
        urls = [
            'https://zollege.in/exams/bitsat',
            'https://zollege.in/exams/cat',
            'https://zollege.in/exams/gate',
            'https://zollege.in/exams/neet',
            'https://zollege.in/exams/lsat',
            'https://zollege.in/exams/jee-advanced',
            'https://zollege.in/exams/aipmcet',
        ]
        loaders = UnstructuredURLLoader(urls=urls)
        data = loaders.load()
        return data

    def split_documents(self, data):
        """Split raw documents into ~500-char chunks with 20-char overlap."""
        text_splitter = CharacterTextSplitter(separator='\n', chunk_size=500, chunk_overlap=20)
        docs = text_splitter.split_documents(data)
        return docs

    def create_embeddings(self, docs):
        """Embed the chunks, persist the FAISS index, and return a retriever.

        The index is pickled to db_instructEmbedd.pkl so later runs can skip
        the (slow) embedding step via load_embeddings().
        """
        instructor_embeddings = HuggingFaceInstructEmbeddings(model_name="sembeddings/model_gpt_trained")
        db_instructEmbedd = FAISS.from_documents(docs, instructor_embeddings)
        retriever = db_instructEmbedd.as_retriever(search_kwargs={"k": 3})

        with open("db_instructEmbedd.pkl", "wb") as f:
            pickle.dump(db_instructEmbedd, f)

        return retriever

    def load_embeddings(self):
        """Reload the pickled FAISS index and return a top-3 retriever."""
        # SECURITY: pickle.load executes arbitrary code from the file — only
        # ever load an index file this application itself produced.
        with open("db_instructEmbedd.pkl", "rb") as f:
            store = pickle.load(f)
        retriever = store.as_retriever(search_kwargs={"k": 3})
        return retriever

    def create_qa_model(self, retriever):
        """Build the RetrievalQA chain on top of flan-t5-xxl (temperature 0.1)."""
        llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.1})
        qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True)
        return qa

    def chat(self, query):
        """Answer `query` via the QA chain, offering a web-search fallback.

        NOTE(review): the original method body was truncated in the diff; this
        reconstruction keeps the visible statements (QA call, Streamlit button,
        Serper search) — confirm the intended return value against history.
        """
        result = self.qa({'query': query})
        st.button('Not Satisfied! Talk to our Expert Here..')
        search = GoogleSerperAPIWrapper()
        return search.run(query)
def run_google_search(query):
    """Fallback answer: run `query` through the Serper Google-search wrapper."""
    search = GoogleSerperAPIWrapper()
    return search.run(query)


if __name__ == "__main__":
    # Build the full pipeline from scratch: scrape, chunk, embed, then reload
    # the persisted index and construct the QA chain over it.
    chatbot = Chatbot()
    data = chatbot.load_data()
    docs = chatbot.split_documents(data)
    retriever = chatbot.create_embeddings(docs)
    retrievers = chatbot.load_embeddings()
    qa = chatbot.create_qa_model(retrievers)

    # NOTE(review): the input widget was lost in the garbled diff; a text_input
    # is the minimal reconstruction consistent with the st.write calls below.
    query = st.text_input('Ask a question about Indian exams:')
    if query:
        result = qa({'query': query})
        st.write(result['result'])
        if st.button('Not Satisfied! Talk to our Expert Here..'):
            st.write(run_google_search(query))
7 |
from langchain.embeddings import HuggingFaceInstructEmbeddings
|
8 |
from langchain.chains import RetrievalQA
|
9 |
from langchain import HuggingFaceHub
|
|
|
10 |
from langchain.utilities import GoogleSerperAPIWrapper
|
11 |
+
import gradio as gr

import pandas as pd

# Load the list of article URLs to index from the first column of linkss.csv.
# (Vectorized column extraction replaces the original manual itertuples/append
# loop; `astype(str)` mirrors the original f-string coercion of each value.)
df = pd.read_csv('linkss.csv')
url = df.iloc[:, 0].astype(str).tolist()
|
20 |
class Chatbot:
    """Retrieval-augmented QA over the article pages listed in linkss.csv.

    Construction scrapes the pages, reloads a previously pickled FAISS index
    from db_instructEmbedd.pkl, and builds a RetrievalQA chain; `chat` then
    answers free-text queries from that chain.
    """

    def __init__(self):
        # SECURITY: credentials are hard-coded and committed to source control.
        # Rotate these keys and supply them via the environment / Space
        # secrets instead of embedding them here.
        os.environ["Hugging_Face_API_KEY"] = "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"
        os.environ["HUGGINGFACEHUB_API_TOKEN"] = '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'
        os.environ["SERPER_API_KEY"] = "a69857e460dd51585e009a43743711b110b6beee"

        # NOTE(review): load_data() scrapes every URL but self.data is only
        # consumed by split_documents/create_embeddings, which are never called
        # here — the QA chain is served from the pickled index. Confirm whether
        # this startup scrape is still needed.
        self.load_data()
        self.load_embeddings()
        self.create_qa_model()

    def load_data(self):
        """Scrape every page in the module-level `url` list into self.data."""
        loaders = UnstructuredURLLoader(urls=url)
        self.data = loaders.load()

    def split_documents(self):
        """Chunk self.data into ~500-char pieces (20-char overlap) as self.docs."""
        text_splitter = CharacterTextSplitter(separator='\n', chunk_size=500, chunk_overlap=20)
        self.docs = text_splitter.split_documents(self.data)

    def create_embeddings(self):
        """Embed self.docs, persist the FAISS index, and set self.retriever."""
        instructor_embeddings = HuggingFaceInstructEmbeddings(model_name="sembeddings/model_gpt_trained")
        db_instructEmbedd = FAISS.from_documents(self.docs, instructor_embeddings)
        self.retriever = db_instructEmbedd.as_retriever(search_kwargs={"k": 3})

        # Persist so later runs can reload via load_embeddings() instead of
        # re-embedding everything.
        with open("db_instructEmbedd.pkl", "wb") as f:
            pickle.dump(db_instructEmbedd, f)

    def load_embeddings(self):
        """Reload the pickled FAISS index and expose it as a top-3 retriever."""
        # SECURITY: pickle.load executes arbitrary code from the file — only
        # ever load an index file this application itself produced.
        with open("db_instructEmbedd.pkl", "rb") as f:
            self.retriever = pickle.load(f)

        self.retriever = self.retriever.as_retriever(search_kwargs={"k": 3})

    def create_qa_model(self):
        """Build self.qa: a RetrievalQA chain over flan-t5-xxl (temperature 0.1)."""
        llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.1})
        self.qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=self.retriever, return_source_documents=True)

    def chat(self, query):
        """Run `query` through the QA chain and return the answer text."""
        result = self.qa({'query': query})
        return result['result']
# Build the bot once at startup so the Gradio app can serve it immediately.
chatbot = Chatbot()


def chatbot_response(query):
    """Gradio callback: forward `query` to the shared Chatbot and return its answer."""
    return chatbot.chat(query)


iface = gr.Interface(
    fn=chatbot_response,
    inputs="text",
    outputs="text",
    title="Chatbot Trained on Indian Exam Articles",
)
iface.launch()