Spaces:
Upload app.py

app.py
ADDED
import os
import pickle

# Exam article pages to index.
urls = [
    'https://zollege.in/exams/bitsat',
    'https://zollege.in/exams/cat',
    'https://zollege.in/exams/gate',
    'https://zollege.in/exams/neet',
    'https://zollege.in/exams/lsat',
    'https://zollege.in/exams/jee-advanced',
    'https://zollege.in/exams/aipmcet',
]

from langchain.document_loaders import UnstructuredURLLoader

# Download and parse every page into LangChain Documents.
loaders = UnstructuredURLLoader(urls=urls)
data = loaders.load()

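# Each element of `data` is a LangChain Document exposing .page_content (the
# extracted text) and .metadata (including the source URL). A quick sanity
# check, assuming at least one page loaded successfully:
# print(data[0].metadata["source"], len(data[0].page_content))
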
from langchain.text_splitter import CharacterTextSplitter

# Split each page into ~500-character chunks with a small overlap so context
# is not lost at chunk boundaries.
text_splitter = CharacterTextSplitter(separator='\n',
                                      chunk_size=500,
                                      chunk_overlap=20)

docs = text_splitter.split_documents(data)

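# chunk_size and chunk_overlap are measured in characters here; a chunk can
# still exceed 500 characters when a single newline-separated block is longer
# than that. Inspecting the split, for instance:
# print(len(docs), max(len(d.page_content) for d in docs))
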
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceInstructEmbeddings

instructor_embeddings = HuggingFaceInstructEmbeddings(
    model_name="sembeddings/model_gpt_trained"
)  # sentence-transformers/all-MiniLM-L6-v2 also works well here

# Embed every chunk and build a FAISS index over the vectors.
db_instructEmbedd = FAISS.from_documents(docs, instructor_embeddings)
retriever = db_instructEmbedd.as_retriever(search_kwargs={"k": 3})

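# k=3 means each query pulls back the three most similar chunks. The store can
# also be queried directly, e.g. with a hypothetical question:
# db_instructEmbedd.similarity_search("When is BITSAT conducted?", k=3)
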
# Persist the vector store so it can be reloaded without re-embedding.
with open("db_instructEmbedd.pkl", "wb") as f:
    pickle.dump(db_instructEmbedd, f)

with open("db_instructEmbedd.pkl", "rb") as f:
    vectorstore = pickle.load(f)

retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

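# Pickling works, but it ties the file to the current library versions.
# LangChain's FAISS wrapper also offers a dedicated persistence API; a sketch,
# assuming a local "faiss_index" folder is acceptable:
# db_instructEmbedd.save_local("faiss_index")
# db_instructEmbedd = FAISS.load_local("faiss_index", instructor_embeddings)
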
# Token for the Hugging Face Inference API; set your own rather than
# committing a real token to the repo.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = "hf_..."  # placeholder

from langchain.chains import RetrievalQA
from langchain import HuggingFaceHub

# flan-t5-xxl served through the hosted Inference API; repo_id="gpt2" also
# works well as a lighter alternative.
llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.1})

qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff",
                                 retriever=retriever,
                                 return_source_documents=True)

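# The chain takes a dict with a "query" key and returns the answer under
# "result", plus the retrieved chunks under "source_documents". A quick check
# with a hypothetical question:
# out = qa({"query": "What is the exam pattern for GATE?"})
# print(out["result"])
# print([d.metadata["source"] for d in out["source_documents"]])
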
import streamlit as st

st.title('Chatbot Trained on Indian Exam Articles')
st.header('Hi! How can I help you?')

# Google-search fallback via Serper.dev; use your own key rather than
# committing it to the repo.
os.environ["SERPER_API_KEY"] = "..."  # placeholder Serper API key

from langchain.utilities import GoogleSerperAPIWrapper

search = GoogleSerperAPIWrapper()

query = st.chat_input('> ')
if query:  # st.chat_input returns None until the user submits a message
    result = qa({'query': query})
    st.write(result['result'])
    st.button('Not satisfied? Talk to our expert here.')

    # Also surface a live Google answer to the same question.
    st.write(search.run(result['query']))
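
# Note: Streamlit re-runs this whole script on every interaction, so the
# scraping/embedding pipeline above is rebuilt on each message. A sketch of
# caching it with st.cache_resource, assuming the setup were wrapped in a
# function:
# @st.cache_resource
# def build_qa_chain():
#     ...  # the loading/splitting/embedding/LLM steps above
#     return qa_chain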