edjdhug3 committed on
Commit
1b25c4c
·
1 Parent(s): 326f0b1

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -0
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Streamlit chatbot that answers questions about Indian entrance exams.
#
# Pipeline: download the exam pages listed in `urls`, split them into chunks,
# embed the chunks into a FAISS index, and answer user questions with a
# RetrievalQA chain backed by a Hugging Face Hub model.
#
# Credentials are read from the environment and must NOT be committed:
#   HUGGINGFACEHUB_API_TOKEN  - token used for the Hugging Face Hub LLM
#   SERPER_API_KEY            - optional, enables the Google Serper web search
import os
import pickle
import pprint

import streamlit as st
from InstructorEmbedding import INSTRUCTOR  # noqa: F401  (kept from the original import list)
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.utilities import GoogleSerperAPIWrapper
from langchain.vectorstores import FAISS

HF_TOKEN_ENV = "HUGGINGFACEHUB_API_TOKEN"
# Name the original script also populated; still accepted as a fallback source.
LEGACY_HF_TOKEN_ENV = "Hugging_Face_API_KEY"
SERPER_KEY_ENV = "SERPER_API_KEY"
INDEX_PICKLE_PATH = "db_instructEmbedd.pkl"

# Every entry needs its own trailing comma: adjacent string literals are
# silently concatenated into a single (invalid) URL otherwise.
urls = [
    'https://zollege.in/exams/bitsat',
    'https://zollege.in/exams/cat',
    'https://zollege.in/exams/gate',
    'https://zollege.in/exams/neet',
    'https://zollege.in/exams/lsat',
    'https://zollege.in/exams/jee-advanced',
    'https://zollege.in/exams/aipmcet',
]


def _ensure_hf_token():
    """Make the Hugging Face token available under both env names.

    Returns:
        True when a non-empty token was found in either variable,
        False when neither variable is set.
    """
    token = os.environ.get(HF_TOKEN_ENV) or os.environ.get(LEGACY_HF_TOKEN_ENV)
    if not token:
        return False
    os.environ[HF_TOKEN_ENV] = token
    os.environ.setdefault(LEGACY_HF_TOKEN_ENV, token)
    return True


@st.cache_resource
def build_qa_chain():
    """Build the RetrievalQA chain over the exam pages.

    Decorated with ``st.cache_resource`` so the download / embedding /
    indexing work happens once per server process rather than on every
    Streamlit rerun (each widget interaction re-executes the script).

    Returns:
        The chain produced by ``RetrievalQA.from_chain_type``; it is called
        with ``{'query': ...}`` and its output is read via ``['result']``.
    """
    data = UnstructuredURLLoader(urls=urls).load()

    text_splitter = CharacterTextSplitter(separator='\n',
                                          chunk_size=500,
                                          chunk_overlap=20)
    docs = text_splitter.split_documents(data)

    # Original author's note: sentence-transformers/all-MiniLM-L6-v2 also
    # worked well as the embedding model.
    instructor_embeddings = HuggingFaceInstructEmbeddings(
        model_name="sembeddings/model_gpt_trained")
    db_instructEmbedd = FAISS.from_documents(docs, instructor_embeddings)

    # Persist the index to disk as before. The original immediately
    # unpickled the file it had just written; that round-trip is dropped
    # because the in-memory index is already the same data.
    with open(INDEX_PICKLE_PATH, "wb") as f:
        pickle.dump(db_instructEmbedd, f)

    retriever = db_instructEmbedd.as_retriever(search_kwargs={"k": 3})

    # Original author's note: repo_id "gpt2" also worked.
    llm = HuggingFaceHub(repo_id="google/flan-t5-xxl",
                         model_kwargs={"temperature": 0.1})

    return RetrievalQA.from_chain_type(llm=llm,
                                       chain_type="stuff",
                                       retriever=retriever,
                                       return_source_documents=True)


def main():
    """Render the chat page and answer the submitted question, if any."""
    st.title('Chatbot Trained on Indian Exam Articles')
    st.header("Hi!! How Can I Help You ?")

    if not _ensure_hf_token():
        st.error(f"Set the {HF_TOKEN_ENV} environment variable to use this app.")
        st.stop()

    qa = build_qa_chain()

    # chat_input yields None until the user submits a message, so the chain
    # must only run when there is an actual question.
    query = st.chat_input('> ')
    if query:
        result = qa({'query': query})
        st.write(result['result'])

        # The web search needs its own key; skip it quietly when absent.
        if os.environ.get(SERPER_KEY_ENV):
            search = GoogleSerperAPIWrapper()
            # NOTE(review): the original discarded this result; it is sent
            # to the server log here - confirm whether it should be shown
            # in the UI instead.
            pprint.pprint(search.run(result['query']))

    # NOTE(review): the button has no handler in the original either.
    st.button('Not Satisfied! Talk to our Expert Here..')


if __name__ == "__main__":
    main()