captain-awesome committed on
Commit 7f721d2 · verified · 1 Parent(s): be1f39f

Update app.py

Files changed (1)
  1. app.py +84 -31
app.py CHANGED
@@ -1,11 +1,13 @@
 
from langchain.llms import CTransformers
from langchain.agents import Tool
+ from langchain.agents import AgentType, initialize_agent
from langchain.chains import RetrievalQA
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceBgeEmbeddings
+
import streamlit as st


@@ -18,39 +20,10 @@ def main():

    st.title("Document Comparison with Q&A using Agents")

-    config = {
-        'max_new_tokens': 1024,
-        'repetition_penalty': 1.1,
-        'temperature': 0.1,
-        'top_k': 50,
-        'top_p': 0.9,
-        'stream': True,
-        'threads': int(os.cpu_count() / 2)
-    }
-
-    llm = CTransformers(
-        model="TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF",
-        model_file="mistral-7b-instruct-v0.2.Q4_0.gguf",
-        model_type="mistral",
-        lib="avx2", #for CPU use
-        **config
-    )
-
-    print("LLM Initialized...")
-
-
-
-    model_name = "BAAI/bge-large-en"
-    model_kwargs = {'device': 'cpu'}
-    encode_kwargs = {'normalize_embeddings': False}
-    embeddings = HuggingFaceBgeEmbeddings(
-        model_name=model_name,
-        model_kwargs=model_kwargs,
-        encode_kwargs=encode_kwargs
-    )

    # Upload files
-    uploaded_files = st.file_uploader("Upload your documents", type=["pdf], accept_multiple_files=True)
+    uploaded_files = st.file_uploader("Upload your documents", type=["pdf"], accept_multiple_files=True)
    loaded_documents = []

    if uploaded_files:
@@ -76,10 +49,90 @@ def main():
                loader = loader_class(file_path, **loader_args)
                loaded_documents.extend(loader.load())
            else:
-                st.warning(f"Unsupported file extension: {ext}")
+                st.warning(f"Unsupported file extension: {ext}, the app currently only supports 'pdf'")
+
+    st.write("Ask a question to get a comparison from the documents:")
+    query = st.text_input("Ask a question:")
+



+    if st.button("Get Answer"):
+        if query:
+            # Load model, set prompts, create vector database, and retrieve answer
+            try:
+                start = timeit.default_timer()
+                config = {
+                    'max_new_tokens': 1024,
+                    'repetition_penalty': 1.1,
+                    'temperature': 0.1,
+                    'top_k': 50,
+                    'top_p': 0.9,
+                    'stream': True,
+                    'threads': int(os.cpu_count() / 2)
+                }
+
+                llm = CTransformers(
+                    model="TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF",
+                    model_file="mistral-7b-instruct-v0.2.Q4_0.gguf",
+                    model_type="mistral",
+                    lib="avx2", #for CPU use
+                    **config
+                )
+
+                print("LLM Initialized...")
+
+                model_name = "BAAI/bge-large-en"
+                model_kwargs = {'device': 'cpu'}
+                encode_kwargs = {'normalize_embeddings': False}
+                embeddings = HuggingFaceBgeEmbeddings(
+                    model_name=model_name,
+                    model_kwargs=model_kwargs,
+                    encode_kwargs=encode_kwargs
+                )
+
+                # Split the uploaded documents, embed them, and build a FAISS retriever
+                text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+                chunked_documents = text_splitter.split_documents(loaded_documents)
+                retriever = FAISS.from_documents(chunked_documents, embeddings).as_retriever()
+
+                # Wrap retrievers in a Tool
+                tools = [
+                    Tool(
+                        name="Comparison tool",
+                        description="useful when you want to answer questions about the uploaded documents",
+                        func=RetrievalQA.from_chain_type(llm=llm, retriever=retriever).run,
+                    )
+                ]
+
+                agent = initialize_agent(
+                    tools=tools,
+                    llm=llm,
+                    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
+                    verbose=True
+                )
+
+                response = agent.run(query)
+
+                end = timeit.default_timer()
+                st.write("Elapsed time:")
+                st.write(end - start)
+
+                st.write("Bot Response:")
+                st.write(response)
+
+            except Exception as e:
+                st.error(f"An error occurred: {str(e)}")
+        else:
+            st.warning("Please enter a question.")


if __name__ == "__main__":
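
The "Get Answer" handler added in this commit wires a RetrievalQA chain into a zero-shot ReAct agent through a single Tool; it also relies on os and timeit imports that sit outside the hunks shown above. Below is a minimal, self-contained sketch of that wiring, not part of the commit: FakeListLLM, FakeEmbeddings, and the sample Document texts are assumed stand-ins so the flow can be exercised without downloading the Mixtral GGUF or the BAAI/bge-large-en weights.

# Sketch only: stand-in LLM/embeddings replace the CTransformers and BGE models used in app.py.
from langchain.agents import AgentType, Tool, initialize_agent
from langchain.chains import RetrievalQA
from langchain.embeddings import FakeEmbeddings
from langchain.llms.fake import FakeListLLM
from langchain.schema import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS

# Stand-in documents; app.py builds these with PyPDFLoader from the uploaded PDFs.
loaded_documents = [
    Document(page_content="Report A: revenue grew 10% in 2023."),
    Document(page_content="Report B: revenue grew 4% in 2023."),
]

# Scripted LLM outputs: one ReAct step that calls the tool, the tool's answer,
# and the final answer. A real run would use the CTransformers Mistral model instead.
llm = FakeListLLM(responses=[
    "Action: Comparison tool\nAction Input: compare revenue growth in the reports",
    "Report A grew faster (10% vs 4%).",
    "Final Answer: Report A grew faster (10% vs 4%).",
])

# Same chunk -> embed -> FAISS -> retriever pipeline as the commit.
chunks = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_documents(loaded_documents)
retriever = FAISS.from_documents(chunks, FakeEmbeddings(size=32)).as_retriever()

# Wrap the RetrievalQA chain as a Tool and hand it to a zero-shot ReAct agent.
tools = [
    Tool(
        name="Comparison tool",
        description="useful when you want to answer questions about the uploaded documents",
        func=RetrievalQA.from_chain_type(llm=llm, retriever=retriever).run,
    )
]
agent = initialize_agent(tools=tools, llm=llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

print(agent.run("Which report grew faster?"))

Passing the chain's run method as func keeps the Tool a plain string-in, string-out callable, which is what Tool.func expects from the agent.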