captain-awesome committed on
Commit
be1f39f
·
verified ·
1 Parent(s): 84ad3fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -1
app.py CHANGED
@@ -1,8 +1,87 @@
 
 
1
  from langchain.agents import Tool
2
  from langchain.chains import RetrievalQA
3
  from langchain.text_splitter import CharacterTextSplitter
4
  from langchain_community.document_loaders import PyPDFLoader
5
  from langchain_community.vectorstores import FAISS
 
6
  import streamlit as st
7
 
8
- st.title("Docuverse")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from langchain.llms import CTransformers
3
  from langchain.agents import Tool
4
  from langchain.chains import RetrievalQA
5
  from langchain.text_splitter import CharacterTextSplitter
6
  from langchain_community.document_loaders import PyPDFLoader
7
  from langchain_community.vectorstores import FAISS
8
+ from langchain.embeddings import HuggingFaceBgeEmbeddings
9
  import streamlit as st
10
 
11
+
12
def main():
    """Run the Streamlit document Q&A application.

    Builds a CPU-hosted ``CTransformers`` LLM and ``HuggingFaceBgeEmbeddings``
    instance, then shows a file uploader. Each uploaded file whose extension
    has an entry in ``FILE_LOADER_MAPPING`` is written into a temporary
    directory and parsed with the mapped loader; the resulting documents are
    accumulated in ``loaded_documents``. Unsupported extensions produce a
    Streamlit warning instead of an error.
    """
    # Local imports: `os` and `tempfile` are used below but are not among the
    # module-level imports, so bring them into scope here.
    import os
    import tempfile

    # Maps a lower-case file extension (without the dot) to a
    # (loader class, loader keyword arguments) pair.
    FILE_LOADER_MAPPING = {
        "pdf": (PyPDFLoader, {}),
        # Add more mappings for other file extensions and loaders as needed
    }

    st.title("Document Comparison with Q&A using Agents")

    # Use half of the available cores, but never fewer than one thread.
    # os.cpu_count() may return None when the count cannot be determined.
    thread_count = max(1, (os.cpu_count() or 2) // 2)

    config = {
        'max_new_tokens': 1024,
        'repetition_penalty': 1.1,
        'temperature': 0.1,
        'top_k': 50,
        'top_p': 0.9,
        'stream': True,
        'threads': thread_count,
    }

    # NOTE(review): the repo id names Mixtral-8x7B while `model_file` and
    # `model_type` name Mistral-7B — confirm this file exists in this repo.
    llm = CTransformers(
        model="TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF",
        model_file="mistral-7b-instruct-v0.2.Q4_0.gguf",
        model_type="mistral",
        lib="avx2",  # for CPU use
        **config
    )

    print("LLM Initialized...")

    model_name = "BAAI/bge-large-en"
    model_kwargs = {'device': 'cpu'}
    encode_kwargs = {'normalize_embeddings': False}
    embeddings = HuggingFaceBgeEmbeddings(
        model_name=model_name,
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs
    )

    # Upload files
    uploaded_files = st.file_uploader(
        "Upload your documents",
        type=["pdf"],
        accept_multiple_files=True,
    )
    loaded_documents = []

    if uploaded_files:
        # The loaders read from disk, so stage each upload in a temporary
        # directory that is removed once every file has been parsed.
        with tempfile.TemporaryDirectory() as td:
            for uploaded_file in uploaded_files:
                st.write(f"Uploaded: {uploaded_file.name}")
                ext = os.path.splitext(uploaded_file.name)[-1][1:].lower()
                st.write(f"Uploaded: {ext}")

                # Skip anything we have no loader for.
                if ext not in FILE_LOADER_MAPPING:
                    st.warning(f"Unsupported file extension: {ext}")
                    continue

                loader_class, loader_args = FILE_LOADER_MAPPING[ext]

                # Keep only the final path component so the upload's name
                # cannot place the file outside the temporary directory.
                safe_name = os.path.basename(uploaded_file.name)
                file_path = os.path.join(td, safe_name)
                with open(file_path, 'wb') as temp_file:
                    temp_file.write(uploaded_file.read())

                # Use Langchain loader to process the file
                loader = loader_class(file_path, **loader_args)
                loaded_documents.extend(loader.load())


if __name__ == "__main__":
    main()
87
+