Nechba commited on
Commit
a4200f5
·
1 Parent(s): 534b4e8

fisrt commit1

Browse files
app.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from utlis.helper import *
2
+
3
initialize_session_state()


def _fetch_json(url):
    """Return the JSON-decoded body of a GET request to *url*."""
    return json.loads(requests.get(url).text)


def _multiselect_with_all(options):
    """Render an unlabeled multiselect over *options* and return the chosen items.

    With two or more options an extra "ALL" entry is offered and preselected;
    picking it (alone or with other entries) returns every real option. A
    single option is preselected. The returned list never contains "ALL".
    """
    options = list(options)
    if len(options) >= 2:
        chosen = st.multiselect("", options + ["ALL"], default="ALL")
    elif len(options) == 1:
        chosen = st.multiselect("", options, default=options[0])
    else:
        chosen = st.multiselect("", options)
    return options if "ALL" in chosen else chosen


with st.sidebar:
    st.image("logo.png", width=170)
    st.title("Smart Retrieval")
    # Models the user can pick for answer generation.
    llms = ['Gemini-Pro','Cohere','Mistral-7B-Instruct-v0.3','gemma-2b','Meta-Llama-3-8B-Instruct','Phi-3-mini-4k-instruct','zephyr-7b-beta']
    st.session_state.llm = st.selectbox("Choose LLM", llms)
    genre = st.radio(
        "Choose option",
        ["Select document(s)", "Add document(s)","Delete service(s)", "Delete document(s)"])

    if genre == "Add document(s)":
        st.title('Add Document(s)')
        if st.checkbox("Add new service"):
            new_service = st.text_input("Enter service name")
            # The model server lists the embedding models a service can use.
            embedding_models = _fetch_json(EMBEDDING_MODELS_API)
            embedding_model = st.selectbox("Choose Embidding model", embedding_models["Model_Names_paid"])
            # The button is only offered once a service name has been typed.
            if new_service and st.button('Add'):
                add_service(st.session_state.token, new_service, embedding_model)

        services = _fetch_json(SERVICES_API + st.session_state.token)
        if services:
            st.session_state.service = st.selectbox("Choose Service", services)
            if st.session_state.service:
                st.session_state.uploaded_files = st.file_uploader(
                    "Upload PDF files",
                    type=["pdf", "png", "jpg", "jpeg"],
                    accept_multiple_files=True,
                )
                if st.session_state.uploaded_files:
                    st.session_state.process = st.button('Process')
                    if st.session_state.process:
                        add_document(st.session_state.token, st.session_state.service)

    elif genre == "Select document(s)":
        st.title('Chat with Document(s)')
        services = _fetch_json(SERVICES_API + st.session_state.token)
        if services:
            st.session_state.service_slected_to_chat = st.selectbox("Choose Service", services)
            st.session_state.top_k = st.number_input("Top k ", min_value=1, value=5)
            history_document = _fetch_json(
                DOCUMENT_API + f'/{st.session_state.token}/{st.session_state.service_slected_to_chat}'
            ).get("documents", [])
            # Documents the chat retrieval will be restricted to.
            st.session_state.doument_slected_to_chat = _multiselect_with_all(history_document)
            st.write("You selected:", st.session_state.doument_slected_to_chat)

    elif genre == "Delete service(s)":
        st.title('Delete Service(s)')
        services = _fetch_json(SERVICES_API + st.session_state.token)
        service_slected = _multiselect_with_all(services)
        st.write("You selected:", service_slected)

        if len(service_slected) > 0:
            st.session_state.delete = st.button('Delete')
            if st.session_state.delete:
                delete_service(st.session_state.token, service_slected)

    elif genre == "Delete document(s)":
        st.title('Delete Document(s)')
        services = _fetch_json(SERVICES_API + st.session_state.token)
        if services:
            service = st.selectbox("Choose Service", services)
            history_document = _fetch_json(
                DOCUMENT_API + f'/{st.session_state.token}/{service}'
            ).get("documents", [])
            document_slected_to_delete = _multiselect_with_all(history_document)

            st.write("You selected:", document_slected_to_delete)
            if len(document_slected_to_delete) > 0:
                st.session_state.delete = st.button('Delete')
                if st.session_state.delete:
                    # Delete from the service chosen in THIS branch; the
                    # session-level `service` belongs to the upload flow and
                    # may be unset or point at a different service.
                    delete_document(st.session_state.token, service, document_slected_to_delete)
136
+
137
+
138
+
139
+
140
# Replay the stored conversation; user and assistant turns get different avatars.
for msg in st.session_state.messages:
    avatar = "🧑‍💻" if msg["role"] == "user" else "🤖"
    st.chat_message(msg["role"], avatar=avatar).write(msg["content"])

if prompt := st.chat_input():
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.chat_message("user", avatar="🧑‍💻").write(prompt)

    # These keys are only written by the "Select document(s)" sidebar branch,
    # and only when at least one service exists. Without them the retrieval
    # call would fail with AttributeError, so answer with guidance instead.
    required_keys = ("service_slected_to_chat", "top_k", "doument_slected_to_chat")
    if any(key not in st.session_state for key in required_keys):
        response = "Please choose a service and document(s) in the sidebar before asking a question."
    else:
        context = get_context(
            prompt,
            st.session_state.token,
            st.session_state.service_slected_to_chat,
            st.session_state.top_k,
        )
        # Number the retrieved chunks so the model can tell them apart.
        template = " " + "".join(
            f"Chunk{i}: {chunk}\n" for i, chunk in enumerate(context)
        )
        response = generate_response(st.session_state.llm, prompt, context=template)

    st.session_state.messages.append({"role": "assistant", "content": response})
    st.chat_message("assistant", avatar="🤖").write(response)
logo.png ADDED
requirments.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pdfplumber
3
+ python-dotenv
4
+ haystack-ai
5
+ transformers
6
+ accelerate
7
+ bitsandbytes
8
+ redis
9
+ python-multipart
10
+ sentence-transformers
11
+ langchain
12
+ semantic_text_splitter
13
+ google-generativeai
utlis/__init__.py ADDED
File without changes
utlis/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (140 Bytes). View file
 
utlis/__pycache__/constant.cpython-39.pyc ADDED
Binary file (1.05 kB). View file
 
utlis/__pycache__/helper.cpython-39.pyc ADDED
Binary file (5.37 kB). View file
 
utlis/constant.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ import redis
4
+
5
# Deployment settings come from the process environment, optionally populated
# from a local .env file, so that credentials are not kept in source control.
load_dotenv()

# NOTE(review): a Redis password was previously committed in this file; it
# should be rotated, since it remains visible in the repository history.
REDIS_CONNECTION = redis.Redis(
    host=os.getenv("REDIS_HOST", "redis-13875.c240.us-east-1-3.ec2.redns.redis-cloud.com"),
    port=int(os.getenv("REDIS_PORT", "13875")),
    password=os.getenv("REDIS_PASSWORD"),
    # TLS is off unless REDIS_SSL is set to 1/true/yes (the previous hard-coded
    # value was False).
    ssl=os.getenv("REDIS_SSL", "false").strip().lower() in ("1", "true", "yes"),
)


# Base URLs of the two backends; override with environment variables when the
# servers are not reachable at the default LAN address.
IP_WEB_SERVER = os.getenv("IP_WEB_SERVER", "http://192.168.11.119:8000")
IP_MODEL_SERVER = os.getenv("IP_MODEL_SERVER", "http://192.168.11.119:8001")

# Endpoints served by the model server.
EMBEDDING_MODELS_API = IP_MODEL_SERVER + "/models_&_sizes"
CHAT_API = IP_MODEL_SERVER + "/chat"

# Endpoints served by the web server.
SERVICES_API = IP_WEB_SERVER + "/services/"
ADD_SERVICES_API = IP_WEB_SERVER + "/add_services"
CHUNK_STORE_API = IP_WEB_SERVER + "/chunk_and_store"
SEARCH_API = IP_WEB_SERVER + "/search"
DOCUMENT_API = IP_WEB_SERVER + "/documents"
REMOVE_DOCUMENT_API = IP_WEB_SERVER + "/remove_documents"
REMOVE_SERVICE_API = IP_WEB_SERVER + "/remove_service"
utlis/helper.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pdfplumber
2
+ import streamlit as st
3
+ import requests
4
+ import json
5
+ import redis
6
+ import redis.commands.search
7
+ from redis.commands.search.field import TagField, VectorField, TextField
8
+ from redis.commands.search.indexDefinition import IndexDefinition, IndexType
9
+ import logging
10
+ from redis.commands.search.query import Query
11
+ import numpy as np
12
+ from typing import List, Dict, Any
13
+ from semantic_text_splitter import TextSplitter
14
+ from tokenizers import Tokenizer
15
+ from sentence_transformers import SentenceTransformer
16
+ from utlis.constant import *
17
+ from PIL import Image
18
+ import google.generativeai as genai
19
import os

# The Gemini API key is taken from the GOOGLE_API_KEY environment variable
# instead of being embedded in the source.
# NOTE(review): the key that was previously committed on this line should be
# revoked, since it remains visible in the repository history.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
20
+
21
def initialize_session_state():
    """Seed ``st.session_state`` with a default for every key not yet present.

    Keys that already exist are left untouched, so calling this on every
    script run does not reset values set earlier in the session.
    """
    defaults = {
        "token": "abcd",
        "service": None,
        "use_document": False,
        "flag": False,
        "embdding_model": None,
        "indexing_method": None,
        "uploaded_files": None,
        # The conversation opens with a greeting from the assistant.
        "messages": [{"role": "assistant", "content": "How can I help you?"}],
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value
39
+
40
+
41
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Whatever ``pdfplumber.open`` accepts for the document.

    Returns:
        The layout-preserving text of all pages joined without a separator;
        an empty string when no page yields text.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # `or ""` keeps the join from failing if a page yields None instead
        # of a string.
        return "".join(
            page.extract_text(x_tolerance=2, y_tolerance=4, layout=True, x_density=5, y_density=10) or ""
            for page in pdf.pages
        )
48
+
49
def delete_service(token,service_slected_to_delete):
    """Ask the backend to remove each named service and report every outcome.

    One POST is sent to ``REMOVE_SERVICE_API`` per service name; the result is
    shown with ``st.success`` when the reply's "success" field equals True and
    with ``st.error`` otherwise.

    Args:
        token: User token sent as the "token" field of each request.
        service_slected_to_delete: Iterable of service names to remove.
    """
    json_headers = {'Content-Type': 'application/json'}
    for name in service_slected_to_delete:
        body = json.dumps({"token": token, "servicename": name})
        reply = requests.post(REMOVE_SERVICE_API, data=body, headers=json_headers)
        if not json.loads(reply.text).get("success") == True:
            st.error(f"{name} not deleted successfully")
            continue
        st.success(f"{name} deleted successfully")
68
+
69
def delete_document(token, service,document_slected_to_delete):
    """Ask the backend to remove each named document from one service.

    One POST is sent to ``REMOVE_DOCUMENT_API`` per document; the result is
    shown with ``st.success`` when the reply's "status" field is "success"
    and with ``st.error`` otherwise.

    Args:
        token: User token sent as the "token" field of each request.
        service: Service name sent as the "servicename" field.
        document_slected_to_delete: Iterable of document names to remove.
    """
    json_headers = {'Content-Type': 'application/json'}
    for name in document_slected_to_delete:
        body = json.dumps({
            "token": token,
            "servicename": service,
            "documentname": name,
        })
        reply = requests.post(REMOVE_DOCUMENT_API, data=body, headers=json_headers)
        outcome = json.loads(reply.text).get("status")
        if outcome != "success":
            st.error(f"{name} not deleted successfully")
            continue
        st.success(f"{name} deleted successfully")
91
def gemini_vision(file):
    """Ask the Gemini vision model to transcribe the text shown in an image.

    Args:
        file: Anything ``PIL.Image.open`` accepts for the image.

    Returns:
        The text returned by the model, or an empty string when the response
        carries no text.
    """
    prompt = "please extract all text fromt this image"
    model = genai.GenerativeModel('gemini-pro-vision')
    # The context manager releases the image's resources once the request is sent.
    with Image.open(file) as load_image:
        response = model.generate_content([prompt, load_image])

    try:
        return response.text
    except ValueError:
        # `.text` raises ValueError when the response holds no usable text
        # part (for example a blocked or empty candidate).
        return ""
98
def add_service(token,servicename,embdding_model):
    """Register a new service with the backend and report the outcome.

    A single POST is sent to ``ADD_SERVICES_API``. ``st.success`` is shown when
    the reply has a truthy "added_services" field; otherwise the raw reply
    body is shown with ``st.error``.

    Args:
        token: User token sent as the "token" field.
        servicename: Name of the service to create.
        embdding_model: Embedding model name sent as the service's "modelname".
    """
    payload = {
        "token": token,
        "services": [
            {"servicename": servicename, "modelname": embdding_model},
        ],
    }
    reply = requests.post(
        ADD_SERVICES_API,
        data=json.dumps(payload),
        headers={'Content-Type': 'application/json'},
    )
    if not json.loads(reply.text).get("added_services"):
        st.error(reply.text)
        return
    st.success(f"{servicename} added successfully")
123
def _stored_document_name(filename):
    """Return *filename* with spaces removed and "(", ")", "-" replaced by "_"."""
    return filename.translate(str.maketrans({" ": None, "(": "_", ")": "_", "-": "_"}))


def add_document(token,servicename):
    """Extract text from each uploaded file and send it to the backend for storage.

    Files are read from ``st.session_state.uploaded_files``. PDFs go through
    ``extract_text_from_pdf``; every other type goes through ``gemini_vision``.
    Each file with extracted text is POSTed to ``CHUNK_STORE_API``, and the
    outcome is reported with ``st.success`` or ``st.error``.

    Args:
        token: User token sent as the "user_id" field.
        servicename: Target service sent as the "service_name" field.
    """
    headers = {'Content-Type': 'application/json'}

    for file in st.session_state.uploaded_files:
        # The uploader reports a MIME type such as "application/pdf".
        if file.type.split('/')[-1] == 'pdf':
            text = extract_text_from_pdf(file)
        else:
            text = gemini_vision(file)

        if not text:
            st.error("we can't extract text from {}".format(file.name))
            continue

        payload = {
            "text": text,
            "document_name": _stored_document_name(file.name),
            "user_id": token,
            "service_name": servicename,
        }
        response = requests.post(CHUNK_STORE_API, data=json.dumps(payload), headers=headers)

        # The status message shows the file name with spaces turned into
        # underscores, which differs from the sanitized name sent above.
        display_name = file.name.replace(" ", "_")
        if json.loads(response.text).get("success") == True:
            st.success(f"{display_name} uploaded successfully")
        else:
            st.error(f"{display_name} not uploaded successfully")
163
+
164
+
165
def get_context(prompt,token,service_name,top_k):
    """Fetch the text chunks the search backend returns for a query.

    The request is POSTed to ``SEARCH_API`` and is restricted to the documents
    held in ``st.session_state.doument_slected_to_chat``.

    Args:
        prompt: Query string sent as "query_str".
        token: User token sent as "userid".
        service_name: Service to search, sent as "service_name".
        top_k: Value sent as "top_k".

    Returns:
        A list with the "chunk" value of every entry in the reply's "results",
        or an empty list when "results" is missing or empty.
    """
    body = json.dumps({
        "userid": token,
        "service_name": service_name,
        "query_str": prompt,
        "document_names": st.session_state.doument_slected_to_chat,
        "top_k": top_k,
    })
    reply = requests.post(SEARCH_API, data=body, headers={'Content-Type': 'application/json'})

    hits = json.loads(reply.text).get("results")
    if not hits:
        return []
    return [hit['chunk'] for hit in hits]
192
+
193
def query(payload):
    """POST *payload* as JSON to ``API_URL`` and return the decoded JSON reply.

    NOTE(review): ``API_URL`` and ``HEADERS`` are not defined in the parts of
    the project visible here; confirm they exist before calling this helper.
    """
    return requests.post(API_URL, headers=HEADERS, json=payload).json()
196
+
197
+
198
def generate_response(llm_name, question, context = None):
    """Ask the model server to answer *question* and return the answer text.

    Args:
        llm_name: Model identifier sent as "model_name".
        question: The user's question.
        context: Optional context sent alongside the question.

    Returns:
        The reply's "response" field. When that field is missing the
        quota-exceeded message is returned. When the request fails or the
        reply is not JSON, a message describing the failure is returned
        instead of raising, so the chat turn can still be shown.
    """
    payload = {
        "context": context,
        "question": question,
        "model_name": llm_name,
    }
    try:
        reply = requests.post(
            CHAT_API,
            data=json.dumps(payload),
            headers={'Content-Type': 'application/json'},
        )
        body = json.loads(reply.text)
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers json.JSONDecodeError for non-JSON replies.
        return f"Could not get an answer from the model server: {exc}"

    if not isinstance(body, dict):
        return "429 Quota exceeded for quota metric."
    return body.get("response", "429 Quota exceeded for quota metric.")