first commit
Browse files- app.py +194 -0
- logo.png +0 -0
- requirments.txt +13 -0
- utlis/__init__.py +0 -0
- utlis/__pycache__/__init__.cpython-39.pyc +0 -0
- utlis/__pycache__/constant.cpython-39.pyc +0 -0
- utlis/__pycache__/helper.cpython-39.pyc +0 -0
- utlis/constant.py +31 -0
- utlis/helper.py +199 -0
app.py
ADDED
@@ -0,0 +1,194 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Streamlit front-end for the AGDC document-scraping service.
# All helpers and API-endpoint constants come from utlis.helper (which
# re-exports utlis.constant, json, requests and streamlit as st).
from utlis.helper import *

initialize_session_state()


def _fetch_services(token):
    """Return the list of service names registered for *token*."""
    payload = json.dumps({"token": token})
    headers = {'Content-Type': 'application/json'}
    # NOTE(review): the backend reads a JSON body on GET requests.
    response = requests.get(SERVICES_API, data=payload, headers=headers)
    return json.loads(response.text)


def _fetch_document_names(token, servicename):
    """Return the document names stored under *servicename*."""
    payload = json.dumps({"token": token, "servicename": servicename})
    headers = {'Content-Type': 'application/json'}
    response = requests.get(DOCUMENT_API, data=payload, headers=headers)
    documents = json.loads(response.text).get("documents", [])
    return [doc["documentname"] for doc in documents]


with st.sidebar:
    st.image("logo.png", width=170)
    st.title("AGDC")
    # Available LLM back-ends.
    llms = ['gpt-3.5-turbo', 'gemini']
    st.session_state.llm = st.selectbox("Choose LLM", llms)
    st.session_state.genre = st.radio(
        "Choose option",
        ["Select document", "Add document(s)", "Delete service(s)", "Delete document(s)"])

    if st.session_state.genre == "Add document(s)":
        st.title('Add Document(s)')
        add_new_service = st.checkbox("Add new service")
        if add_new_service:
            new_service = st.text_input("Enter service name")
            if new_service and st.button('Add'):
                add_service(st.session_state.token, new_service)
        services = _fetch_services(st.session_state.token)
        if len(services) > 0:
            st.session_state.service = st.selectbox("Choose Service", services)

            st.session_state.uploaded_files = st.file_uploader(
                "Upload PDF file", type=["pdf"], accept_multiple_files=False)
            if st.session_state.uploaded_files:
                st.session_state.process = st.button('Process')
                if st.session_state.process:
                    add_document(st.session_state.token, st.session_state.service)

    elif st.session_state.genre == "Select document":
        st.title('Scrape Document')
        services = _fetch_services(st.session_state.token)

        if len(services) > 0:
            st.session_state.service_slected_to_chat = st.selectbox("Choose Service", services)
            history_document = _fetch_document_names(
                st.session_state.token, st.session_state.service_slected_to_chat)
            # NOTE: session-state keys keep their historical (misspelled)
            # names so any other module reading them keeps working.
            st.session_state.doument_slected_to_chat = st.selectbox("Choose Document", history_document)
            data = {"token": st.session_state.token,
                    "service_name": st.session_state.service_slected_to_chat,
                    "document_name": st.session_state.doument_slected_to_chat}
            json_data = json.dumps(data)
            headers = {'Content-Type': 'application/json'}
            number_pages = requests.get(GET_NUM_PAGES, data=json_data, headers=headers)
            number_pages = json.loads(number_pages.text).get("num_pages")
            page_options = list(range(1, int(number_pages) + 1))

            st.session_state.start_page = st.selectbox("Start Page", page_options)
            st.session_state.end_page = st.selectbox("End Page", page_options, index=len(page_options) - 1)
            st.session_state.method = st.selectbox("Chunking Method", ["chunk_per_page", "personalize_chunking"])
            if st.session_state.method == "personalize_chunking":
                st.session_state.split_token = st.text_area("Split Token")
        else:
            st.session_state.service_slected_to_chat = None

    elif st.session_state.genre == "Delete service(s)":
        st.title('Delete Service(s)')
        services = _fetch_services(st.session_state.token)
        if len(services) >= 2:
            # Pseudo-entry that lets the user select every service at once.
            services.append("ALL")
        if "ALL" in services:
            service_slected = st.multiselect(
                "", services, default="ALL"
            )
        elif len(services) == 1:
            service_slected = st.multiselect(
                "", services, default=services[0]
            )
        else:
            service_slected = st.multiselect(
                "", services
            )
        if "ALL" in service_slected:
            # Expand "ALL" to the full service list (minus the marker).
            service_slected = services
            service_slected.remove("ALL")
        st.write("You selected:", service_slected)

        if len(service_slected) > 0:
            st.session_state.delete = st.button('Delete')
            if st.session_state.delete:
                delete_service(st.session_state.token, service_slected)

    elif st.session_state.genre == "Delete document(s)":
        st.title('Delete Document(s)')
        services = _fetch_services(st.session_state.token)
        if len(services) > 0:
            service = st.selectbox("Choose Service", services)
            history_document = _fetch_document_names(st.session_state.token, service)
            if len(history_document) >= 2:
                history_document.append("ALL")
            if "ALL" in history_document:
                document_slected_to_delete = st.multiselect(
                    "", history_document, default="ALL"
                )
            elif len(history_document) == 1:
                document_slected_to_delete = st.multiselect(
                    "", history_document, default=history_document[0]
                )
            else:
                document_slected_to_delete = st.multiselect(
                    "", history_document
                )
            if "ALL" in document_slected_to_delete:
                document_slected_to_delete = history_document
                document_slected_to_delete.remove("ALL")

            st.write("You selected:", document_slected_to_delete)
            if len(document_slected_to_delete) > 0:
                st.session_state.delete = st.button('Delete')
                if st.session_state.delete:
                    # BUG FIX: delete from the service chosen in THIS branch,
                    # not from st.session_state.service (set in the "Add"
                    # branch and possibly stale/None here).
                    delete_document(st.session_state.token, service, document_slected_to_delete)

# Keep the main title on a single line.
css_style = """
<style>
.title {
white-space: nowrap;
}
</style>
"""

st.markdown(css_style, unsafe_allow_html=True)

with st.container():
    st.markdown('<h1 class="title">Augmented Generative Document Scraper</h1>', unsafe_allow_html=True)
    if st.session_state.genre == "Select document" and st.session_state.service_slected_to_chat:
        schema = display_and_validate_schema()
        comments = None
        if schema and st.checkbox("Add comments"):
            keys = get_all_keys(schema)
            comments = handle_comments(keys)
        if schema and st.button('Process'):
            data = {"token": st.session_state.token,
                    "service_name": st.session_state.service_slected_to_chat,
                    "document_name": st.session_state.doument_slected_to_chat,
                    "method": st.session_state.method,
                    "model": st.session_state.llm,
                    "schema": schema,
                    "comment": comments,
                    "split_token": st.session_state.split_token if st.session_state.method == "personalize_chunking" else "",
                    "start_page": st.session_state.start_page,
                    "end_page": st.session_state.end_page}
            json_data = json.dumps(data)
            headers = {'Content-Type': 'application/json'}
            response = requests.get(RESPONSE_API, data=json_data, headers=headers)
            response_data = json.loads(response.text)
            if response_data.get('status') == 'success':
                json_str = response_data.get("json")
                json_str_formatted = json.dumps(json_str)

                # Encode this JSON string to bytes, which is required for the download
                json_bytes = json_str_formatted.encode('utf-8')
                st.download_button(
                    label="Download JSON",
                    data=json_bytes,
                    file_name="results.json",
                    mime="application/json"
                )
            else:
                st.error("Error in processing document")
logo.png
ADDED
requirments.txt
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pdfplumber
|
3 |
+
python-dotenv
|
4 |
+
haystack-ai
|
5 |
+
transformers
|
6 |
+
accelerate
|
7 |
+
bitsandbytes
|
8 |
+
redis
|
9 |
+
python-multipart
|
10 |
+
sentence-transformers
|
11 |
+
langchain
|
12 |
+
semantic_text_splitter
|
13 |
+
google-generativeai
|
utlis/__init__.py
ADDED
File without changes
|
utlis/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (140 Bytes). View file
|
|
utlis/__pycache__/constant.cpython-39.pyc
ADDED
Binary file (759 Bytes). View file
|
|
utlis/__pycache__/helper.cpython-39.pyc
ADDED
Binary file (5.74 kB). View file
|
|
utlis/constant.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Endpoint constants and the default extraction schema for the AGDC client.

# Previously used tunnelled endpoints, kept for reference:
# IP_WEB_SERVER = "https://f564-196-65-150-53.ngrok-free.app"
# IP_MODEL_SERVER = "https://fluffy-mole-81.telebit.io"

# Base URL of the web server; every API path below is built from it.
IP_WEB_SERVER = "http://localhost:80"

# REST endpoints exposed by the web server.
SERVICES_API = IP_WEB_SERVER + "/services/"
ADD_SERVICES_API = IP_WEB_SERVER + "/add_services"
ADD_STORE_DOCUMENT = IP_WEB_SERVER + "/add_and_store_document"
DOCUMENT_API = IP_WEB_SERVER + "/documents"
REMOVE_DOCUMENTS_API = IP_WEB_SERVER + "/remove_documents"
REMOVE_SERVICE_API = IP_WEB_SERVER + "/remove_service"
GET_NUM_PAGES = IP_WEB_SERVER + "/get_num_pages"
RESPONSE_API = IP_WEB_SERVER + "/structure_response"

# Default JSON schema shown to the user; "<variable>" marks the fields the
# extraction model is expected to fill in.
DEFAULT_SCHEMA = {
    "GeographicContext": "<variable>",
    "SubGeographicContext": "<variable>",
    "Channel": "<variable>",
    "RateType": "<variable>",
    "Notes": ["<variable>"],
    "Rates": [
        {
            "PaymentProduct": "<variable>",
            "Details": [
                {
                    "FeeTier": "<variable>",
                    "IRD": ["<variable>"],
                    "Rate": "<variable>",
                },
            ],
        },
    ],
}
utlis/helper.py
ADDED
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Shared helpers for the AGDC Streamlit client: PDF text extraction,
# Gemini OCR, and thin wrappers around the document web-server API.
import base64
import json
import logging
import os
from typing import List, Dict, Any

import numpy as np
import pdfplumber
import requests
import streamlit as st
import redis
import redis.commands.search
from redis.commands.search.field import TagField, VectorField, TextField
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
from redis.commands.search.query import Query
from semantic_text_splitter import TextSplitter
from tokenizers import Tokenizer
from sentence_transformers import SentenceTransformer
from PIL import Image
import google.generativeai as genai

from utlis.constant import *

# SECURITY FIX: the Gemini API key was previously hard-coded in this file
# (and is therefore compromised — rotate it in the Google console). It is
# now read from the environment instead of being committed to the repo.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", ""))
def initialize_session_state():
    """Seed st.session_state with the defaults the UI expects on first run.

    Existing keys are left untouched so reruns don't clobber user state.
    """
    # NOTE: "embdding_model" keeps its historical spelling — other code may
    # read the key under that exact name.
    defaults = {
        "token": "abcd",
        "service": None,
        "use_document": False,
        "flag": False,
        "embdding_model": None,
        "indexing_method": None,
        "uploaded_files": None,
        "messages": [{"role": "assistant", "content": "How can I help you?"}],
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    Uses pdfplumber's layout-aware extraction with tolerances tuned for
    dense, tabular documents.
    """
    text = ""
    with pdfplumber.open(pdf_path) as pdf:
        for page_number, page in enumerate(pdf.pages, start=1):
            # extract_text() can return None for a page with no extractable
            # text; guard so a blank page doesn't raise TypeError on +=.
            page_text = page.extract_text(
                x_tolerance=2, y_tolerance=4, layout=True, x_density=5, y_density=10)
            text += page_text or ""
    return text
def delete_service(token, service_slected_to_delete):
    """Delete each named service via the web server and report the outcome.

    One DELETE request is issued per service name; a success/error banner
    is shown for each.
    """
    headers = {'Content-Type': 'application/json'}
    for name in service_slected_to_delete:
        # The backend expects a JSON body on DELETE requests.
        payload = json.dumps({"token": token, "servicename": name})
        response = requests.delete(REMOVE_SERVICE_API, data=payload, headers=headers)
        if json.loads(response.text).get("success") == True:
            st.success(f"{name} deleted successfully")
        else:
            st.error(f"{name} not deleted successfully")
def delete_document(token, service, document_slected_to_delete):
    """Delete the given documents from *service* and report the outcome.

    A single DELETE request carries the whole list of document names.
    Leftover debug print() calls were removed.
    """
    data = {
        "token": token,
        "service_name": service,
        "document_names": document_slected_to_delete,
    }
    json_data = json.dumps(data)
    headers = {'Content-Type': 'application/json'}
    response = requests.delete(REMOVE_DOCUMENTS_API, data=json_data, headers=headers)
    if json.loads(response.text).get("status") == "success":
        st.success("document(s) deleted successfully")
    else:
        st.error("document(s) not deleted successfully")
def gemini_vision(file):
    """Extract text from an image with Gemini Pro Vision.

    *file* is anything PIL.Image.open accepts (path or file-like object).
    Returns the text the model produced.
    """
    load_image = Image.open(file)
    # Typo fix in the prompt ("fromt" -> "from") for a cleaner instruction.
    prompt = "please extract all text from this image"
    model = genai.GenerativeModel('gemini-pro-vision')
    response = model.generate_content([prompt, load_image])
    return response.text
def add_service(token, servicename):
    """Register *servicename* with the web server and report the outcome."""
    payload = {
        "token": token,
        "services": [{"servicename": servicename}],
    }
    headers = {'Content-Type': 'application/json'}
    response = requests.post(ADD_SERVICES_API, data=json.dumps(payload), headers=headers)
    # The backend echoes the services it actually added.
    if json.loads(response.text).get("added_services"):
        st.success(f"{servicename} added successfully")
    else:
        st.error(response.text)
def add_document(token, servicename):
    """Upload the PDF currently held in st.session_state.uploaded_files.

    The file is base64-encoded and POSTed to the web server under a
    sanitized document name; a success/error banner reports the result.
    """
    file = st.session_state.uploaded_files

    # Sanitize the name for the backend: strip spaces, then map each of
    # ( ) - . to an underscore in one translate() pass (replaces the old
    # chain of five .replace() calls).
    document_name = file.name.replace(" ", "").translate(str.maketrans("()-.", "____"))

    encoded_file = base64.b64encode(file.read()).decode('utf-8')
    data = {
        "token": token,
        "service_name": servicename,
        "document_name": document_name,
        "file": encoded_file,
    }
    json_data = json.dumps(data)
    headers = {'Content-Type': 'application/json'}
    response = requests.post(ADD_STORE_DOCUMENT, data=json_data, headers=headers)

    # BUG FIX: report the name that was actually uploaded instead of
    # re-deriving a differently-sanitized one from file.name.
    if json.loads(response.text).get("status") == "success":
        st.success(f"{document_name} uploaded successfully")
    else:
        st.error(f"{document_name} not uploaded successfully")
def get_all_keys(d):
    """Return every key appearing anywhere in the nested mapping *d*.

    Walks dicts found directly as values and dicts found directly inside
    list values. The result is deduplicated and has no guaranteed order.
    """
    seen = set()
    stack = [d]
    while stack:
        mapping = stack.pop()
        for key, value in mapping.items():
            seen.add(key)
            if isinstance(value, dict):
                stack.append(value)
            elif isinstance(value, list):
                # Only dicts directly inside the list are descended into.
                stack.extend(item for item in value if isinstance(item, dict))
    return list(seen)
def display_and_validate_schema():
    """Show an editable JSON schema text area and return it parsed.

    Returns the parsed schema dict, or None (with an error banner) when
    the text is not valid JSON.
    """
    default_text = json.dumps(DEFAULT_SCHEMA, indent=2)
    user_text = st.text_area("JSON Schema", default_text, height=300)
    try:
        parsed = json.loads(user_text)
    except json.JSONDecodeError:
        st.error("The JSON schema is invalid. Please correct it and try again.")
        return None
    st.success("JSON schema is valid.")
    return parsed
def handle_comments(keys):
    """Collect a per-key comment from the user, paginated to keep the UI short.

    Returns a dict mapping each key on the currently selected page to the
    text the user entered for it.
    """
    comments = {}
    items_per_page = 6  # page size chosen for UI compactness
    total_pages = (len(keys) + items_per_page - 1) // items_per_page

    st.write("Please provide comments for each key to assist our system:")

    page = st.number_input("Page", min_value=1, max_value=total_pages, step=1)
    first = (page - 1) * items_per_page
    for key in keys[first:first + items_per_page]:
        with st.expander(f"{key}"):
            comments[key] = st.text_input(f"{key}")
    return comments