add txt file
Browse files
- app.py +44 -26
- utlis/constant.py +1 -0
app.py
CHANGED
@@ -33,7 +33,7 @@ with st.sidebar:
|
|
33 |
|
34 |
|
35 |
if len(services)>0:
|
36 |
-
st.session_state.uploaded_files = st.file_uploader("Upload PDF file", type=["pdf"], accept_multiple_files=False)
|
37 |
if st.session_state.uploaded_files:
|
38 |
st.session_state.process = st.button('Process')
|
39 |
if st.session_state.process:
|
@@ -56,18 +56,22 @@ with st.sidebar:
|
|
56 |
history_document =json.loads(history_document.text).get("documents",[])
|
57 |
history_document = [doc["documentname"] for doc in history_document]
|
58 |
st.session_state.doument_slected_to_chat = st.selectbox("Choose Documnet",history_document)
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
|
|
65 |
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
|
|
|
|
|
|
71 |
else:
|
72 |
st.session_state.service_slected_to_chat = None
|
73 |
|
@@ -163,20 +167,34 @@ with st.container():
|
|
163 |
keys = get_all_keys(schema)
|
164 |
comments = handle_comments(keys)
|
165 |
if schema and st.button('Process') :
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
180 |
if response_data.get('status')=='success':
|
181 |
json_str =response_data.get("json")
|
182 |
json_str_formatted = json.dumps(json_str)
|
|
|
33 |
|
34 |
|
35 |
if len(services)>0:
|
36 |
+
st.session_state.uploaded_files = st.file_uploader("Upload PDF file", type=["pdf","txt"], accept_multiple_files=False)
|
37 |
if st.session_state.uploaded_files:
|
38 |
st.session_state.process = st.button('Process')
|
39 |
if st.session_state.process:
|
|
|
56 |
history_document =json.loads(history_document.text).get("documents",[])
|
57 |
history_document = [doc["documentname"] for doc in history_document]
|
58 |
st.session_state.doument_slected_to_chat = st.selectbox("Choose Documnet",history_document)
|
59 |
+
if st.session_state.doument_slected_to_chat.split("_")[-1]=="pdf":
|
60 |
+
data = {"token": st.session_state.token, "service_name": st.session_state.service_slected_to_chat,"document_name":st.session_state.doument_slected_to_chat}
|
61 |
+
json_data = json.dumps(data)
|
62 |
+
headers = {'Content-Type': 'application/json'}
|
63 |
+
number_pages = requests.get(GET_NUM_PAGES,data=json_data, headers=headers)
|
64 |
+
number_pages =json.loads(number_pages.text).get("num_pages")
|
65 |
+
page_options = list(range(1, int(number_pages) + 1))
|
66 |
|
67 |
+
st.session_state.start_page = st.selectbox("Start Page",page_options)
|
68 |
+
st.session_state.end_page = st.selectbox("End Page", page_options, index=len(page_options) - 1)
|
69 |
+
st.session_state.method = st.selectbox("Chunking Method", ["chunk_per_page", "personalize_chunking"])
|
70 |
+
if st.session_state.method=="personalize_chunking":
|
71 |
+
st.session_state.split_token = st.text_area("Split Token")
|
72 |
+
elif st.session_state.doument_slected_to_chat.split("_")[-1]=="txt":
|
73 |
+
st.session_state.method = st.selectbox("Chunking Method", ["personalize_chunking"])
|
74 |
+
st.session_state.split_token = st.text_area("Split Token")
|
75 |
else:
|
76 |
st.session_state.service_slected_to_chat = None
|
77 |
|
|
|
167 |
keys = get_all_keys(schema)
|
168 |
comments = handle_comments(keys)
|
169 |
if schema and st.button('Process') :
|
170 |
+
if st.session_state.doument_slected_to_chat.split("_")[-1]=="pdf":
|
171 |
+
data = {"token": st.session_state.token,
|
172 |
+
"service_name": st.session_state.service_slected_to_chat,
|
173 |
+
"document_name": st.session_state.doument_slected_to_chat,
|
174 |
+
"method": st.session_state.method,
|
175 |
+
"model": st.session_state.llm,
|
176 |
+
"schema": schema,
|
177 |
+
"comment": comments,
|
178 |
+
"split_token": st.session_state.split_token if st.session_state.method == "personalize_chunking" else "",
|
179 |
+
"start_page": st.session_state.start_page,
|
180 |
+
"end_page": st.session_state.end_page}
|
181 |
+
json_data = json.dumps(data)
|
182 |
+
headers = {'Content-Type': 'application/json'}
|
183 |
+
response = requests.get(RESPONSE_API,data=json_data, headers=headers)
|
184 |
+
response_data = json.loads(response.text)
|
185 |
+
elif st.session_state.doument_slected_to_chat.split("_")[-1]=="txt":
|
186 |
+
data = {"token": st.session_state.token,
|
187 |
+
"service_name": st.session_state.service_slected_to_chat,
|
188 |
+
"document_name": st.session_state.doument_slected_to_chat,
|
189 |
+
"method": st.session_state.method,
|
190 |
+
"model": st.session_state.llm,
|
191 |
+
"schema": schema,
|
192 |
+
"comment": comments,
|
193 |
+
"split_token": st.session_state.split_token}
|
194 |
+
json_data = json.dumps(data)
|
195 |
+
headers = {'Content-Type': 'application/json'}
|
196 |
+
response = requests.get(RESPONSE_TXT_API,data=json_data, headers=headers)
|
197 |
+
response_data = json.loads(response.text)
|
198 |
if response_data.get('status')=='success':
|
199 |
json_str =response_data.get("json")
|
200 |
json_str_formatted = json.dumps(json_str)
|
utlis/constant.py
CHANGED
@@ -10,6 +10,7 @@ REMOVE_DOCUMENTS_API = IP_WEB_SERVER+"/remove_documents"
|
|
10 |
REMOVE_SERVICE_API = IP_WEB_SERVER+"/remove_service"
|
11 |
GET_NUM_PAGES = IP_WEB_SERVER+"/get_num_pages"
|
12 |
RESPONSE_API = IP_WEB_SERVER+"/structure_response"
|
|
|
13 |
DEFAULT_SCHEMA = {
|
14 |
"GeographicContext": "<variable>",
|
15 |
"SubGeographicContext": "<variable>",
|
|
|
10 |
REMOVE_SERVICE_API = IP_WEB_SERVER+"/remove_service"
|
11 |
GET_NUM_PAGES = IP_WEB_SERVER+"/get_num_pages"
|
12 |
RESPONSE_API = IP_WEB_SERVER+"/structure_response"
|
13 |
+
RESPONSE_TXT_API = IP_WEB_SERVER+"/structure_response_text"
|
14 |
DEFAULT_SCHEMA = {
|
15 |
"GeographicContext": "<variable>",
|
16 |
"SubGeographicContext": "<variable>",
|