Nechba committed on
Commit
f31bfcd
·
1 Parent(s): 045c775

add txt file

Browse files
Files changed (2) hide show
  1. app.py +44 -26
  2. utlis/constant.py +1 -0
app.py CHANGED
@@ -33,7 +33,7 @@ with st.sidebar:
33
 
34
 
35
  if len(services)>0:
36
- st.session_state.uploaded_files = st.file_uploader("Upload PDF file", type=["pdf"], accept_multiple_files=False)
37
  if st.session_state.uploaded_files:
38
  st.session_state.process = st.button('Process')
39
  if st.session_state.process:
@@ -56,18 +56,22 @@ with st.sidebar:
56
  history_document =json.loads(history_document.text).get("documents",[])
57
  history_document = [doc["documentname"] for doc in history_document]
58
  st.session_state.doument_slected_to_chat = st.selectbox("Choose Documnet",history_document)
59
- data = {"token": st.session_state.token, "service_name": st.session_state.service_slected_to_chat,"document_name":st.session_state.doument_slected_to_chat}
60
- json_data = json.dumps(data)
61
- headers = {'Content-Type': 'application/json'}
62
- number_pages = requests.get(GET_NUM_PAGES,data=json_data, headers=headers)
63
- number_pages =json.loads(number_pages.text).get("num_pages")
64
- page_options = list(range(1, int(number_pages) + 1))
 
65
 
66
- st.session_state.start_page = st.selectbox("Start Page",page_options)
67
- st.session_state.end_page = st.selectbox("End Page", page_options, index=len(page_options) - 1)
68
- st.session_state.method = st.selectbox("Chunking Method", ["chunk_per_page", "personalize_chunking"])
69
- if st.session_state.method=="personalize_chunking":
70
- st.session_state.split_token = st.text_area("Split Token")
 
 
 
71
  else:
72
  st.session_state.service_slected_to_chat = None
73
 
@@ -163,20 +167,34 @@ with st.container():
163
  keys = get_all_keys(schema)
164
  comments = handle_comments(keys)
165
  if schema and st.button('Process') :
166
- data = {"token": st.session_state.token,
167
- "service_name": st.session_state.service_slected_to_chat,
168
- "document_name": st.session_state.doument_slected_to_chat,
169
- "method": st.session_state.method,
170
- "model": st.session_state.llm,
171
- "schema": schema,
172
- "comment": comments,
173
- "split_token": st.session_state.split_token if st.session_state.method == "personalize_chunking" else "",
174
- "start_page": st.session_state.start_page,
175
- "end_page": st.session_state.end_page}
176
- json_data = json.dumps(data)
177
- headers = {'Content-Type': 'application/json'}
178
- response = requests.get(RESPONSE_API,data=json_data, headers=headers)
179
- response_data = json.loads(response.text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  if response_data.get('status')=='success':
181
  json_str =response_data.get("json")
182
  json_str_formatted = json.dumps(json_str)
 
33
 
34
 
35
  if len(services)>0:
36
+ st.session_state.uploaded_files = st.file_uploader("Upload PDF file", type=["pdf","txt"], accept_multiple_files=False)
37
  if st.session_state.uploaded_files:
38
  st.session_state.process = st.button('Process')
39
  if st.session_state.process:
 
56
  history_document =json.loads(history_document.text).get("documents",[])
57
  history_document = [doc["documentname"] for doc in history_document]
58
  st.session_state.doument_slected_to_chat = st.selectbox("Choose Documnet",history_document)
59
+ if st.session_state.doument_slected_to_chat.split("_")[-1]=="pdf":
60
+ data = {"token": st.session_state.token, "service_name": st.session_state.service_slected_to_chat,"document_name":st.session_state.doument_slected_to_chat}
61
+ json_data = json.dumps(data)
62
+ headers = {'Content-Type': 'application/json'}
63
+ number_pages = requests.get(GET_NUM_PAGES,data=json_data, headers=headers)
64
+ number_pages =json.loads(number_pages.text).get("num_pages")
65
+ page_options = list(range(1, int(number_pages) + 1))
66
 
67
+ st.session_state.start_page = st.selectbox("Start Page",page_options)
68
+ st.session_state.end_page = st.selectbox("End Page", page_options, index=len(page_options) - 1)
69
+ st.session_state.method = st.selectbox("Chunking Method", ["chunk_per_page", "personalize_chunking"])
70
+ if st.session_state.method=="personalize_chunking":
71
+ st.session_state.split_token = st.text_area("Split Token")
72
+ elif st.session_state.doument_slected_to_chat.split("_")[-1]=="txt":
73
+ st.session_state.method = st.selectbox("Chunking Method", ["personalize_chunking"])
74
+ st.session_state.split_token = st.text_area("Split Token")
75
  else:
76
  st.session_state.service_slected_to_chat = None
77
 
 
167
  keys = get_all_keys(schema)
168
  comments = handle_comments(keys)
169
  if schema and st.button('Process') :
170
+ if st.session_state.doument_slected_to_chat.split("_")[-1]=="pdf":
171
+ data = {"token": st.session_state.token,
172
+ "service_name": st.session_state.service_slected_to_chat,
173
+ "document_name": st.session_state.doument_slected_to_chat,
174
+ "method": st.session_state.method,
175
+ "model": st.session_state.llm,
176
+ "schema": schema,
177
+ "comment": comments,
178
+ "split_token": st.session_state.split_token if st.session_state.method == "personalize_chunking" else "",
179
+ "start_page": st.session_state.start_page,
180
+ "end_page": st.session_state.end_page}
181
+ json_data = json.dumps(data)
182
+ headers = {'Content-Type': 'application/json'}
183
+ response = requests.get(RESPONSE_API,data=json_data, headers=headers)
184
+ response_data = json.loads(response.text)
185
+ elif st.session_state.doument_slected_to_chat.split("_")[-1]=="txt":
186
+ data = {"token": st.session_state.token,
187
+ "service_name": st.session_state.service_slected_to_chat,
188
+ "document_name": st.session_state.doument_slected_to_chat,
189
+ "method": st.session_state.method,
190
+ "model": st.session_state.llm,
191
+ "schema": schema,
192
+ "comment": comments,
193
+ "split_token": st.session_state.split_token}
194
+ json_data = json.dumps(data)
195
+ headers = {'Content-Type': 'application/json'}
196
+ response = requests.get(RESPONSE_TXT_API,data=json_data, headers=headers)
197
+ response_data = json.loads(response.text)
198
  if response_data.get('status')=='success':
199
  json_str =response_data.get("json")
200
  json_str_formatted = json.dumps(json_str)
utlis/constant.py CHANGED
@@ -10,6 +10,7 @@ REMOVE_DOCUMENTS_API = IP_WEB_SERVER+"/remove_documents"
10
  REMOVE_SERVICE_API = IP_WEB_SERVER+"/remove_service"
11
  GET_NUM_PAGES = IP_WEB_SERVER+"/get_num_pages"
12
  RESPONSE_API = IP_WEB_SERVER+"/structure_response"
 
13
  DEFAULT_SCHEMA = {
14
  "GeographicContext": "<variable>",
15
  "SubGeographicContext": "<variable>",
 
10
  REMOVE_SERVICE_API = IP_WEB_SERVER+"/remove_service"
11
  GET_NUM_PAGES = IP_WEB_SERVER+"/get_num_pages"
12
  RESPONSE_API = IP_WEB_SERVER+"/structure_response"
13
+ RESPONSE_TXT_API = IP_WEB_SERVER+"/structure_response_text"
14
  DEFAULT_SCHEMA = {
15
  "GeographicContext": "<variable>",
16
  "SubGeographicContext": "<variable>",