Nechba committed on
Commit
c1903e3
·
1 Parent(s): f31bfcd

add text area option

Browse files
app.py CHANGED
@@ -33,11 +33,21 @@ with st.sidebar:
33
 
34
 
35
  if len(services)>0:
36
- st.session_state.uploaded_files = st.file_uploader("Upload PDF file", type=["pdf","txt"], accept_multiple_files=False)
37
- if st.session_state.uploaded_files:
38
- st.session_state.process = st.button('Process')
39
- if st.session_state.process:
40
- add_document(st.session_state.token,st.session_state.service)
 
 
 
 
 
 
 
 
 
 
41
 
42
  elif st.session_state.genre=="Select document":
43
  st.title('Scrape Document')
@@ -69,7 +79,8 @@ with st.sidebar:
69
  st.session_state.method = st.selectbox("Chunking Method", ["chunk_per_page", "personalize_chunking"])
70
  if st.session_state.method=="personalize_chunking":
71
  st.session_state.split_token = st.text_area("Split Token")
72
- elif st.session_state.doument_slected_to_chat.split("_")[-1]=="txt":
 
73
  st.session_state.method = st.selectbox("Chunking Method", ["personalize_chunking"])
74
  st.session_state.split_token = st.text_area("Split Token")
75
  else:
@@ -160,6 +171,12 @@ st.markdown(css_style, unsafe_allow_html=True)
160
 
161
  with st.container():
162
  st.markdown('<h1 class="title">Augmented Generative Document Scraper</h1>', unsafe_allow_html=True)
 
 
 
 
 
 
163
  if st.session_state.genre=="Select document" and st.session_state.service_slected_to_chat:
164
  schema = display_and_validate_schema()
165
  comments = None
@@ -182,7 +199,8 @@ with st.container():
182
  headers = {'Content-Type': 'application/json'}
183
  response = requests.get(RESPONSE_API,data=json_data, headers=headers)
184
  response_data = json.loads(response.text)
185
- elif st.session_state.doument_slected_to_chat.split("_")[-1]=="txt":
 
186
  data = {"token": st.session_state.token,
187
  "service_name": st.session_state.service_slected_to_chat,
188
  "document_name": st.session_state.doument_slected_to_chat,
@@ -195,18 +213,18 @@ with st.container():
195
  headers = {'Content-Type': 'application/json'}
196
  response = requests.get(RESPONSE_TXT_API,data=json_data, headers=headers)
197
  response_data = json.loads(response.text)
198
- if response_data.get('status')=='success':
199
- json_str =response_data.get("json")
200
- json_str_formatted = json.dumps(json_str)
201
 
202
- # Encode this JSON string to bytes, which is required for the download
203
- json_bytes = json_str_formatted.encode('utf-8')
204
- st.download_button(
205
- label="Download JSON",
206
- data=json_bytes,
207
- file_name="results.json",
208
- mime="application/json"
209
- )
210
- else:
211
- st.error("Error in processing document")
212
 
 
33
 
34
 
35
  if len(services)>0:
36
+ st.session_state.doc_ortext = st.radio("Choose option",["Documnt", "Text area"])
37
+ if st.session_state.doc_ortext=="Documnt":
38
+ st.session_state.uploaded_files = st.file_uploader("Upload PDF file", type=["pdf","txt"], accept_multiple_files=False)
39
+ if st.session_state.uploaded_files:
40
+ st.session_state.process = st.button('Process')
41
+ if st.session_state.process:
42
+ add_document(st.session_state.token,st.session_state.service)
43
+ # elif st.session_state.doc_ortext=="Text area":
44
+ # st.session_state.name_text_area = st.container().text_area("Enter name of the text area")
45
+ # st.session_state.text_area = st.container().text_area("Enter text")
46
+
47
+ # if st.session_state.text_area:
48
+ # st.session_state.process = st.container().button('Process')
49
+ # if st.session_state.process:
50
+ # add_text_document(st.session_state.token,st.session_state.service)
51
 
52
  elif st.session_state.genre=="Select document":
53
  st.title('Scrape Document')
 
79
  st.session_state.method = st.selectbox("Chunking Method", ["chunk_per_page", "personalize_chunking"])
80
  if st.session_state.method=="personalize_chunking":
81
  st.session_state.split_token = st.text_area("Split Token")
82
+ #elif st.session_state.doument_slected_to_chat.split("_")[-1]=="txt":
83
+ else:
84
  st.session_state.method = st.selectbox("Chunking Method", ["personalize_chunking"])
85
  st.session_state.split_token = st.text_area("Split Token")
86
  else:
 
171
 
172
  with st.container():
173
  st.markdown('<h1 class="title">Augmented Generative Document Scraper</h1>', unsafe_allow_html=True)
174
+ if st.session_state.genre=="Add document(s)" and st.session_state.doc_ortext == "Text area":
175
+ st.session_state.name_text_area = st.text_input("Enter name of the text area:")
176
+ st.session_state.text_area = st.text_area("Enter text:")
177
+ if st.session_state.text_area:
178
+ if st.button('Process Text'):
179
+ add_text_document(st.session_state.token, st.session_state.service)
180
  if st.session_state.genre=="Select document" and st.session_state.service_slected_to_chat:
181
  schema = display_and_validate_schema()
182
  comments = None
 
199
  headers = {'Content-Type': 'application/json'}
200
  response = requests.get(RESPONSE_API,data=json_data, headers=headers)
201
  response_data = json.loads(response.text)
202
+ #elif st.session_state.doument_slected_to_chat.split("_")[-1]=="txt":
203
+ else:
204
  data = {"token": st.session_state.token,
205
  "service_name": st.session_state.service_slected_to_chat,
206
  "document_name": st.session_state.doument_slected_to_chat,
 
213
  headers = {'Content-Type': 'application/json'}
214
  response = requests.get(RESPONSE_TXT_API,data=json_data, headers=headers)
215
  response_data = json.loads(response.text)
216
+ if response_data.get('status')=='success':
217
+ json_str =response_data.get("json")
218
+ json_str_formatted = json.dumps(json_str)
219
 
220
+ # Encode this JSON string to bytes, which is required for the download
221
+ json_bytes = json_str_formatted.encode('utf-8')
222
+ st.download_button(
223
+ label="Download JSON",
224
+ data=json_bytes,
225
+ file_name="results.json",
226
+ mime="application/json"
227
+ )
228
+ else:
229
+ st.error("Error in processing document")
230
 
utlis/__pycache__/constant.cpython-39.pyc CHANGED
Binary files a/utlis/__pycache__/constant.cpython-39.pyc and b/utlis/__pycache__/constant.cpython-39.pyc differ
 
utlis/__pycache__/helper.cpython-39.pyc CHANGED
Binary files a/utlis/__pycache__/helper.cpython-39.pyc and b/utlis/__pycache__/helper.cpython-39.pyc differ
 
utlis/helper.py CHANGED
@@ -117,6 +117,33 @@ def add_service(token,servicename):
117
  st.success(f"{servicename} added successfully")
118
  else:
119
  st.error(response.text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  def add_document(token,servicename):
121
 
122
 
 
117
  st.success(f"{servicename} added successfully")
118
  else:
119
  st.error(response.text)
120
def add_text_document(token, servicename):
    """Upload the text typed into the Streamlit text area as a document.

    Reads the raw text from ``st.session_state.text_area`` and the
    user-supplied name from ``st.session_state.name_text_area``, Base64-encodes
    the text, and POSTs it as JSON to ``ADD_STORE_DOCUMENT``. The outcome is
    reported through ``st.success`` / ``st.error``; nothing is returned.

    Args:
        token: Value sent as the ``token`` field of the request payload.
        servicename: Value sent as the ``service_name`` field of the payload.
    """
    document_text = st.session_state.text_area
    raw_name = st.session_state.name_text_area

    # The caller only checks that the text is non-empty, so guard against a
    # blank name here instead of posting a nameless document.
    if not raw_name or not raw_name.strip():
        st.error("Please enter a name for the text document")
        return

    # Map spaces, parentheses, dashes and dots to underscores in one pass
    # (same result as the previous chain of str.replace calls).
    document_name = raw_name.translate(str.maketrans(" ()-.", "_____"))

    # Encode the document text as Base64 so it can travel inside JSON.
    encoded_text = base64.b64encode(document_text.encode('utf-8')).decode('utf-8')

    data = {
        "token": token,
        "service_name": servicename,
        "document_name": document_name,
        "file": encoded_text  # Assuming the API can handle Base64 encoded text under the 'file' key
    }
    headers = {'Content-Type': 'application/json'}

    # A network failure or a non-JSON body (e.g. an HTML error page) must not
    # crash the Streamlit script; treat both as an unsuccessful upload.
    try:
        response = requests.post(ADD_STORE_DOCUMENT, data=json.dumps(data), headers=headers)
        payload = json.loads(response.text)
    except (requests.RequestException, ValueError):
        payload = None

    # Only a JSON object can carry a "status" field.
    status = payload.get("status") if isinstance(payload, dict) else None

    if status == "success":
        st.success(f"{document_name} uploaded successfully as text")
    else:
        st.error(f"{document_name} not uploaded successfully")
146
+
147
  def add_document(token,servicename):
148
 
149