from utlis.helper import * |
import sqlite3 |
import hashlib |
def create_document_id(token, service_selected, document_selected):
    """Return a SHA-256 hex digest identifying a (token, service, document) triple.

    The three values are formatted and concatenated in order with no separator,
    encoded with the default codec (UTF-8), and hashed.

    Args:
        token: Value identifying the current user/session.
        service_selected: Name of the selected service.
        document_selected: Name of the selected document.

    Returns:
        The 64-character lowercase hexadecimal digest.
    """
    raw_key = "{}{}{}".format(token, service_selected, document_selected)
    digest = hashlib.sha256()
    digest.update(raw_key.encode())
    return digest.hexdigest()
def create_database(db_path='document_cache.db'):
    """Create the SQLite cache tables if they do not exist yet.

    Two tables are ensured, both keyed by ``document_id``:
    ``schemas(document_id, schema)`` and ``comments(document_id, comments)``.
    Calling the function repeatedly is harmless because both statements use
    ``CREATE TABLE IF NOT EXISTS``.

    Args:
        db_path: Path of the SQLite database file. Defaults to
            ``'document_cache.db'`` in the current working directory.

    Raises:
        sqlite3.Error: If the database cannot be opened or a statement fails.
    """
    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        c.execute('''CREATE TABLE IF NOT EXISTS schemas
                     (document_id TEXT PRIMARY KEY, schema TEXT)''')
        c.execute('''CREATE TABLE IF NOT EXISTS comments
                     (document_id TEXT PRIMARY KEY, comments TEXT)''')
        conn.commit()
    finally:
        # Always release the file handle, even when a statement above raises.
        conn.close()
# Ensure the cache tables exist and seed session state before any widget
# below reads from or writes to st.session_state.
create_database()
initialize_session_state()


def _get_json(url, payload):
    """Send ``payload`` as a JSON body in a GET request to ``url`` and decode the JSON reply."""
    reply = requests.get(
        url,
        data=json.dumps(payload),
        headers={'Content-Type': 'application/json'},
    )
    return json.loads(reply.text)


def _fetch_services(token):
    """Return the decoded SERVICES_API reply for ``token`` (presumably a list of service names)."""
    return _get_json(SERVICES_API, {"token": token})


def _fetch_document_names(token, service_name):
    """Return the ``documentname`` of every entry DOCUMENT_API lists for ``service_name``."""
    listing = _get_json(DOCUMENT_API, {"token": token, "servicename": service_name})
    return [doc["documentname"] for doc in listing.get("documents", [])]


def _multiselect_with_all(options):
    """Render an unlabeled multiselect over ``options`` and return the chosen items.

    With two or more options an extra "ALL" entry is offered and pre-selected;
    picking it selects every real option. A single option is pre-selected, and
    an empty list renders an empty widget. ``options`` itself is never mutated.
    """
    if len(options) >= 2:
        chosen = st.multiselect("", options + ["ALL"], default="ALL")
        if "ALL" in chosen:
            return list(options)
        return chosen
    if len(options) == 1:
        return st.multiselect("", options, default=options[0])
    return st.multiselect("", options)


with st.sidebar:
    st.image("logo.png", width=170)
    st.title("AGDS")
    llms = ['gpt-3.5-turbo', 'gemini']
    st.session_state.llm = st.selectbox("Choose LLM", llms)
    st.session_state.genre = st.radio(
        "Choose option",
        ["Select document", "Add document(s)", "Delete service(s)", "Delete document(s)"],
    )

    if st.session_state.genre == "Add document(s)":
        st.title('Add Document(s)')
        add_new_service = st.checkbox("Add new service")
        if add_new_service:
            new_service = st.text_input("Enter service name")
            if new_service and st.button('Add'):
                add_service(st.session_state.token, new_service)

        # Fetched after the optional "Add" so a just-created service is listed.
        services = _fetch_services(st.session_state.token)
        if services:
            st.session_state.service = st.selectbox("Choose Service", services)
            st.session_state.doc_ortext = st.radio("Choose option", ["Documnt", "Text area"])
            if st.session_state.doc_ortext == "Documnt":
                st.session_state.uploaded_files = st.file_uploader(
                    "Upload PDF file", type=["pdf", "txt"], accept_multiple_files=False
                )
                if st.session_state.uploaded_files:
                    st.session_state.process = st.button('Process')
                    if st.session_state.process:
                        add_document(st.session_state.token, st.session_state.service)

    elif st.session_state.genre == "Select document":
        st.title('Scrape Document')
        services = _fetch_services(st.session_state.token)
        if services:
            st.session_state.service_slected_to_chat = st.selectbox("Choose Service", services)
            history_document = _fetch_document_names(
                st.session_state.token, st.session_state.service_slected_to_chat
            )
            st.session_state.doument_slected_to_chat = st.selectbox("Choose Documnet", history_document)

            if st.session_state.doument_slected_to_chat is None:
                # A service without documents makes the selectbox return None;
                # there is nothing to configure, and ``.split`` would fail.
                st.info("No documents available for this service")
            elif st.session_state.doument_slected_to_chat.split("_")[-1] == "pdf":
                # Names ending in "_pdf" get page-range and chunking controls.
                number_pages = _get_json(
                    GET_NUM_PAGES,
                    {
                        "token": st.session_state.token,
                        "service_name": st.session_state.service_slected_to_chat,
                        "document_name": st.session_state.doument_slected_to_chat,
                    },
                ).get("num_pages")
                page_options = list(range(1, int(number_pages) + 1))
                st.session_state.start_page = st.selectbox("Start Page", page_options)
                st.session_state.end_page = st.selectbox(
                    "End Page", page_options, index=len(page_options) - 1
                )
                st.session_state.method = st.selectbox(
                    "Chunking Method", ["chunk_per_page", "personalize_chunking"]
                )
                if st.session_state.method == "personalize_chunking":
                    st.session_state.split_token = st.text_area("Split Token")
            else:
                # Non-PDF documents only support splitting on a custom token.
                st.session_state.method = st.selectbox("Chunking Method", ["personalize_chunking"])
                st.session_state.split_token = st.text_area("Split Token")
        else:
            st.session_state.service_slected_to_chat = None

    elif st.session_state.genre == "Delete service(s)":
        st.title('Delete Service(s)')
        services = _fetch_services(st.session_state.token)
        service_slected = _multiselect_with_all(services)
        st.write("You selected:", service_slected)
        if len(service_slected) > 0:
            st.session_state.delete = st.button('Delete')
            if st.session_state.delete:
                delete_service(st.session_state.token, service_slected)

    elif st.session_state.genre == "Delete document(s)":
        st.title('Delete Document(s)')
        services = _fetch_services(st.session_state.token)
        if services:
            service = st.selectbox("Choose Service", services)
            history_document = _fetch_document_names(st.session_state.token, service)
            document_slected_to_delete = _multiselect_with_all(history_document)
            st.write("You selected:", document_slected_to_delete)
            if len(document_slected_to_delete) > 0:
                st.session_state.delete = st.button('Delete')
                if st.session_state.delete:
                    # Delete from the service chosen in this branch, not from
                    # st.session_state.service, which only the "Add" flow sets.
                    delete_document(st.session_state.token, service, document_slected_to_delete)
# Keep the page heading on one line regardless of the viewport width.
css_style = """
<style>
.title {
white-space: nowrap;
}
</style>
"""
st.markdown(css_style, unsafe_allow_html=True)


def _build_scrape_request(schema, comments):
    """Pick the extraction endpoint and assemble its payload from session state.

    Documents whose name ends in ``_pdf`` are sent to RESPONSE_API together
    with the selected page range; all other documents go to RESPONSE_TXT_API.

    Args:
        schema: The validated schema to extract.
        comments: Per-key comments to send along (may be empty).

    Returns:
        An ``(endpoint, payload)`` tuple.
    """
    state = st.session_state
    payload = {"token": state.token,
               "service_name": state.service_slected_to_chat,
               "document_name": state.doument_slected_to_chat,
               "method": state.method,
               "model": state.llm,
               "schema": schema,
               "comment": comments}
    if state.doument_slected_to_chat.split("_")[-1] == "pdf":
        # The split token only applies to the custom chunking method.
        payload["split_token"] = state.split_token if state.method == "personalize_chunking" else ""
        payload["start_page"] = state.start_page
        payload["end_page"] = state.end_page
        return RESPONSE_API, payload
    payload["split_token"] = state.split_token
    return RESPONSE_TXT_API, payload


with st.container():
    st.markdown('<h1 class="title">Augmented Generative Document Scraper</h1>', unsafe_allow_html=True)

    # "Add document(s)" with free text: the input lives in the main area
    # because the sidebar is too narrow for a large text box.
    if st.session_state.genre == "Add document(s)" and st.session_state.doc_ortext == "Text area":
        st.session_state.name_text_area = st.text_input("Enter name of the text area:")
        st.session_state.text_area = st.text_area("Enter text:")
        if st.session_state.text_area and st.button('Process Text'):
            add_text_document(st.session_state.token, st.session_state.service)

    # "Select document": edit the schema, optionally add comments, then run
    # the extraction. Skipped when no service or no document is selected.
    if (st.session_state.genre == "Select document"
            and st.session_state.service_slected_to_chat
            and st.session_state.doument_slected_to_chat):
        document_id = create_document_id(
            st.session_state.token,
            st.session_state.service_slected_to_chat,
            st.session_state.doument_slected_to_chat,
        )
        schema = get_schema(document_id)
        schema = display_and_validate_schema(schema)
        if schema:
            save_schema(document_id, schema)

        # Default so that "Process" works when the comments box is unticked;
        # comments are only sent when the user opts in below.
        comments = {}
        if schema and st.checkbox("Add comments"):
            comments = get_comments(document_id)
            if not comments:
                comments = {}
                keys = get_all_keys(schema)
            else:
                keys = list(comments.keys())
            comments = handle_comments(comments, keys)
            save_comments(document_id, comments)

        if schema and st.button('Process'):
            endpoint, payload = _build_scrape_request(schema, comments)
            json_data = json.dumps(payload)
            try:
                response = requests.get(
                    endpoint, data=json_data, headers={'Content-Type': 'application/json'}
                )
                response_data = json.loads(response.text)
            except (requests.RequestException, ValueError):
                # Network failure or a non-JSON body: report it as a
                # processing error instead of crashing the page.
                response_data = {}

            json_str = response_data.get("json") if isinstance(response_data, dict) else None
            if json_str is not None and response_data.get('status') == 'success':
                st.download_button(
                    label="Download JSON",
                    data=json_str.encode('utf-8'),
                    file_name="results.json",
                    mime="application/json",
                )
            else:
                st.error("Error in processing document")