Spaces:

Effyis
/

AGDS-UI

Sleeping

File size: 9,050 Bytes

c398ab5

from utlis.helper import *

# Header sent with every backend call; parameters travel as a JSON body.
_JSON_HEADERS = {'Content-Type': 'application/json'}


def _fetch_json(url, payload, fallback):
    """Send a GET to *url* with *payload* as a JSON body and return the decoded reply.

    If the request fails at the transport level or the reply is not valid
    JSON, the problem is reported in the UI and *fallback* is returned so the
    rest of the page can still render.
    """
    try:
        reply = requests.get(url, data=json.dumps(payload), headers=_JSON_HEADERS)
        # json.JSONDecodeError is a ValueError subclass, so it is caught below.
        return json.loads(reply.text)
    except (requests.RequestException, ValueError) as exc:
        st.error(f"Backend request failed: {exc}")
        return fallback


def _list_services():
    """Return the services reported by SERVICES_API for the current token."""
    return _fetch_json(SERVICES_API, {"token": st.session_state.token}, [])


def _list_document_names(service_name):
    """Return the "documentname" of every document DOCUMENT_API lists for *service_name*."""
    reply = _fetch_json(
        DOCUMENT_API,
        {"token": st.session_state.token, "servicename": service_name},
        {},
    )
    return [doc["documentname"] for doc in reply.get("documents", [])]


def _multiselect_with_all(names):
    """Render a multiselect over *names* and return the real names that were chosen.

    With two or more names an extra "ALL" entry is offered and pre-selected;
    with exactly one name, that name is pre-selected. Picking "ALL" expands to
    every entry of *names*. The caller's list is never mutated.
    """
    options = list(names)
    if len(options) >= 2:
        options.append("ALL")

    if "ALL" in options:
        preselected = "ALL"
    elif len(options) == 1:
        preselected = options[0]
    else:
        preselected = None

    chosen = st.multiselect("", options, default=preselected)
    if "ALL" in chosen:
        return list(names)
    return chosen


initialize_session_state()

with st.sidebar:
    st.image("logo.png", width=170)
    st.title("AGDC")
    st.session_state.llm = st.selectbox("Choose LLM", ['gpt-3.5-turbo', 'gemini'])
    st.session_state.genre = st.radio(
        "Choose option",
        ["Select document", "Add document(s)", "Delete service(s)", "Delete document(s)"],
    )

    if st.session_state.genre == "Add document(s)":
        st.title('Add Document(s)')
        if st.checkbox("Add new service"):
            new_service = st.text_input("Enter service name")
            if new_service and st.button('Add'):
                add_service(st.session_state.token, new_service)

        # Queried after the optional add above, presumably so that a service
        # created in this run already shows up in the list.
        services = _list_services()
        if services:
            st.session_state.service = st.selectbox("Choose Service", services)
            st.session_state.uploaded_files = st.file_uploader(
                "Upload PDF file", type=["pdf"], accept_multiple_files=False
            )
            if st.session_state.uploaded_files:
                st.session_state.process = st.button('Process')
                if st.session_state.process:
                    add_document(st.session_state.token, st.session_state.service)

    elif st.session_state.genre == "Select document":
        st.title('Scrape Document')
        services = _list_services()

        # The main panel only renders when this value is truthy, so it stays
        # None until a service, a document and a usable page count all exist.
        st.session_state.service_slected_to_chat = None

        if services:
            chosen_service = st.selectbox("Choose Service", services)
            documents = _list_document_names(chosen_service)

            if not documents:
                st.warning("This service has no documents yet.")
            else:
                st.session_state.doument_slected_to_chat = st.selectbox("Choose Document", documents)
                page_info = _fetch_json(
                    GET_NUM_PAGES,
                    {
                        "token": st.session_state.token,
                        "service_name": chosen_service,
                        "document_name": st.session_state.doument_slected_to_chat,
                    },
                    {},
                )
                # A missing or non-numeric "num_pages" must not crash the page.
                try:
                    page_count = int(page_info.get("num_pages"))
                except (TypeError, ValueError):
                    page_count = 0

                if page_count < 1:
                    st.error("Could not determine the number of pages for this document.")
                else:
                    page_options = list(range(1, page_count + 1))
                    st.session_state.start_page = st.selectbox("Start Page", page_options)
                    # The end page defaults to the last page of the document.
                    st.session_state.end_page = st.selectbox(
                        "End Page", page_options, index=len(page_options) - 1
                    )
                    st.session_state.method = st.selectbox(
                        "Chunking Method", ["chunk_per_page", "personalize_chunking"]
                    )
                    if st.session_state.method == "personalize_chunking":
                        st.session_state.split_token = st.text_area("Split Token")
                    # Everything the main panel reads is now set; enable it.
                    st.session_state.service_slected_to_chat = chosen_service

    elif st.session_state.genre == "Delete service(s)":
        st.title('Delete Service(s)')
        service_slected = _multiselect_with_all(_list_services())
        st.write("You selected:", service_slected)

        if service_slected:
            st.session_state.delete = st.button('Delete')
            if st.session_state.delete:
                delete_service(st.session_state.token, service_slected)

    elif st.session_state.genre == "Delete document(s)":
        st.title('Delete Document(s)')
        services = _list_services()
        if services:
            service = st.selectbox("Choose Service", services)
            document_slected_to_delete = _multiselect_with_all(_list_document_names(service))

            st.write("You selected:", document_slected_to_delete)
            if document_slected_to_delete:
                st.session_state.delete = st.button('Delete')
                if st.session_state.delete:
                    # Use the service picked in this panel: the documents listed
                    # above belong to it, not to st.session_state.service (which
                    # is only set by the "Add document(s)" panel).
                    delete_document(st.session_state.token, service, document_slected_to_delete)

# Page-level CSS: keeps any element with class "title" on a single line.
css_style = (
    "\n"
    "<style>\n"
    ".title {\n"
    "    white-space: nowrap;\n"
    "}\n"
    "</style>\n"
)

# unsafe_allow_html is passed so the raw <style> markup above is sent as HTML.
st.markdown(css_style, unsafe_allow_html=True)

# Main panel: page heading plus, in "Select document" mode, the schema-driven
# extraction form and the download of its result.
with st.container():
    st.markdown('<h1 class="title">Augmented Generative Document Scraper</h1>', unsafe_allow_html=True)

    # The form is only offered once the sidebar has stored a selected service.
    if st.session_state.genre == "Select document" and st.session_state.service_slected_to_chat:
        schema = display_and_validate_schema()
        comments = None
        if schema and st.checkbox("Add comments"):
            keys = get_all_keys(schema)
            comments = handle_comments(keys)

        if schema and st.button('Process'):
            # split_token is only set by the sidebar for "personalize_chunking".
            uses_split_token = st.session_state.method == "personalize_chunking"
            data = {
                "token": st.session_state.token,
                "service_name": st.session_state.service_slected_to_chat,
                "document_name": st.session_state.doument_slected_to_chat,
                "method": st.session_state.method,
                "model": st.session_state.llm,
                "schema": schema,
                "comment": comments,
                "split_token": st.session_state.split_token if uses_split_token else "",
                "start_page": st.session_state.start_page,
                "end_page": st.session_state.end_page,
            }
            json_data = json.dumps(data)
            headers = {'Content-Type': 'application/json'}

            try:
                response = requests.get(RESPONSE_API, data=json_data, headers=headers)
                # json.JSONDecodeError is a ValueError subclass.
                response_data = json.loads(response.text)
            except (requests.RequestException, ValueError) as exc:
                # A transport failure or non-JSON reply is reported in the UI
                # instead of aborting the script run with a traceback.
                st.error(f"Error in processing document: {exc}")
            else:
                if isinstance(response_data, dict) and response_data.get('status') == 'success':
                    extracted = response_data.get("json")
                    # The download widget needs bytes, so serialize and encode.
                    json_bytes = json.dumps(extracted).encode('utf-8')
                    st.download_button(
                        label="Download JSON",
                        data=json_bytes,
                        file_name="results.json",
                        mime="application/json",
                    )
                else:
                    st.error("Error in processing document")