# Spaces: mtyrrell / maf_prefilter_app - Running - File size: 8,299 Bytes

import torch
def _report_cuda_status():
    """Print whether CUDA is usable and, when it is, the active device name.

    Any failure while probing is reported on stdout and never propagated, so
    start-up continues regardless of the outcome.
    """
    try:
        cuda_ready = torch.cuda.is_available()
        print(f"Is CUDA available: {cuda_ready}")

        # Guard clause: nothing more to report without a CUDA device.
        if not cuda_ready:
            print("No CUDA device available - using CPU")
            return

        try:
            active_index = torch.cuda.current_device()
            print(f"CUDA device: {torch.cuda.get_device_name(active_index)}")
        except Exception as name_err:
            print(f"Error getting CUDA device name: {name_err}")
    except Exception as probe_err:
        print(f"Error checking CUDA availability: {probe_err}")
        print("Continuing with CPU...")


_report_cuda_status()


# Logging is configured first, before the module-level logger is created and
# before the remaining application imports below run.
# NOTE(review): setup_logging() is project-local; presumably it installs the
# handlers/formatters for the app - confirm in modules/logging_config.
from modules.logging_config import setup_logging
setup_logging()
import logging
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

import streamlit as st
import os
from huggingface_hub import login
from datetime import datetime
from modules.auth import validate_login
from modules.utils import create_excel, clean_text, extract_predicted_labels, predict_category, process_data

# Local
# from dotenv import load_dotenv
# load_dotenv()


# Main app logic
def _reset_analysis_state():
    """Return the analysis UI to idle and discard any cached results.

    Used both on failure and as the download button's ``on_click`` callback,
    so a later run can never pick up a DataFrame left over from an earlier one.
    """
    st.session_state['show_button'] = True
    st.session_state['processing'] = False
    st.session_state['data_processed'] = False
    st.session_state['df'] = None


def _ensure_hf_login():
    """Log in to the Hugging Face Hub once per browser session.

    Returns:
        True when the session is logged in, False when ``HF_TOKEN`` is not
        configured (an error is shown to the user in that case).
    """
    if 'hf_logged_in' in st.session_state:
        return True

    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        logger.error("HF_TOKEN environment variable is not set")
        st.error("Server configuration error: the HF_TOKEN environment variable is not set.")
        return False

    login(token=hf_token, add_to_git_credential=True)
    # Streamlit re-executes the script on every interaction; remember the
    # login so the Hub is not contacted again on each rerun.
    st.session_state['hf_logged_in'] = True
    return True


def main():
    """Render the pre-filtering app: login form, then upload/analyse/download.

    Unauthenticated sessions only see the username/password form. Once
    authenticated, the page shows the instructions sidebar, the file uploader,
    a "Start Analysis" button and - after processing - a CSV download button.
    Progress through that flow is tracked in ``st.session_state``:

    * ``show_button`` / ``processing`` - whether the start button or the
      processing branch is active.
    * ``data_processed`` / ``df`` - cached result of ``process_data``.
    * ``error_message`` - failure text carried across the rerun that resets
      the UI, so the user actually gets to see it.
    """
    # Sessions start unauthenticated; the login form below flips this flag.
    if 'authenticated' not in st.session_state:
        st.session_state['authenticated'] = False

    if st.session_state['authenticated']:
        if not _ensure_hf_login():
            return

        # Initialize session state variables (each key exactly once)
        for state_key, default in (
            ('data_processed', False),
            ('df', None),
            ('show_button', True),
            ('processing', False),
            ('error_message', None),
        ):
            if state_key not in st.session_state:
                st.session_state[state_key] = default

        # Main Streamlit app
        st.title('Application Pre-Filtering Tool')

        # Sidebar (filters)
        with st.sidebar:
            with st.expander("ℹ️ - Instructions", expanded=False):
                st.markdown(
                    """
                    1. **Download the Excel Template file (below)**
                    2. **[OPTIONAL]: Select the desired filtering sensitivity level (below)**
                    3. **Copy/paste the requisite application data in the template file. Best practice is to 'paste as values'**
                    4. **Upload the template file in the area to the right (or click browse files)**
                    5. **Click 'Start Analysis'**

                    The tool will start processing the uploaded application data. This can take some time
                    depending on the number of applications and the length of text in each. For example, a file with 1000 applications
                    could be expected to take approximately 5 minutes.

                    ***NOTE (1)** -  you can also simply rename the column headers in your own file. The headers must match the column names in the template for the tool to run properly.*

                    """
                )
            # Excel file download
            st.download_button(
                label="Download Excel Template",
                data=create_excel(),
                file_name="upload_template.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
            )

            # get sensitivity level for use in review / reject (ref. process_data function)
            sens_options = {
                "Low": 4,
                "Medium": 5,
                "High": 6,
            }

            sens_input = st.sidebar.radio(label = 'Select the Sensitivity Level [OPTIONAL]',
                                    help = 'Decreasing the level of sensitivity results in less \
                                    applications filtered out. This also \
                                    reduces the probability of false negatives (FNs). The rate of \
                                    FNs at the lowest setting is approximately 6 percent, and \
                                    approaches 13 percent at the highest setting. \
                                    NOTE: changing this setting does not affect the raw data in the CSV output file (only the labels)', 
                                    options = list(sens_options.keys()),
                                    index = list(sens_options.keys()).index("High"),
                                    horizontal = False)

            sens_level = sens_options[sens_input]

        with st.expander("ℹ️ - About this app", expanded=False):
            st.write(
                """
                This tool provides an interface for running an automated preliminary assessment of applications for a call for applications.

                The tool functions by running selected text fields from the application through a series of LLMs fine-tuned for text classification (ref. diagram below).
                The resulting output classifications are used to compute a score and a suggested pre-filtering action. The tool has been tested against 
                human assessors and exhibits an extremely low false negative rate (<6%) at a Sensitivity Level of 'Low' (i.e. rejection threshold for predicted score < 4).
                
                """)
            st.image('images/pipeline.png')

        uploaded_file = st.file_uploader("Select a file containing application pre-filtering data (see instructions in the sidebar)")

        # Show a failure recorded by the previous run. The failing run has to
        # call st.rerun() to bring the start button back, which would wipe an
        # st.error() issued in that same run.
        if st.session_state['error_message']:
            st.error(st.session_state['error_message'])
            st.session_state['error_message'] = None

        # Only show the button if show_button is True and file is uploaded and not processing
        if uploaded_file is not None and st.session_state['show_button'] and not st.session_state['processing']:
            if st.button("Start Analysis", key="start_analysis"):
                st.session_state['show_button'] = False
                st.session_state['processing'] = True
                st.rerun()

        # If we're processing, show the processing logic
        if st.session_state['processing']:
            if uploaded_file is None:
                # The file was removed from the uploader mid-flow: there is
                # nothing to analyse, so go back to idle without an error.
                logger.info("Uploaded file removed while processing; resetting state")
                _reset_analysis_state()
                st.rerun()
            else:
                try:
                    logger.info(f"File uploaded: {uploaded_file.name}")

                    # NOTE(review): the result is cached until it is downloaded;
                    # changing the sensitivity level in between does not trigger
                    # re-processing - confirm this is intended.
                    if not st.session_state['data_processed']:
                        logger.info("Starting data processing")
                        try:
                            st.session_state['df'] = process_data(uploaded_file, sens_level)
                            logger.info("Data processing completed successfully")
                            st.session_state['data_processed'] = True
                        except Exception:
                            # Keep the traceback in the log, then let the outer
                            # handler reset the UI.
                            logger.exception("Error in process_data")
                            raise

                    df = st.session_state['df']

                    current_datetime = datetime.now().strftime('%d-%m-%Y_%H-%M-%S')
                    output_filename = f'processed_applications_{current_datetime}.csv'
                    csv_buffer = df.to_csv(index=False).encode()
                    logger.info("CSV buffer created successfully")

                    st.download_button(
                        label="Download data as CSV",
                        data=csv_buffer,
                        file_name=output_filename,
                        mime='text/csv',
                        on_click=_reset_analysis_state
                    )

                except Exception as e:
                    logger.error(f"Error processing file: {str(e)}")
                    # Drop cached results too, otherwise the next analysis
                    # would skip process_data and serve the stale DataFrame.
                    _reset_analysis_state()
                    st.session_state['error_message'] = (
                        "Failed to process the file. Please ensure your column names match the template file."
                    )
                    st.rerun()

    # Not authenticated yet: show the login form
    else:
        username = st.text_input("Username")
        password = st.text_input("Password", type="password")
        if st.button("Login"):
            if validate_login(username, password):
                st.session_state['authenticated'] = True
                st.rerun()
            else:
                st.error("Incorrect username or password")



# Entry point: render the app whenever this script is executed.
main()