# torch is imported and probed before Streamlit and the project modules are
# loaded; the probe only prints diagnostics and never aborts start-up.
import torch

try:
    print(f"Is CUDA available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        try:
            print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
        except Exception as e:
            print(f"Error getting CUDA device name: {str(e)}")
    else:
        print("No CUDA device available - using CPU")
except Exception as e:
    print(f"Error checking CUDA availability: {str(e)}")
    print("Continuing with CPU...")

import streamlit as st
import os
from huggingface_hub import login
from datetime import datetime
from modules.auth import validate_login
from modules.utils import (
    create_excel,
    clean_text,
    extract_predicted_labels,
    predict_category,
    process_data,
)
from modules.logging_config import setup_logging
import logging
from io import BytesIO

# Logging is configured before this module's logger is created.
setup_logging()
logger = logging.getLogger(__name__)

# Local
# from dotenv import load_dotenv
# load_dotenv()

# MIME type shared by the template download and the results download.
_XLSX_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

# Sensitivity label -> numeric level handed to process_data.
_SENSITIVITY_LEVELS = {
    "Low": 4,
    "Medium": 5,
    "High": 6,
}

# Tooltip for the sensitivity radio (whitespace runs collapse when rendered).
_SENSITIVITY_HELP = (
    'Decreasing the level of sensitivity results in less '
    'applications filtered out. This also '
    'reduces the probability of false negatives (FNs). The rate of '
    'FNs at the lowest setting is approximately 6 percent, and '
    'approaches 13 percent at the highest setting. '
    'NOTE: changing this setting does not affect the raw data in the CSV output file (only the labels)'
)

# Session-state key holding an error message that must survive st.rerun().
_PENDING_ERROR_KEY = 'pending_error'


def _init_session_state():
    """Create the session-state entries used by the analysis workflow if missing."""
    defaults = {
        'show_button': True,
        'processing': False,
        'data_processed': False,
        'df': None,
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def _reset_analysis_state():
    """Return the workflow to its idle state (also used as the download on_click callback)."""
    st.session_state['show_button'] = True
    st.session_state['processing'] = False
    # Clearing this flag forces the next run to call process_data again instead
    # of reusing a DataFrame computed for an earlier upload.
    st.session_state['data_processed'] = False


def _abort_analysis(message):
    """Reset the workflow, queue ``message`` for display, and rerun the script.

    An ``st.error`` issued right before ``st.rerun()`` is wiped by the rerun,
    so the message is parked in session state and rendered on the next run.
    This function does not return: ``st.rerun()`` interrupts the script.
    """
    st.session_state[_PENDING_ERROR_KEY] = message
    _reset_analysis_state()
    st.rerun()


def _show_pending_error():
    """Render (once) an error message queued by ``_abort_analysis``."""
    if _PENDING_ERROR_KEY in st.session_state:
        message = st.session_state[_PENDING_ERROR_KEY]
        del st.session_state[_PENDING_ERROR_KEY]
        if message:
            st.error(message)


def _render_sidebar():
    """Draw the sidebar (instructions, template download, sensitivity) and return the chosen level."""
    with st.sidebar:
        with st.expander("ℹ️ - Instructions", expanded=False):
            st.markdown(
                """
                1. **Download the Excel Template file (below)**
                2. **[OPTIONAL]: Select the desired filtering sensitivity level (below)**
                3. **Copy/paste the requisite application data in the template file. Best practice is to 'paste as values'**
                4. **Upload the template file in the area to the right (or click browse files)**
                5. **Click 'Start Analysis'**

                The tool will start processing the uploaded application data.
                This can take some time depending on the number of applications and the length of text in each. For example, a file with 1000 applications could be expected to take approximately 5 minutes.

                ***NOTE** - you can also simply rename the column headers in your own file. The headers must match the column names in the template for the tool to run properly.*
                """
            )

        # Excel file download
        st.download_button(
            label="Download Excel Template",
            data=create_excel(),
            file_name="upload_template.xlsx",
            mime=_XLSX_MIME,
        )

    # Sensitivity level used for review / reject (ref. process_data function)
    labels = list(_SENSITIVITY_LEVELS.keys())
    sens_input = st.sidebar.radio(
        label='Select the Sensitivity Level [OPTIONAL]',
        help=_SENSITIVITY_HELP,
        options=labels,
        index=labels.index("High"),
        horizontal=False,
    )
    return _SENSITIVITY_LEVELS[sens_input]


def _render_about():
    """Draw the collapsible "About this app" section with the pipeline diagram."""
    with st.expander("ℹ️ - About this app", expanded=False):
        st.write(
            """
            This tool provides an interface for running an automated preliminary assessment of applications for a call for applications.

            The tool functions by running selected text fields from the application through a series of LLMs fine-tuned for text classification (ref. diagram below).
            The resulting output classifications are used to compute a score and a suggested pre-filtering action. The tool has been tested against human assessors and exhibits an extremely low false negative rate (<6%) at a Sensitivity Level of 'Low' (i.e. rejection threshold for predicted score < 4).
            """)
        st.image('images/pipeline.png')


def _run_analysis(uploaded_file, sens_level):
    """Process the uploaded file (once) and offer the result as an Excel download.

    Args:
        uploaded_file: The object returned by ``st.file_uploader``; passed
            unchanged to ``process_data``.
        sens_level: Numeric sensitivity level passed to ``process_data``.

    Any failure resets the workflow and reruns the script with an error
    message queued for display.
    """
    try:
        logger.info(f"File uploaded: {uploaded_file.name}")

        if not st.session_state['data_processed']:
            logger.info("Starting data processing")
            try:
                st.session_state['df'] = process_data(uploaded_file, sens_level)
            except ValueError as e:
                # Validation errors carry a message that is shown to the user as-is.
                logger.error(f"Validation error: {str(e)}")
                _abort_analysis(str(e))
            except Exception as e:
                # Anything else is unexpected: keep the traceback in the log.
                logger.exception(f"Error in process_data: {str(e)}")
                _abort_analysis("An unexpected error occurred. Please check your input file and try again.")
            else:
                logger.info("Data processing completed successfully")
                st.session_state['data_processed'] = True

        df = st.session_state['df']

        # Serialize the results into an in-memory Excel workbook.
        excel_buffer = BytesIO()
        df.to_excel(excel_buffer, index=False, engine='openpyxl')
        excel_buffer.seek(0)

        current_datetime = datetime.now().strftime('%d-%m-%Y_%H-%M-%S')
        output_filename = f'processed_applications_{current_datetime}.xlsx'

        st.download_button(
            label="Download Analysis Data File",
            data=excel_buffer,
            file_name=output_filename,
            mime=_XLSX_MIME,
            on_click=_reset_analysis_state,
        )
    except Exception as e:
        # The rerun raised by _abort_analysis is not an Exception subclass in
        # Streamlit, so it passes through this handler untouched.
        logger.exception(f"Error processing file: {str(e)}")
        _abort_analysis("Failed to process the file. Please ensure your column names match the template file.")


def _render_login():
    """Draw the username/password form and mark the session authenticated on success."""
    username = st.text_input("Username")
    password = st.text_input("Password", type="password")
    if st.button("Login"):
        if validate_login(username, password):
            st.session_state['authenticated'] = True
            st.rerun()
        else:
            st.error("Incorrect username or password")


# Main app logic
def main():
    """Entry point: show the login form, or the pre-filtering tool once authenticated."""
    # Sessions start unauthenticated; the login form flips this flag.
    if 'authenticated' not in st.session_state:
        st.session_state['authenticated'] = False

    if not st.session_state['authenticated']:
        _render_login()
        return

    # Authenticate against the Hugging Face Hub. A missing token is reported
    # cleanly instead of surfacing a raw KeyError traceback.
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        logger.error("HF_TOKEN environment variable is not set")
        st.error("Server configuration error: the HF_TOKEN environment variable is not set.")
        st.stop()
    login(token=hf_token, add_to_git_credential=True)

    _init_session_state()

    # Main Streamlit app
    st.title('Application Pre-Filtering Tool')

    sens_level = _render_sidebar()
    _render_about()

    uploaded_file = st.file_uploader("Select a file containing application pre-filtering data (see instructions in the sidebar)")

    # Show any error queued by the previous (aborted) run.
    _show_pending_error()

    # Only show the button if show_button is True and file is uploaded and not processing
    if uploaded_file is not None and st.session_state['show_button'] and not st.session_state['processing']:
        if st.button("Start Analysis", key="start_analysis"):
            st.session_state['show_button'] = False
            st.session_state['processing'] = True
            # Every explicit start processes the current upload from scratch.
            st.session_state['data_processed'] = False
            st.rerun()

    # If we're processing, show the processing logic
    if st.session_state['processing']:
        if uploaded_file is None:
            # The file was removed from the uploader while an analysis was active.
            logger.error("Processing requested but no uploaded file is available")
            _abort_analysis("The uploaded file is no longer available. Please upload it again.")
        _run_analysis(uploaded_file, sens_level)


main()