mtyrrell's picture
dotenv
f2efe55
import torch
try:
print(f"Is CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
try:
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
except Exception as e:
print(f"Error getting CUDA device name: {str(e)}")
else:
print("No CUDA device available - using CPU")
except Exception as e:
print(f"Error checking CUDA availability: {str(e)}")
print("Continuing with CPU...")
import streamlit as st
import os
from huggingface_hub import login
from datetime import datetime
from modules.auth import validate_login
from modules.utils import create_excel, clean_text, extract_predicted_labels, predict_category, process_data
from modules.logging_config import setup_logging
setup_logging()
import logging
from io import BytesIO
logger = logging.getLogger(__name__)
# Local
# from dotenv import load_dotenv
# load_dotenv()
# Main app logic
def main():
# Temporarily set authentication to True for testing
if 'authenticated' not in st.session_state:
st.session_state['authenticated'] = False
if st.session_state['authenticated']:
# Remove login success message for testing
hf_token = os.environ["HF_TOKEN"]
login(token=hf_token, add_to_git_credential=True)
# Initialize session state variables
if 'data_processed' not in st.session_state:
st.session_state['data_processed'] = False
st.session_state['df'] = None
# Main Streamlit app
st.title('Application Pre-Filtering Tool')
# Sidebar (filters)
with st.sidebar:
with st.expander("ℹ️ - Instructions", expanded=False):
st.markdown(
"""
1. **Download the Excel Template file (below)**
2. **[OPTIONAL]: Select the desired filtering sensitivity level (below)**
3. **Copy/paste the requisite application data in the template file. Best practice is to 'paste as values'**
4. **Upload the template file in the area to the right (or click browse files)**
5. **Click 'Start Analysis'**
The tool will start processing the uploaded application data. This can take some time
depending on the number of applications and the length of text in each. For example, a file with 1000 applications
could be expected to take approximately 5 minutes.
***NOTE** - you can also simply rename the column headers in your own file. The headers must match the column names in the template for the tool to run properly.*
"""
)
# Excel file download
st.download_button(
label="Download Excel Template",
data=create_excel(),
file_name="upload_template.xlsx",
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
)
# get sensitivity level for use in review / reject (ref. process_data function)
sens_options = {
"Low": 4,
"Medium": 5,
"High": 6,
}
sens_input = st.sidebar.radio(label = 'Select the Sensitivity Level [OPTIONAL]',
help = 'Decreasing the level of sensitivity results in less \
applications filtered out. This also \
reduces the probability of false negatives (FNs). The rate of \
FNs at the lowest setting is approximately 6 percent, and \
approaches 13 percent at the highest setting. \
NOTE: changing this setting does not affect the raw data in the CSV output file (only the labels)',
options = list(sens_options.keys()),
index = list(sens_options.keys()).index("High"),
horizontal = False)
sens_level = sens_options[sens_input]
with st.expander("ℹ️ - About this app", expanded=False):
st.write(
"""
This tool provides an interface for running an automated preliminary assessment of applications for a call for applications.
The tool functions by running selected text fields from the application through a series of LLMs fine-tuned for text classification (ref. diagram below).
The resulting output classifications are used to compute a score and a suggested pre-filtering action. The tool has been tested against
human assessors and exhibits an extremely low false negative rate (<6%) at a Sensitivity Level of 'Low' (i.e. rejection threshold for predicted score < 4).
""")
st.image('images/pipeline.png')
uploaded_file = st.file_uploader("Select a file containing application pre-filtering data (see instructions in the sidebar)")
# Add session state variables if they don't exist
if 'show_button' not in st.session_state:
st.session_state['show_button'] = True
if 'processing' not in st.session_state:
st.session_state['processing'] = False
if 'data_processed' not in st.session_state:
st.session_state['data_processed'] = False
# Only show the button if show_button is True and file is uploaded and not processing
if uploaded_file is not None and st.session_state['show_button'] and not st.session_state['processing']:
if st.button("Start Analysis", key="start_analysis"):
st.session_state['show_button'] = False
st.session_state['processing'] = True
st.rerun()
# If we're processing, show the processing logic
if st.session_state['processing']:
try:
logger.info(f"File uploaded: {uploaded_file.name}")
if not st.session_state['data_processed']:
logger.info("Starting data processing")
try:
st.session_state['df'] = process_data(uploaded_file, sens_level)
logger.info("Data processing completed successfully")
st.session_state['data_processed'] = True
except ValueError as e:
# Handle specific validation errors
logger.error(f"Validation error: {str(e)}")
st.error(str(e))
st.session_state['show_button'] = True
st.session_state['processing'] = False
st.rerun()
except Exception as e:
# Handle other unexpected errors
logger.error(f"Error in process_data: {str(e)}")
st.error("An unexpected error occurred. Please check your input file and try again.")
st.session_state['show_button'] = True
st.session_state['processing'] = False
st.rerun()
df = st.session_state['df']
def reset_button_state():
st.session_state['show_button'] = True
st.session_state['processing'] = False
st.session_state['data_processed'] = False
# Create Excel buffer
excel_buffer = BytesIO()
df.to_excel(excel_buffer, index=False, engine='openpyxl')
excel_buffer.seek(0)
current_datetime = datetime.now().strftime('%d-%m-%Y_%H-%M-%S')
output_filename = f'processed_applications_{current_datetime}.xlsx'
st.download_button(
label="Download Analysis Data File",
data=excel_buffer,
file_name=output_filename,
mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
on_click=reset_button_state
)
except Exception as e:
logger.error(f"Error processing file: {str(e)}")
st.error("Failed to process the file. Please ensure your column names match the template file.")
st.session_state['show_button'] = True
st.session_state['processing'] = False
st.rerun()
# Comment out for testing
else:
username = st.text_input("Username")
password = st.text_input("Password", type="password")
if st.button("Login"):
if validate_login(username, password):
st.session_state['authenticated'] = True
st.rerun()
else:
st.error("Incorrect username or password")
main()